From f1b5cbc8fe734b25220d2b4a1e7156b2017375e5 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin
Date: Wed, 18 Sep 2013 01:14:41 +0400
Subject: [PATCH] ocl: fix invalid usage of alignSize (n is not a power of 2), added roundUp

---
 modules/core/include/opencv2/core/core.hpp |  1 +
 modules/ocl/src/initialization.cpp         | 16 +++++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/modules/core/include/opencv2/core/core.hpp b/modules/core/include/opencv2/core/core.hpp
index 9996c242a..af3a50c43 100644
--- a/modules/core/include/opencv2/core/core.hpp
+++ b/modules/core/include/opencv2/core/core.hpp
@@ -339,6 +339,7 @@ template<typename _Tp> static inline _Tp* alignPtr(_Tp* ptr, int n=(int)sizeof(_
  */
 static inline size_t alignSize(size_t sz, int n)
 {
+    assert((n & (n - 1)) == 0); // n is a power of 2
     return (sz + n-1) & -n;
 }
 
diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp
index 8f5fae3f8..0e16e75ae 100644
--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@@ -679,6 +679,16 @@ namespace cv
             CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= clCxt->impl->maxWorkGroupSize );
         }
 
+        static inline size_t roundUp(size_t sz, size_t n)
+        {
+            // we don't assume that n is a power of 2 (see alignSize)
+            // equal to divUp(sz, n) * n
+            size_t t = sz + n - 1;
+            size_t rem = t % n;
+            size_t result = t - rem;
+            return result;
+        }
+
 #ifdef PRINT_KERNEL_RUN_TIME
         static double total_execute_time = 0;
         static double total_kernel_time = 0;
@@ -702,9 +712,9 @@ namespace cv
 
             if ( localThreads != NULL)
             {
-                globalThreads[0] = alignSize(globalThreads[0], localThreads[0]);
-                globalThreads[1] = alignSize(globalThreads[1], localThreads[1]);
-                globalThreads[2] = alignSize(globalThreads[2], localThreads[2]);
+                globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
+                globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
+                globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
 
                 cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
             }