Merge pull request #1468 from alalek:fix_alignSize_usage

This commit is contained in:
Andrey Pavlenko 2013-09-18 13:30:38 +04:00 committed by OpenCV Buildbot
commit d08b163657
2 changed files with 14 additions and 3 deletions

View File

@ -339,6 +339,7 @@ template<typename _Tp> static inline _Tp* alignPtr(_Tp* ptr, int n=(int)sizeof(_
*/
static inline size_t alignSize(size_t sz, int n)
{
    // The mask trick below is only valid when n has at most one bit set,
    // i.e. n is a power of 2.
    assert((n & (n - 1)) == 0);

    // Push sz past the next boundary (unless it already sits on one) ...
    const size_t bumped = sz + n - 1;
    // ... then clear the low bits: -n converted to size_t has every bit
    // at or above the alignment bit set and every bit below it cleared.
    const size_t mask = static_cast<size_t>(-n);
    return bumped & mask;
}

View File

@ -679,6 +679,16 @@ namespace cv
CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= clCxt->impl->maxWorkGroupSize );
}
static inline size_t roundUp(size_t sz, size_t n)
{
    // Rounds sz up to the nearest multiple of n; equal to divUp(sz, n) * n.
    // We don't assume that n is a power of 2 (see alignSize).
    // Precondition: n != 0 (the modulo below is undefined for a zero divisor).
    //
    // The remainder is taken from sz itself rather than from sz + n - 1, so
    // no intermediate value can wrap around size_t when sz is already a
    // multiple of n near the top of the range. If the true rounded-up value
    // does not fit in size_t, the result still wraps.
    const size_t rem = sz % n;
    return rem == 0 ? sz : sz + (n - rem);
}
#ifdef PRINT_KERNEL_RUN_TIME
static double total_execute_time = 0;
static double total_kernel_time = 0;
@ -702,9 +712,9 @@ namespace cv
if ( localThreads != NULL)
{
globalThreads[0] = alignSize(globalThreads[0], localThreads[0]);
globalThreads[1] = alignSize(globalThreads[1], localThreads[1]);
globalThreads[2] = alignSize(globalThreads[2], localThreads[2]);
globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
}