Merge pull request #2749 from jet47:fix-bug-3678

This commit is contained in:
Roman Donchenko 2014-05-14 17:25:18 +04:00 committed by OpenCV Buildbot
commit cafcfc4d0f
2 changed files with 8 additions and 7 deletions

View File

@@ -850,7 +850,7 @@ CUDA_TEST_P(Integral, Accuracy)
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Integral, testing::Combine( INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Integral, testing::Combine(
ALL_DEVICES, ALL_DEVICES,
DIFFERENT_SIZES, testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(768, 1066)),
WHOLE_SUBMAT)); WHOLE_SUBMAT));
/////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@@ -439,8 +439,6 @@ namespace integral_detail
T sum = (tidx < cols) && (y < rows) ? *p : 0; T sum = (tidx < cols) && (y < rows) ? *p : 0;
y += blockDim.y;
sums[threadIdx.x][threadIdx.y] = sum; sums[threadIdx.x][threadIdx.y] = sum;
__syncthreads(); __syncthreads();
@@ -467,14 +465,17 @@ namespace integral_detail
if (threadIdx.y > 0) if (threadIdx.y > 0)
sum += sums[threadIdx.x][threadIdx.y - 1]; sum += sums[threadIdx.x][threadIdx.y - 1];
if (tidx < cols) sum += stepSum;
stepSum += sums[threadIdx.x][blockDim.y - 1];
__syncthreads();
if ((tidx < cols) && (y < rows))
{ {
sum += stepSum;
stepSum += sums[threadIdx.x][blockDim.y - 1];
*p = sum; *p = sum;
} }
__syncthreads(); y += blockDim.y;
} }
#else #else
__shared__ T smem[32][32]; __shared__ T smem[32][32];