fix bug #3678 (cuda::integral failures)
This commit is contained in:
@@ -439,8 +439,6 @@ namespace integral_detail
|
||||
|
||||
T sum = (tidx < cols) && (y < rows) ? *p : 0;
|
||||
|
||||
y += blockDim.y;
|
||||
|
||||
sums[threadIdx.x][threadIdx.y] = sum;
|
||||
__syncthreads();
|
||||
|
||||
@@ -467,14 +465,17 @@ namespace integral_detail
|
||||
if (threadIdx.y > 0)
|
||||
sum += sums[threadIdx.x][threadIdx.y - 1];
|
||||
|
||||
if (tidx < cols)
|
||||
sum += stepSum;
|
||||
stepSum += sums[threadIdx.x][blockDim.y - 1];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if ((tidx < cols) && (y < rows))
|
||||
{
|
||||
sum += stepSum;
|
||||
stepSum += sums[threadIdx.x][blockDim.y - 1];
|
||||
*p = sum;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
y += blockDim.y;
|
||||
}
|
||||
#else
|
||||
__shared__ T smem[32][32];
|
||||
|
Reference in New Issue
Block a user