Merge pull request #3959 from jet47:gpu-fixes

This commit is contained in:
Vadim Pisarevsky
2015-04-28 10:49:05 +00:00
5 changed files with 17 additions and 11 deletions

View File

@@ -114,7 +114,7 @@ namespace cv { namespace gpu { namespace device
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(x));
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(x));
-sum = sum + 0.375f * src(src_y , b.idx_col_high(x));
+sum = sum + 0.375f * src(b.idx_row_high(src_y ), b.idx_col_high(x));
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(x));
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(x));
@@ -129,7 +129,7 @@ namespace cv { namespace gpu { namespace device
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col(left_x));
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col(left_x));
-sum = sum + 0.375f * src(src_y , b.idx_col(left_x));
+sum = sum + 0.375f * src(b.idx_row_high(src_y ), b.idx_col(left_x));
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col(left_x));
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col(left_x));
@@ -144,7 +144,7 @@ namespace cv { namespace gpu { namespace device
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(right_x));
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(right_x));
-sum = sum + 0.375f * src(src_y , b.idx_col_high(right_x));
+sum = sum + 0.375f * src(b.idx_row_high(src_y ), b.idx_col_high(right_x));
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(right_x));
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(right_x));

View File

@@ -280,7 +280,8 @@ __global__ void scanRows(T_in *d_src, Ncv32u texOffs, Ncv32u srcWidth, Ncv32u sr
__shared__ T_out shmem[NUM_SCAN_THREADS * 2];
__shared__ T_out carryElem;
-carryElem = 0;
+if (threadIdx.x == 0)
+    carryElem = 0;
__syncthreads();
while (numBuckets--)