Added stereo_bm test; in constantspacebp, the final reduction steps (tid < 32) now go through a "volatile" pointer
This commit is contained in:
@@ -322,12 +322,14 @@ namespace csbp_krnls
|
||||
if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } __syncthreads(); }
|
||||
if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } __syncthreads(); }
|
||||
|
||||
if (winsz >= 64) if (tid < 32) dline[tid] += dline[tid + 32];
|
||||
if (winsz >= 32) if (tid < 16) dline[tid] += dline[tid + 16];
|
||||
if (winsz >= 16) if (tid < 8) dline[tid] += dline[tid + 8];
|
||||
if (winsz >= 8) if (tid < 4) dline[tid] += dline[tid + 4];
|
||||
if (winsz >= 4) if (tid < 2) dline[tid] += dline[tid + 2];
|
||||
if (winsz >= 2) if (tid < 1) dline[tid] += dline[tid + 1];
|
||||
volatile float* vdline = smem + winsz * threadIdx.z;
|
||||
|
||||
if (winsz >= 64) if (tid < 32) vdline[tid] += vdline[tid + 32];
|
||||
if (winsz >= 32) if (tid < 16) vdline[tid] += vdline[tid + 16];
|
||||
if (winsz >= 16) if (tid < 8) vdline[tid] += vdline[tid + 8];
|
||||
if (winsz >= 8) if (tid < 4) vdline[tid] += vdline[tid + 4];
|
||||
if (winsz >= 4) if (tid < 2) vdline[tid] += vdline[tid + 2];
|
||||
if (winsz >= 2) if (tid < 1) vdline[tid] += vdline[tid + 1];
|
||||
|
||||
T* data_cost = (T*)ctemp + y_out * cmsg_step1 + x_out;
|
||||
|
||||
@@ -524,12 +526,14 @@ namespace csbp_krnls
|
||||
if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } __syncthreads(); }
|
||||
if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } __syncthreads(); }
|
||||
|
||||
if (winsz >= 64) if (tid < 32) dline[tid] += dline[tid + 32];
|
||||
if (winsz >= 32) if (tid < 16) dline[tid] += dline[tid + 16];
|
||||
if (winsz >= 16) if (tid < 8) dline[tid] += dline[tid + 8];
|
||||
if (winsz >= 8) if (tid < 4) dline[tid] += dline[tid + 4];
|
||||
if (winsz >= 4) if (tid < 2) dline[tid] += dline[tid + 2];
|
||||
if (winsz >= 2) if (tid < 1) dline[tid] += dline[tid + 1];
|
||||
volatile float* vdline = smem + winsz * threadIdx.z;
|
||||
|
||||
if (winsz >= 64) if (tid < 32) vdline[tid] += vdline[tid + 32];
|
||||
if (winsz >= 32) if (tid < 16) vdline[tid] += vdline[tid + 16];
|
||||
if (winsz >= 16) if (tid < 8) vdline[tid] += vdline[tid + 8];
|
||||
if (winsz >= 8) if (tid < 4) vdline[tid] += vdline[tid + 4];
|
||||
if (winsz >= 4) if (tid < 2) vdline[tid] += vdline[tid + 2];
|
||||
if (winsz >= 2) if (tid < 1) vdline[tid] += vdline[tid + 1];
|
||||
|
||||
if (tid == 0)
|
||||
data_cost[cdisp_step1 * d] = saturate_cast<T>(dline[0]);
|
||||
|
Reference in New Issue
Block a user