optimized histogram merging

This commit is contained in:
Ilya Lavrenov 2014-06-09 19:58:45 +04:00
parent eeaa4b3665
commit c9528b3952

View File

@ -126,21 +126,31 @@ __kernel void merge_histogram(__global const int * ghist, __global uchar * histp
int lid = get_local_id(0);
__global HT * hist = (__global HT *)(histptr + hist_offset);
#if WGS >= BINS
HT res = (HT)(0);
#else
#pragma unroll
for (int i = lid; i < BINS; i += WGS)
hist[i] = ghist[i];
barrier(CLK_LOCAL_MEM_FENCE);
hist[i] = (HT)(0);
#endif
#pragma unroll
for (int i = 1; i < HISTS_COUNT; ++i)
for (int i = 0; i < HISTS_COUNT; ++i)
{
ghist += BINS;
#pragma unroll
for (int j = lid; j < BINS; j += WGS)
#if WGS >= BINS
res += convertToHT(ghist[j]);
#else
hist[j] += convertToHT(ghist[j]);
barrier(CLK_LOCAL_MEM_FENCE);
#endif
ghist += BINS;
}
#if WGS >= BINS
if (lid < BINS)
*(__global HT *)(histptr + mad24(lid, hist_step, hist_offset)) = res;
#endif
}
__kernel void calcLUT(__global uchar * dst, __constant int * hist, int total)