Add moments function
This commit is contained in:
parent
620c699456
commit
6df23428af
@ -1773,6 +1773,8 @@ namespace cv
|
||||
const oclMat &bu, const oclMat &bv,
|
||||
float pos, oclMat &newFrame, oclMat &buf);
|
||||
|
||||
//! computes moments of the rasterized shape or a vector of points
|
||||
CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
|
||||
}
|
||||
}
|
||||
#if defined _MSC_VER && _MSC_VER >= 1200
|
||||
|
938
modules/ocl/src/kernels/moments.cl
Normal file
938
modules/ocl/src/kernels/moments.cl
Normal file
@ -0,0 +1,938 @@
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#else
|
||||
typedef float double;
|
||||
typedef float4 double4;
|
||||
#define convert_double4 convert_float4
|
||||
#endif
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf:enable
|
||||
//#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void icvContourMoments(int contour_total,
|
||||
__global float* reader_oclmat_data,
|
||||
__global double* dst_a00,
|
||||
__global double* dst_a10,
|
||||
__global double* dst_a01,
|
||||
__global double* dst_a20,
|
||||
__global double* dst_a11,
|
||||
__global double* dst_a02,
|
||||
__global double* dst_a30,
|
||||
__global double* dst_a21,
|
||||
__global double* dst_a12,
|
||||
__global double* dst_a03)
|
||||
{
|
||||
double xi_1, yi_1, xi_12, yi_12, xi, yi, xi2, yi2, dxy, xii_1, yii_1;
|
||||
int idx = get_global_id(0);
|
||||
|
||||
xi_1 = *(reader_oclmat_data + (get_global_id(0) << 1));
|
||||
yi_1 = *(reader_oclmat_data + (get_global_id(0) << 1) + 1);
|
||||
xi_12 = xi_1 * xi_1;
|
||||
yi_12 = yi_1 * yi_1;
|
||||
|
||||
if(idx == contour_total - 1)
|
||||
{
|
||||
xi = *(reader_oclmat_data);
|
||||
yi = *(reader_oclmat_data + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
xi = *(reader_oclmat_data + (idx + 1) * 2);
|
||||
yi = *(reader_oclmat_data + (idx + 1) * 2 + 1);
|
||||
}
|
||||
|
||||
xi2 = xi * xi;
|
||||
yi2 = yi * yi;
|
||||
dxy = xi_1 * yi - xi * yi_1;
|
||||
xii_1 = xi_1 + xi;
|
||||
yii_1 = yi_1 + yi;
|
||||
|
||||
dst_a00[idx] = dxy;
|
||||
dst_a10[idx] = dxy * xii_1;
|
||||
dst_a01[idx] = dxy * yii_1;
|
||||
dst_a20[idx] = dxy * (xi_1 * xii_1 + xi2);
|
||||
dst_a11[idx] = dxy * (xi_1 * (yii_1 + yi_1) + xi * (yii_1 + yi));
|
||||
dst_a02[idx] = dxy * (yi_1 * yii_1 + yi2);
|
||||
dst_a30[idx] = dxy * xii_1 * (xi_12 + xi2);
|
||||
dst_a03[idx] = dxy * yii_1 * (yi_12 + yi2);
|
||||
dst_a21[idx] =
|
||||
dxy * (xi_12 * (3 * yi_1 + yi) + 2 * xi * xi_1 * yii_1 +
|
||||
xi2 * (yi_1 + 3 * yi));
|
||||
dst_a12[idx] =
|
||||
dxy * (yi_12 * (3 * xi_1 + xi) + 2 * yi * yi_1 * xii_1 +
|
||||
yi2 * (xi_1 + 3 * xi));
|
||||
}
|
||||
//#endif
|
||||
|
||||
//#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
|
||||
__global double* dst_m00,
|
||||
__global double* dst_m10,
|
||||
__global double* dst_m01,
|
||||
__global double* dst_m20,
|
||||
__global double* dst_m11,
|
||||
__global double* dst_m02,
|
||||
__global double* dst_m30,
|
||||
__global double* dst_m21,
|
||||
__global double* dst_m12,
|
||||
__global double* dst_m03,
|
||||
int dst_cols, int dst_step, int type, int depth, int cn, int coi, int binary, int TILE_SIZE)
|
||||
{
|
||||
uchar tmp_coi[16]; // get the coi data
|
||||
uchar16 tmp[16];
|
||||
int VLEN_C = 16; // vector length of uchar
|
||||
|
||||
int gidy = get_global_id(0);
|
||||
int gidx = get_global_id(1);
|
||||
int wgidy = get_group_id(0);
|
||||
int wgidx = get_group_id(1);
|
||||
int lidy = get_local_id(0);
|
||||
int lidx = get_local_id(1);
|
||||
int y = wgidy*TILE_SIZE; // vector length of uchar
|
||||
int x = wgidx*TILE_SIZE; // vector length of uchar
|
||||
int kcn = (cn==2)?2:4;
|
||||
int rstep = min(src_step, TILE_SIZE);
|
||||
tileSize_height = min(TILE_SIZE, src_rows - y);
|
||||
tileSize_width = min(TILE_SIZE, src_cols - x);
|
||||
|
||||
if( tileSize_width < TILE_SIZE )
|
||||
for(int i = tileSize_width; i < rstep; i++ )
|
||||
*((__global uchar*)src_data+(y+lidy)*src_step+x+i) = 0;
|
||||
if( coi > 0 ) //channel of interest
|
||||
for(int i = 0; i < tileSize_width; i += VLEN_C)
|
||||
{
|
||||
for(int j=0; j<VLEN_C; j++)
|
||||
tmp_coi[j] = *((__global uchar*)src_data+(y+lidy)*src_step+(x+i+j)*kcn+coi-1);
|
||||
tmp[i/VLEN_C] = (uchar16)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7],
|
||||
tmp_coi[8],tmp_coi[9],tmp_coi[10],tmp_coi[11],tmp_coi[12],tmp_coi[13],tmp_coi[14],tmp_coi[15]);
|
||||
}
|
||||
else
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_C)
|
||||
tmp[i/VLEN_C] = *(src_data+(y+lidy)*src_step/VLEN_C+(x+i)/VLEN_C);
|
||||
uchar16 zero = (uchar16)(0);
|
||||
uchar16 full = (uchar16)(255);
|
||||
if( binary )
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_C)
|
||||
tmp[i/VLEN_C] = (tmp[i/VLEN_C]!=zero)?full:zero;
|
||||
double mom[10];
|
||||
__local int m[10][128];
|
||||
if(lidy == 0)
|
||||
for(int i=0; i<10; i++)
|
||||
for(int j=0; j<128; j++)
|
||||
m[i][j]=0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int lm[10] = {0};
|
||||
int16 x0 = (int16)(0);
|
||||
int16 x1 = (int16)(0);
|
||||
int16 x2 = (int16)(0);
|
||||
int16 x3 = (int16)(0);
|
||||
for( int xt = 0 ; xt < tileSize_width; xt+=(VLEN_C) )
|
||||
{
|
||||
int16 v_xt = (int16)(xt, xt+1, xt+2, xt+3, xt+4, xt+5, xt+6, xt+7, xt+8, xt+9, xt+10, xt+11, xt+12, xt+13, xt+14, xt+15);
|
||||
int16 p = convert_int16(tmp[xt/VLEN_C]);
|
||||
int16 xp = v_xt * p, xxp = xp *v_xt;
|
||||
x0 += p;
|
||||
x1 += xp;
|
||||
x2 += xxp;
|
||||
x3 += xxp * v_xt;
|
||||
}
|
||||
x0.s0 += x0.s1 + x0.s2 + x0.s3 + x0.s4 + x0.s5 + x0.s6 + x0.s7 + x0.s8 + x0.s9 + x0.sa + x0.sb + x0.sc + x0.sd + x0.se + x0.sf;
|
||||
x1.s0 += x1.s1 + x1.s2 + x1.s3 + x1.s4 + x1.s5 + x1.s6 + x1.s7 + x1.s8 + x1.s9 + x1.sa + x1.sb + x1.sc + x1.sd + x1.se + x1.sf;
|
||||
x2.s0 += x2.s1 + x2.s2 + x2.s3 + x2.s4 + x2.s5 + x2.s6 + x2.s7 + x2.s8 + x2.s9 + x2.sa + x2.sb + x2.sc + x2.sd + x2.se + x2.sf;
|
||||
x3.s0 += x3.s1 + x3.s2 + x3.s3 + x3.s4 + x3.s5 + x3.s6 + x3.s7 + x3.s8 + x3.s9 + x3.sa + x3.sb + x3.sc + x3.sd + x3.se + x3.sf;
|
||||
int py = lidy * ((int)x0.s0);
|
||||
int sy = lidy*lidy;
|
||||
int bheight = min(tileSize_height, TILE_SIZE/2);
|
||||
if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height)
|
||||
{
|
||||
m[9][lidy-bheight] = ((int)py) * sy; // m03
|
||||
m[8][lidy-bheight] = ((int)x1.s0) * sy; // m12
|
||||
m[7][lidy-bheight] = ((int)x2.s0) * lidy; // m21
|
||||
m[6][lidy-bheight] = x3.s0; // m30
|
||||
m[5][lidy-bheight] = x0.s0 * sy; // m02
|
||||
m[4][lidy-bheight] = x1.s0 * lidy; // m11
|
||||
m[3][lidy-bheight] = x2.s0; // m20
|
||||
m[2][lidy-bheight] = py; // m01
|
||||
m[1][lidy-bheight] = x1.s0; // m10
|
||||
m[0][lidy-bheight] = x0.s0; // m00
|
||||
}
|
||||
else if(lidy < bheight)
|
||||
{
|
||||
lm[9] = ((int)py) * sy; // m03
|
||||
lm[8] = ((int)x1.s0) * sy; // m12
|
||||
lm[7] = ((int)x2.s0) * lidy; // m21
|
||||
lm[6] = x3.s0; // m30
|
||||
lm[5] = x0.s0 * sy; // m02
|
||||
lm[4] = x1.s0 * lidy; // m11
|
||||
lm[3] = x2.s0; // m20
|
||||
lm[2] = py; // m01
|
||||
lm[1] = x1.s0; // m10
|
||||
lm[0] = x0.s0; // m00
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for( int j = bheight; j >= 1; j = j/2 )
|
||||
{
|
||||
if(lidy < j)
|
||||
for( int i = 0; i < 10; i++ )
|
||||
lm[i] = lm[i] + m[i][lidy];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lidy >= j/2&&lidy < j)
|
||||
for( int i = 0; i < 10; i++ )
|
||||
m[i][lidy-j/2] = lm[i];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if(lidy == 0&&lidx == 0)
|
||||
{
|
||||
for( int mt = 0; mt < 10; mt++ )
|
||||
mom[mt] = (double)lm[mt];
|
||||
if(binary)
|
||||
{
|
||||
double s = 1./255;
|
||||
for( int mt = 0; mt < 10; mt++ )
|
||||
mom[mt] *= s;
|
||||
}
|
||||
double xm = x * mom[0], ym = y * mom[0];
|
||||
|
||||
// accumulate moments computed in each tile
|
||||
|
||||
// + m00 ( = m00' )
|
||||
dst_m00[wgidy*dst_cols+wgidx] = mom[0];
|
||||
|
||||
// + m10 ( = m10' + x*m00' )
|
||||
dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm;
|
||||
|
||||
// + m01 ( = m01' + y*m00' )
|
||||
dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym;
|
||||
|
||||
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
|
||||
dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm);
|
||||
|
||||
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
|
||||
dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
|
||||
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
|
||||
dst_m02[wgidy*dst_cols+wgidx] = mom[5] + y * (mom[2] * 2 + ym);
|
||||
|
||||
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
|
||||
dst_m30[wgidy*dst_cols+wgidx] = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
|
||||
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
|
||||
dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
|
||||
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
|
||||
dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
|
||||
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
|
||||
dst_m03[wgidy*dst_cols+wgidx] = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
}
|
||||
}
|
||||
//#endif
|
||||
//#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void dst_sum(int src_rows, int src_cols, int tile_height, int tile_width, int TILE_SIZE, __global double* sum, __global double* dst_m00,
|
||||
__global double* dst_m10,
|
||||
__global double* dst_m01,
|
||||
__global double* dst_m20,
|
||||
__global double* dst_m11,
|
||||
__global double* dst_m02,
|
||||
__global double* dst_m30,
|
||||
__global double* dst_m21,
|
||||
__global double* dst_m12,
|
||||
__global double* dst_m03)
|
||||
{
|
||||
int gidy = get_global_id(0);
|
||||
int gidx = get_global_id(1);
|
||||
int block_y = src_rows/tile_height;
|
||||
int block_x = src_cols/tile_width;
|
||||
int block_num;
|
||||
|
||||
if(src_rows > TILE_SIZE && src_rows % TILE_SIZE != 0)
|
||||
block_y ++;
|
||||
if(src_cols > TILE_SIZE && src_cols % TILE_SIZE != 0)
|
||||
block_x ++;
|
||||
block_num = block_y * block_x;
|
||||
__local double dst_sum[10][128];
|
||||
if(gidy<128-block_num)
|
||||
for(int i=0; i<10; i++)
|
||||
dst_sum[i][gidy+block_num]=0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(gidy<block_num)
|
||||
{
|
||||
dst_sum[0][gidy] = dst_m00[gidy];
|
||||
dst_sum[1][gidy] = dst_m10[gidy];
|
||||
dst_sum[2][gidy] = dst_m01[gidy];
|
||||
dst_sum[3][gidy] = dst_m20[gidy];
|
||||
dst_sum[4][gidy] = dst_m11[gidy];
|
||||
dst_sum[5][gidy] = dst_m02[gidy];
|
||||
dst_sum[6][gidy] = dst_m30[gidy];
|
||||
dst_sum[7][gidy] = dst_m21[gidy];
|
||||
dst_sum[8][gidy] = dst_m12[gidy];
|
||||
dst_sum[9][gidy] = dst_m03[gidy];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for(int lsize=64; lsize>0; lsize>>=1)
|
||||
{
|
||||
if(gidy<lsize)
|
||||
{
|
||||
int lsize2 = gidy + lsize;
|
||||
for(int i=0; i<10; i++)
|
||||
dst_sum[i][gidy] += dst_sum[i][lsize2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if(gidy==0)
|
||||
for(int i=0; i<10; i++)
|
||||
sum[i] = dst_sum[i][0];
|
||||
}
|
||||
//#endif
|
||||
//#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
|
||||
__global double* dst_m00,
|
||||
__global double* dst_m10,
|
||||
__global double* dst_m01,
|
||||
__global double* dst_m20,
|
||||
__global double* dst_m11,
|
||||
__global double* dst_m02,
|
||||
__global double* dst_m30,
|
||||
__global double* dst_m21,
|
||||
__global double* dst_m12,
|
||||
__global double* dst_m03,
|
||||
int dst_cols, int dst_step,
|
||||
int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
|
||||
{
|
||||
ushort tmp_coi[8]; // get the coi data
|
||||
ushort8 tmp[32];
|
||||
int VLEN_US = 8; // vector length of ushort
|
||||
int gidy = get_global_id(0);
|
||||
int gidx = get_global_id(1);
|
||||
int wgidy = get_group_id(0);
|
||||
int wgidx = get_group_id(1);
|
||||
int lidy = get_local_id(0);
|
||||
int lidx = get_local_id(1);
|
||||
int y = wgidy*TILE_SIZE; // real Y index of pixel
|
||||
int x = wgidx*TILE_SIZE; // real X index of pixel
|
||||
int kcn = (cn==2)?2:4;
|
||||
int rstep = min(src_step/2, TILE_SIZE);
|
||||
tileSize_height = min(TILE_SIZE, src_rows - y);
|
||||
tileSize_width = min(TILE_SIZE, src_cols -x);
|
||||
if(src_cols > TILE_SIZE && tileSize_width < TILE_SIZE)
|
||||
for(int i=tileSize_width; i < rstep; i++ )
|
||||
*((__global ushort*)src_data+(y+lidy)*src_step/2+x+i) = 0;
|
||||
if( coi > 0 )
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_US)
|
||||
{
|
||||
for(int j=0; j<VLEN_US; j++)
|
||||
tmp_coi[j] = *((__global ushort*)src_data+(y+lidy)*(int)src_step/2+(x+i+j)*kcn+coi-1);
|
||||
tmp[i/VLEN_US] = (ushort8)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]);
|
||||
}
|
||||
else
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_US)
|
||||
tmp[i/VLEN_US] = *(src_data+(y+lidy)*src_step/(2*VLEN_US)+(x+i)/VLEN_US);
|
||||
ushort8 zero = (ushort8)(0);
|
||||
ushort8 full = (ushort8)(255);
|
||||
if( binary )
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_US)
|
||||
tmp[i/VLEN_US] = (tmp[i/VLEN_US]!=zero)?full:zero;
|
||||
double mom[10];
|
||||
__local long m[10][128];
|
||||
if(lidy == 0)
|
||||
for(int i=0; i<10; i++)
|
||||
for(int j=0; j<128; j++)
|
||||
m[i][j]=0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
long lm[10] = {0};
|
||||
int8 x0 = (int8)(0);
|
||||
int8 x1 = (int8)(0);
|
||||
int8 x2 = (int8)(0);
|
||||
long8 x3 = (long8)(0);
|
||||
for( int xt = 0 ; xt < tileSize_width; xt+=(VLEN_US) )
|
||||
{
|
||||
int8 v_xt = (int8)(xt, xt+1, xt+2, xt+3, xt+4, xt+5, xt+6, xt+7);
|
||||
int8 p = convert_int8(tmp[xt/VLEN_US]);
|
||||
int8 xp = v_xt * p, xxp = xp * v_xt;
|
||||
x0 += p;
|
||||
x1 += xp;
|
||||
x2 += xxp;
|
||||
x3 += convert_long8(xxp) *convert_long8(v_xt);
|
||||
}
|
||||
x0.s0 += x0.s1 + x0.s2 + x0.s3 + x0.s4 + x0.s5 + x0.s6 + x0.s7;
|
||||
x1.s0 += x1.s1 + x1.s2 + x1.s3 + x1.s4 + x1.s5 + x1.s6 + x1.s7;
|
||||
x2.s0 += x2.s1 + x2.s2 + x2.s3 + x2.s4 + x2.s5 + x2.s6 + x2.s7;
|
||||
x3.s0 += x3.s1 + x3.s2 + x3.s3 + x3.s4 + x3.s5 + x3.s6 + x3.s7;
|
||||
|
||||
int py = lidy * x0.s0, sy = lidy*lidy;
|
||||
int bheight = min(tileSize_height, TILE_SIZE/2);
|
||||
if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height)
|
||||
{
|
||||
m[9][lidy-bheight] = ((long)py) * sy; // m03
|
||||
m[8][lidy-bheight] = ((long)x1.s0) * sy; // m12
|
||||
m[7][lidy-bheight] = ((long)x2.s0) * lidy; // m21
|
||||
m[6][lidy-bheight] = x3.s0; // m30
|
||||
m[5][lidy-bheight] = x0.s0 * sy; // m02
|
||||
m[4][lidy-bheight] = x1.s0 * lidy; // m11
|
||||
m[3][lidy-bheight] = x2.s0; // m20
|
||||
m[2][lidy-bheight] = py; // m01
|
||||
m[1][lidy-bheight] = x1.s0; // m10
|
||||
m[0][lidy-bheight] = x0.s0; // m00
|
||||
}
|
||||
else if(lidy < bheight)
|
||||
{
|
||||
lm[9] = ((long)py) * sy; // m03
|
||||
lm[8] = ((long)x1.s0) * sy; // m12
|
||||
lm[7] = ((long)x2.s0) * lidy; // m21
|
||||
lm[6] = x3.s0; // m30
|
||||
lm[5] = x0.s0 * sy; // m02
|
||||
lm[4] = x1.s0 * lidy; // m11
|
||||
lm[3] = x2.s0; // m20
|
||||
lm[2] = py; // m01
|
||||
lm[1] = x1.s0; // m10
|
||||
lm[0] = x0.s0; // m00
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for( int j = TILE_SIZE/2; j >= 1; j = j/2 )
|
||||
{
|
||||
if(lidy < j)
|
||||
for( int i = 0; i < 10; i++ )
|
||||
lm[i] = lm[i] + m[i][lidy];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lidy >= j/2&&lidy < j)
|
||||
for( int i = 0; i < 10; i++ )
|
||||
m[i][lidy-j/2] = lm[i];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if(lidy == 0&&lidx == 0)
|
||||
{
|
||||
for(int mt = 0; mt < 10; mt++ )
|
||||
mom[mt] = (double)lm[mt];
|
||||
|
||||
if(binary)
|
||||
{
|
||||
double s = 1./255;
|
||||
for( int mt = 0; mt < 10; mt++ )
|
||||
mom[mt] *= s;
|
||||
}
|
||||
|
||||
double xm = x *mom[0], ym = y * mom[0];
|
||||
|
||||
// accumulate moments computed in each tile
|
||||
|
||||
// + m00 ( = m00' )
|
||||
dst_m00[wgidy*dst_cols+wgidx] = mom[0];
|
||||
|
||||
// + m10 ( = m10' + x*m00' )
|
||||
dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm;
|
||||
|
||||
// + m01 ( = m01' + y*m00' )
|
||||
dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym;
|
||||
|
||||
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
|
||||
dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm);
|
||||
|
||||
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
|
||||
dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
|
||||
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
|
||||
dst_m02[wgidy*dst_cols+wgidx] = mom[5] + y * (mom[2] * 2 + ym);
|
||||
|
||||
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
|
||||
dst_m30[wgidy*dst_cols+wgidx] = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
|
||||
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
|
||||
dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
|
||||
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
|
||||
dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
|
||||
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
|
||||
dst_m03[wgidy*dst_cols+wgidx] = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
}
|
||||
}
|
||||
//#endif
|
||||
//#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
|
||||
__global double* dst_m00,
|
||||
__global double* dst_m10,
|
||||
__global double* dst_m01,
|
||||
__global double* dst_m20,
|
||||
__global double* dst_m11,
|
||||
__global double* dst_m02,
|
||||
__global double* dst_m30,
|
||||
__global double* dst_m21,
|
||||
__global double* dst_m12,
|
||||
__global double* dst_m03,
|
||||
int dst_cols, int dst_step,
|
||||
int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
|
||||
{
|
||||
short tmp_coi[8]; // get the coi data
|
||||
short8 tmp[32];
|
||||
int VLEN_S =8; // vector length of short
|
||||
int gidy = get_global_id(0);
|
||||
int gidx = get_global_id(1);
|
||||
int wgidy = get_group_id(0);
|
||||
int wgidx = get_group_id(1);
|
||||
int lidy = get_local_id(0);
|
||||
int lidx = get_local_id(1);
|
||||
int y = wgidy*TILE_SIZE; // real Y index of pixel
|
||||
int x = wgidx*TILE_SIZE; // real X index of pixel
|
||||
int kcn = (cn==2)?2:4;
|
||||
int rstep = min(src_step/2, TILE_SIZE);
|
||||
tileSize_height = min(TILE_SIZE, src_rows - y);
|
||||
tileSize_width = min(TILE_SIZE, src_cols -x);
|
||||
if(tileSize_width < TILE_SIZE)
|
||||
for(int i = tileSize_width; i < rstep; i++ )
|
||||
*((__global short*)src_data+(y+lidy)*src_step/2+x+i) = 0;
|
||||
if( coi > 0 )
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_S)
|
||||
{
|
||||
for(int j=0; j<VLEN_S; j++)
|
||||
tmp_coi[j] = *((__global short*)src_data+(y+lidy)*src_step/2+(x+i+j)*kcn+coi-1);
|
||||
tmp[i/VLEN_S] = (short8)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]);
|
||||
}
|
||||
else
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_S)
|
||||
tmp[i/VLEN_S] = *(src_data+(y+lidy)*src_step/(2*VLEN_S)+(x+i)/VLEN_S);
|
||||
short8 zero = (short8)(0);
|
||||
short8 full = (short8)(255);
|
||||
if( binary )
|
||||
for(int i=0; i < tileSize_width; i+=(VLEN_S))
|
||||
tmp[i/VLEN_S] = (tmp[i/VLEN_S]!=zero)?full:zero;
|
||||
|
||||
double mom[10];
|
||||
__local long m[10][128];
|
||||
if(lidy == 0)
|
||||
for(int i=0; i<10; i++)
|
||||
for(int j=0; j<128; j++)
|
||||
m[i][j]=0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
long lm[10] = {0};
|
||||
int8 x0 = (int8)(0);
|
||||
int8 x1 = (int8)(0);
|
||||
int8 x2 = (int8)(0);
|
||||
long8 x3 = (long8)(0);
|
||||
for( int xt = 0 ; xt < tileSize_width; xt+= (VLEN_S))
|
||||
{
|
||||
int8 v_xt = (int8)(xt, xt+1, xt+2, xt+3, xt+4, xt+5, xt+6, xt+7);
|
||||
int8 p = convert_int8(tmp[xt/VLEN_S]);
|
||||
int8 xp = v_xt * p, xxp = xp * v_xt;
|
||||
x0 += p;
|
||||
x1 += xp;
|
||||
x2 += xxp;
|
||||
x3 += convert_long8(xxp) * convert_long8(v_xt);
|
||||
}
|
||||
x0.s0 += x0.s1 + x0.s2 + x0.s3 + x0.s4 + x0.s5 + x0.s6 + x0.s7;
|
||||
x1.s0 += x1.s1 + x1.s2 + x1.s3 + x1.s4 + x1.s5 + x1.s6 + x1.s7;
|
||||
x2.s0 += x2.s1 + x2.s2 + x2.s3 + x2.s4 + x2.s5 + x2.s6 + x2.s7;
|
||||
x3.s0 += x3.s1 + x3.s2 + x3.s3 + x3.s4 + x3.s5 + x3.s6 + x3.s7;
|
||||
|
||||
int py = lidy * x0.s0, sy = lidy*lidy;
|
||||
int bheight = min(tileSize_height, TILE_SIZE/2);
|
||||
if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height)
|
||||
{
|
||||
m[9][lidy-bheight] = ((long)py) * sy; // m03
|
||||
m[8][lidy-bheight] = ((long)x1.s0) * sy; // m12
|
||||
m[7][lidy-bheight] = ((long)x2.s0) * lidy; // m21
|
||||
m[6][lidy-bheight] = x3.s0; // m30
|
||||
m[5][lidy-bheight] = x0.s0 * sy; // m02
|
||||
m[4][lidy-bheight] = x1.s0 * lidy; // m11
|
||||
m[3][lidy-bheight] = x2.s0; // m20
|
||||
m[2][lidy-bheight] = py; // m01
|
||||
m[1][lidy-bheight] = x1.s0; // m10
|
||||
m[0][lidy-bheight] = x0.s0; // m00
|
||||
}
|
||||
else if(lidy < bheight)
|
||||
{
|
||||
lm[9] = ((long)py) * sy; // m03
|
||||
lm[8] = ((long)(x1.s0)) * sy; // m12
|
||||
lm[7] = ((long)(x2.s0)) * lidy; // m21
|
||||
lm[6] = x3.s0; // m30
|
||||
lm[5] = x0.s0 * sy; // m02
|
||||
lm[4] = x1.s0 * lidy; // m11
|
||||
lm[3] = x2.s0; // m20
|
||||
lm[2] = py; // m01
|
||||
lm[1] = x1.s0; // m10
|
||||
lm[0] = x0.s0; // m00
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for( int j = TILE_SIZE/2; j >=1; j = j/2 )
|
||||
{
|
||||
if(lidy < j)
|
||||
for( int i = 0; i < 10; i++ )
|
||||
lm[i] = lm[i] + m[i][lidy];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lidy >= j/2&&lidy < j)
|
||||
for( int i = 0; i < 10; i++ )
|
||||
m[i][lidy-j/2] = lm[i];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if(lidy ==0 &&lidx ==0)
|
||||
{
|
||||
for(int mt = 0; mt < 10; mt++ )
|
||||
mom[mt] = (double)lm[mt];
|
||||
|
||||
if(binary)
|
||||
{
|
||||
double s = 1./255;
|
||||
for( int mt = 0; mt < 10; mt++ )
|
||||
mom[mt] *= s;
|
||||
}
|
||||
|
||||
double xm = x * mom[0], ym = y*mom[0];
|
||||
|
||||
// accumulate moments computed in each tile
|
||||
|
||||
// + m00 ( = m00' )
|
||||
dst_m00[wgidy*dst_cols+wgidx] = mom[0];
|
||||
|
||||
// + m10 ( = m10' + x*m00' )
|
||||
dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm;
|
||||
|
||||
// + m01 ( = m01' + y*m00' )
|
||||
dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym;
|
||||
|
||||
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
|
||||
dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm);
|
||||
|
||||
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
|
||||
dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
|
||||
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
|
||||
dst_m02[wgidy*dst_cols+wgidx] = mom[5] + y * (mom[2] * 2 + ym);
|
||||
|
||||
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
|
||||
dst_m30[wgidy*dst_cols+wgidx] = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
|
||||
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
|
||||
dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
|
||||
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
|
||||
dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
|
||||
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
|
||||
dst_m03[wgidy*dst_cols+wgidx] = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
}
|
||||
}
|
||||
//#endif
|
||||
//#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
|
||||
__global double* dst_m00,
|
||||
__global double* dst_m10,
|
||||
__global double* dst_m01,
|
||||
__global double* dst_m20,
|
||||
__global double* dst_m11,
|
||||
__global double* dst_m02,
|
||||
__global double* dst_m30,
|
||||
__global double* dst_m21,
|
||||
__global double* dst_m12,
|
||||
__global double* dst_m03,
|
||||
int dst_cols, int dst_step,
|
||||
int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
|
||||
{
|
||||
float tmp_coi[4]; // get the coi data
|
||||
float4 tmp[64] ;
|
||||
int VLEN_F = 4; // vector length of float
|
||||
int gidy = get_global_id(0);
|
||||
int gidx = get_global_id(1);
|
||||
int wgidy = get_group_id(0);
|
||||
int wgidx = get_group_id(1);
|
||||
int lidy = get_local_id(0);
|
||||
int lidx = get_local_id(1);
|
||||
int y = wgidy*TILE_SIZE; // real Y index of pixel
|
||||
int x = wgidx*TILE_SIZE; // real X index of pixel
|
||||
int kcn = (cn==2)?2:4;
|
||||
int rstep = min(src_step/4, TILE_SIZE);
|
||||
tileSize_height = min(TILE_SIZE, src_rows - y);
|
||||
tileSize_width = min(TILE_SIZE, src_cols -x);
|
||||
if(tileSize_width < TILE_SIZE)
|
||||
for(int i = tileSize_width; i < rstep; i++ )
|
||||
*((__global float*)src_data+(y+lidy)*src_step/4+x+i) = 0;
|
||||
if( coi > 0 )
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_F)
|
||||
{
|
||||
for(int j=0; j<4; j++)
|
||||
tmp_coi[j] = *(src_data+(y+lidy)*src_step/4+(x+i+j)*kcn+coi-1);
|
||||
tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
|
||||
}
|
||||
else
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_F)
|
||||
tmp[i/VLEN_F] = (float4)(*(src_data+(y+lidy)*src_step/4+x+i),*(src_data+(y+lidy)*src_step/4+x+i+1),*(src_data+(y+lidy)*src_step/4+x+i+2),*(src_data+(y+lidy)*src_step/4+x+i+3));
|
||||
float4 zero = (float4)(0);
|
||||
float4 full = (float4)(255);
|
||||
if( binary )
|
||||
for(int i=0; i < tileSize_width; i+=4)
|
||||
tmp[i/VLEN_F] = (tmp[i/VLEN_F]!=zero)?full:zero;
|
||||
double mom[10];
|
||||
__local double m[10][128];
|
||||
if(lidy == 0)
|
||||
for(int i = 0; i < 10; i ++)
|
||||
for(int j = 0; j < 128; j ++)
|
||||
m[i][j] = 0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
double lm[10] = {0};
|
||||
double4 x0 = (double4)(0);
|
||||
double4 x1 = (double4)(0);
|
||||
double4 x2 = (double4)(0);
|
||||
double4 x3 = (double4)(0);
|
||||
for( int xt = 0 ; xt < tileSize_width; xt+=VLEN_F )
|
||||
{
|
||||
double4 v_xt = (double4)(xt, xt+1, xt+2, xt+3);
|
||||
double4 p = convert_double4(tmp[xt/VLEN_F]);
|
||||
double4 xp = v_xt * p, xxp = xp * v_xt;
|
||||
x0 += p;
|
||||
x1 += xp;
|
||||
x2 += xxp;
|
||||
x3 += xxp * v_xt;
|
||||
}
|
||||
x0.s0 += x0.s1 + x0.s2 + x0.s3;
|
||||
x1.s0 += x1.s1 + x1.s2 + x1.s3;
|
||||
x2.s0 += x2.s1 + x2.s2 + x2.s3;
|
||||
x3.s0 += x3.s1 + x3.s2 + x3.s3;
|
||||
/*
|
||||
double py = lidy * x0.s0, sy = lidy*lidy;
|
||||
int bheight = min(tileSize_height, TILE_SIZE/2);
|
||||
if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height)
|
||||
{
|
||||
m[9][lidy-bheight] = ((double)py) * sy; // m03
|
||||
m[8][lidy-bheight] = ((double)x1.s0) * sy; // m12
|
||||
m[7][lidy-bheight] = ((double)x2.s0) * lidy; // m21
|
||||
m[6][lidy-bheight] = x3.s0; // m30
|
||||
m[5][lidy-bheight] = x0.s0 * sy; // m02
|
||||
m[4][lidy-bheight] = x1.s0 * lidy; // m11
|
||||
m[3][lidy-bheight] = x2.s0; // m20
|
||||
m[2][lidy-bheight] = py; // m01
|
||||
m[1][lidy-bheight] = x1.s0; // m10
|
||||
m[0][lidy-bheight] = x0.s0; // m00
|
||||
}
|
||||
else if(lidy < bheight)
|
||||
{
|
||||
lm[9] = ((double)py) * sy; // m03
|
||||
lm[8] = ((double)x1.s0) * sy; // m12
|
||||
lm[7] = ((double)x2.s0) * lidy; // m21
|
||||
lm[6] = x3.s0; // m30
|
||||
lm[5] = x0.s0 * sy; // m02
|
||||
lm[4] = x1.s0 * lidy; // m11
|
||||
lm[3] = x2.s0; // m20
|
||||
lm[2] = py; // m01
|
||||
lm[1] = x1.s0; // m10
|
||||
lm[0] = x0.s0; // m00
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for( int j = TILE_SIZE/2; j >= 1; j = j/2 )
|
||||
{
|
||||
if(lidy < j)
|
||||
for( int i = 0; i < 10; i++ )
|
||||
lm[i] = lm[i] + m[i][lidy];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lidy >= j/2&&lidy < j)
|
||||
for( int i = 0; i < 10; i++ )
|
||||
m[i][lidy-j/2] = lm[i];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if(lidy == 0&&lidx == 0)
|
||||
{
|
||||
for(int mt = 0; mt < 10; mt++ )
|
||||
mom[mt] = (double)lm[mt];
|
||||
|
||||
if(binary)
|
||||
{
|
||||
double s = 1./255;
|
||||
for( int mt = 0; mt < 10; mt++ )
|
||||
mom[mt] *= s;
|
||||
}
|
||||
|
||||
double xm = x * mom[0], ym = y * mom[0];
|
||||
|
||||
// accumulate moments computed in each tile
|
||||
|
||||
// + m00 ( = m00' )
|
||||
dst_m00[wgidy*dst_cols+wgidx]= mom[0];
|
||||
|
||||
// + m10 ( = m10' + x*m00' )
|
||||
dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm;
|
||||
|
||||
// + m01 ( = m01' + y*m00' )
|
||||
dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym;
|
||||
|
||||
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
|
||||
dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm);
|
||||
|
||||
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
|
||||
dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
|
||||
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
|
||||
dst_m02[wgidy*dst_cols+wgidx]= mom[5] + y * (mom[2] * 2 + ym);
|
||||
|
||||
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
|
||||
dst_m30[wgidy*dst_cols+wgidx]= mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
|
||||
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
|
||||
dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
|
||||
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
|
||||
dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
|
||||
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
|
||||
dst_m03[wgidy*dst_cols+wgidx]= mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
}*/
|
||||
}
|
||||
//#endif
|
||||
//#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void CvMoments_D6(__global double* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
|
||||
__global double* dst_m00,
|
||||
__global double* dst_m10,
|
||||
__global double* dst_m01,
|
||||
__global double* dst_m20,
|
||||
__global double* dst_m11,
|
||||
__global double* dst_m02,
|
||||
__global double* dst_m30,
|
||||
__global double* dst_m21,
|
||||
__global double* dst_m12,
|
||||
__global double* dst_m03,
|
||||
int dst_cols, int dst_step,
|
||||
int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
|
||||
{
|
||||
double tmp_coi[4]; // get the coi data
|
||||
double4 tmp[64];
|
||||
int VLEN_D = 4; // length of vetor
|
||||
int gidy = get_global_id(0);
|
||||
int gidx = get_global_id(1);
|
||||
int wgidy = get_group_id(0);
|
||||
int wgidx = get_group_id(1);
|
||||
int lidy = get_local_id(0);
|
||||
int lidx = get_local_id(1);
|
||||
int y = wgidy*TILE_SIZE; // real Y index of pixel
|
||||
int x = wgidx*TILE_SIZE; // real X index of pixel
|
||||
int kcn = (cn==2)?2:4;
|
||||
int rstep = min(src_step/8, TILE_SIZE);
|
||||
tileSize_height = min(TILE_SIZE, src_rows - y);
|
||||
tileSize_width = min(TILE_SIZE, src_cols - x);
|
||||
|
||||
if(tileSize_width < TILE_SIZE)
|
||||
for(int i = tileSize_width; i < rstep; i++ )
|
||||
*((__global double*)src_data+(y+lidy)*src_step/8+x+i) = 0;
|
||||
if( coi > 0 )
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_D)
|
||||
{
|
||||
for(int j=0; j<4; j++)
|
||||
tmp_coi[j] = *(src_data+(y+lidy)*src_step/8+(x+i+j)*kcn+coi-1);
|
||||
tmp[i/VLEN_D] = (double4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
|
||||
}
|
||||
else
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_D)
|
||||
tmp[i/VLEN_D] = (double4)(*(src_data+(y+lidy)*src_step/8+x+i),*(src_data+(y+lidy)*src_step/8+x+i+1),*(src_data+(y+lidy)*src_step/8+x+i+2),*(src_data+(y+lidy)*src_step/8+x+i+3));
|
||||
double4 zero = (double4)(0);
|
||||
double4 full = (double4)(255);
|
||||
if( binary )
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_D)
|
||||
tmp[i/VLEN_D] = (tmp[i/VLEN_D]!=zero)?full:zero;
|
||||
double mom[10];
|
||||
__local double m[10][128];
|
||||
if(lidy == 0)
|
||||
for(int i=0; i<10; i++)
|
||||
for(int j=0; j<128; j++)
|
||||
m[i][j]=0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
double lm[10] = {0};
|
||||
double4 x0 = (double4)(0);
|
||||
double4 x1 = (double4)(0);
|
||||
double4 x2 = (double4)(0);
|
||||
double4 x3 = (double4)(0);
|
||||
for( int xt = 0 ; xt < tileSize_width; xt+=VLEN_D )
|
||||
{
|
||||
double4 v_xt = (double4)(xt, xt+1, xt+2, xt+3);
|
||||
double4 p = tmp[xt/VLEN_D];
|
||||
double4 xp = v_xt * p, xxp = xp * v_xt;
|
||||
x0 += p;
|
||||
x1 += xp;
|
||||
x2 += xxp;
|
||||
x3 += xxp *v_xt;
|
||||
}
|
||||
x0.s0 += x0.s1 + x0.s2 + x0.s3;
|
||||
x1.s0 += x1.s1 + x1.s2 + x1.s3;
|
||||
x2.s0 += x2.s1 + x2.s2 + x2.s3;
|
||||
x3.s0 += x3.s1 + x3.s2 + x3.s3;
|
||||
|
||||
double py = lidy * x0.s0, sy = lidy*lidy;
|
||||
int bheight = min(tileSize_height, TILE_SIZE/2);
|
||||
if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height)
|
||||
{
|
||||
m[9][lidy-bheight] = ((double)py) * sy; // m03
|
||||
m[8][lidy-bheight] = ((double)x1.s0) * sy; // m12
|
||||
m[7][lidy-bheight] = ((double)x2.s0) * lidy; // m21
|
||||
m[6][lidy-bheight] = x3.s0; // m30
|
||||
m[5][lidy-bheight] = x0.s0 * sy; // m02
|
||||
m[4][lidy-bheight] = x1.s0 * lidy; // m11
|
||||
m[3][lidy-bheight] = x2.s0; // m20
|
||||
m[2][lidy-bheight] = py; // m01
|
||||
m[1][lidy-bheight] = x1.s0; // m10
|
||||
m[0][lidy-bheight] = x0.s0; // m00
|
||||
}
|
||||
|
||||
else if(lidy < bheight)
|
||||
{
|
||||
lm[9] = ((double)py) * sy; // m03
|
||||
lm[8] = ((double)x1.s0) * sy; // m12
|
||||
lm[7] = ((double)x2.s0) * lidy; // m21
|
||||
lm[6] = x3.s0; // m30
|
||||
lm[5] = x0.s0 * sy; // m02
|
||||
lm[4] = x1.s0 * lidy; // m11
|
||||
lm[3] = x2.s0; // m20
|
||||
lm[2] = py; // m01
|
||||
lm[1] = x1.s0; // m10
|
||||
lm[0] = x0.s0; // m00
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for( int j = TILE_SIZE/2; j >= 1; j = j/2 )
|
||||
{
|
||||
if(lidy < j)
|
||||
for( int i = 0; i < 10; i++ )
|
||||
lm[i] = lm[i] + m[i][lidy];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lidy >= j/2&&lidy < j)
|
||||
for( int i = 0; i < 10; i++ )
|
||||
m[i][lidy-j/2] = lm[i];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if(lidy == 0&&lidx == 0)
|
||||
{
|
||||
for( int mt = 0; mt < 10; mt++ )
|
||||
mom[mt] = (double)lm[mt];
|
||||
if(binary)
|
||||
{
|
||||
double s = 1./255;
|
||||
for( int mt = 0; mt < 10; mt++ )
|
||||
mom[mt] *= s;
|
||||
}
|
||||
|
||||
double xm = x * mom[0], ym = y * mom[0];
|
||||
|
||||
// accumulate moments computed in each tile
|
||||
|
||||
// + m00 ( = m00' )
|
||||
dst_m00[wgidy*dst_cols+wgidx] = mom[0];
|
||||
|
||||
// + m10 ( = m10' + x*m00' )
|
||||
dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm;
|
||||
|
||||
// + m01 ( = m01' + y*m00' )
|
||||
dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym;
|
||||
|
||||
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
|
||||
dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm);
|
||||
|
||||
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
|
||||
dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
|
||||
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
|
||||
dst_m02[wgidy*dst_cols+wgidx] = mom[5] + y * (mom[2] * 2 + ym);
|
||||
|
||||
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
|
||||
dst_m30[wgidy*dst_cols+wgidx] = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
|
||||
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
|
||||
dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
|
||||
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
|
||||
dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
|
||||
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
|
||||
dst_m03[wgidy*dst_cols+wgidx] = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
}
|
||||
}
|
||||
//#endif
|
380
modules/ocl/src/moments.cpp
Normal file
380
modules/ocl/src/moments.cpp
Normal file
@ -0,0 +1,380 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Sen Liu, sen@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include "precomp.hpp"
|
||||
#include <iostream>
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
extern const char *moments;
|
||||
|
||||
// The function calculates center of gravity and the central second order moments
|
||||
static void icvCompleteMomentState( CvMoments* moments )
|
||||
{
|
||||
double cx = 0, cy = 0;
|
||||
double mu20, mu11, mu02;
|
||||
|
||||
assert( moments != 0 );
|
||||
moments->inv_sqrt_m00 = 0;
|
||||
|
||||
if( fabs(moments->m00) > DBL_EPSILON )
|
||||
{
|
||||
double inv_m00 = 1. / moments->m00;
|
||||
cx = moments->m10 * inv_m00;
|
||||
cy = moments->m01 * inv_m00;
|
||||
moments->inv_sqrt_m00 = std::sqrt( fabs(inv_m00) );
|
||||
}
|
||||
|
||||
// mu20 = m20 - m10*cx
|
||||
mu20 = moments->m20 - moments->m10 * cx;
|
||||
// mu11 = m11 - m10*cy
|
||||
mu11 = moments->m11 - moments->m10 * cy;
|
||||
// mu02 = m02 - m01*cy
|
||||
mu02 = moments->m02 - moments->m01 * cy;
|
||||
|
||||
moments->mu20 = mu20;
|
||||
moments->mu11 = mu11;
|
||||
moments->mu02 = mu02;
|
||||
|
||||
// mu30 = m30 - cx*(3*mu20 + cx*m10)
|
||||
moments->mu30 = moments->m30 - cx * (3 * mu20 + cx * moments->m10);
|
||||
mu11 += mu11;
|
||||
// mu21 = m21 - cx*(2*mu11 + cx*m01) - cy*mu20
|
||||
moments->mu21 = moments->m21 - cx * (mu11 + cx * moments->m01) - cy * mu20;
|
||||
// mu12 = m12 - cy*(2*mu11 + cy*m10) - cx*mu02
|
||||
moments->mu12 = moments->m12 - cy * (mu11 + cy * moments->m10) - cx * mu02;
|
||||
// mu03 = m03 - cy*(3*mu02 + cy*m01)
|
||||
moments->mu03 = moments->m03 - cy * (3 * mu02 + cy * moments->m01);
|
||||
}
|
||||
|
||||
|
||||
static void icvContourMoments( CvSeq* contour, CvMoments* mom )
|
||||
{
|
||||
if( contour->total )
|
||||
{
|
||||
CvSeqReader reader;
|
||||
int lpt = contour->total;
|
||||
double a00, a10, a01, a20, a11, a02, a30, a21, a12, a03;
|
||||
int dst_type = cv::ocl::Context::getContext()->impl->double_support ? CV_64FC1 : CV_32FC1;
|
||||
|
||||
cvStartReadSeq( contour, &reader, 0 );
|
||||
|
||||
cv::ocl::oclMat dst_a00(1,lpt,dst_type);
|
||||
cv::ocl::oclMat dst_a10(1,lpt,dst_type);
|
||||
cv::ocl::oclMat dst_a01(1,lpt,dst_type);
|
||||
cv::ocl::oclMat dst_a20(1,lpt,dst_type);
|
||||
cv::ocl::oclMat dst_a11(1,lpt,dst_type);
|
||||
cv::ocl::oclMat dst_a02(1,lpt,dst_type);
|
||||
cv::ocl::oclMat dst_a30(1,lpt,dst_type);
|
||||
cv::ocl::oclMat dst_a21(1,lpt,dst_type);
|
||||
cv::ocl::oclMat dst_a12(1,lpt,dst_type);
|
||||
cv::ocl::oclMat dst_a03(1,lpt,dst_type);
|
||||
size_t reader_size = lpt << 1;
|
||||
cv::Mat reader_mat(1,reader_size,CV_32FC1);
|
||||
|
||||
bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
|
||||
|
||||
if( is_float )
|
||||
{
|
||||
for(size_t i = 0; i < reader_size; ++i)
|
||||
{
|
||||
reader_mat.at<float>(0, i++) = ((CvPoint2D32f*)(reader.ptr))->x;
|
||||
reader_mat.at<float>(0, i) = ((CvPoint2D32f*)(reader.ptr))->y;
|
||||
CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(size_t i = 0; i < reader_size; ++i)
|
||||
{
|
||||
reader_mat.at<float>(0, i++) = ((CvPoint*)(reader.ptr))->x;
|
||||
reader_mat.at<float>(0, i) = ((CvPoint*)(reader.ptr))->y;
|
||||
CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
|
||||
}
|
||||
}
|
||||
|
||||
cv::ocl::oclMat reader_oclmat(reader_mat);
|
||||
int llength = std::min(lpt,128);
|
||||
size_t localThreads[3] = { llength, 1, 1};
|
||||
size_t globalThreads[3] = { lpt, 1, 1};
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&contour->total ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a00.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a10.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a01.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a20.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a11.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a02.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a30.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a21.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a12.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a03.data ));
|
||||
openCLExecuteKernel(dst_a00.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
cv::Mat dst(dst_a00);
|
||||
cv::Scalar s = cv::sum(dst);
|
||||
a00 = s[0];
|
||||
dst = dst_a10;
|
||||
s = cv::sum(dst);
|
||||
a10 = s[0];//dstsum[1];
|
||||
dst = dst_a01;
|
||||
s = cv::sum(dst);
|
||||
a01 = s[0];//dstsum[2];
|
||||
dst = dst_a20;
|
||||
s = cv::sum(dst);
|
||||
a20 = s[0];//dstsum[3];
|
||||
dst = dst_a11;
|
||||
s = cv::sum(dst);
|
||||
a11 = s[0];//dstsum[4];
|
||||
dst = dst_a02;
|
||||
s = cv::sum(dst);
|
||||
a02 = s[0];//dstsum[5];
|
||||
dst = dst_a30;
|
||||
s = cv::sum(dst);
|
||||
a30 = s[0];//dstsum[6];
|
||||
dst = dst_a21;
|
||||
s = cv::sum(dst);
|
||||
a21 = s[0];//dstsum[7];
|
||||
dst = dst_a12;
|
||||
s = cv::sum(dst);
|
||||
a12 = s[0];//dstsum[8];
|
||||
dst = dst_a03;
|
||||
s = cv::sum(dst);
|
||||
a03 = s[0];//dstsum[9];
|
||||
|
||||
double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60;
|
||||
if( fabs(a00) > FLT_EPSILON )
|
||||
{
|
||||
if( a00 > 0 )
|
||||
{
|
||||
db1_2 = 0.5;
|
||||
db1_6 = 0.16666666666666666666666666666667;
|
||||
db1_12 = 0.083333333333333333333333333333333;
|
||||
db1_24 = 0.041666666666666666666666666666667;
|
||||
db1_20 = 0.05;
|
||||
db1_60 = 0.016666666666666666666666666666667;
|
||||
}
|
||||
else
|
||||
{
|
||||
db1_2 = -0.5;
|
||||
db1_6 = -0.16666666666666666666666666666667;
|
||||
db1_12 = -0.083333333333333333333333333333333;
|
||||
db1_24 = -0.041666666666666666666666666666667;
|
||||
db1_20 = -0.05;
|
||||
db1_60 = -0.016666666666666666666666666666667;
|
||||
}
|
||||
|
||||
// spatial moments
|
||||
mom->m00 = a00 * db1_2;
|
||||
mom->m10 = a10 * db1_6;
|
||||
mom->m01 = a01 * db1_6;
|
||||
mom->m20 = a20 * db1_12;
|
||||
mom->m11 = a11 * db1_24;
|
||||
mom->m02 = a02 * db1_12;
|
||||
mom->m30 = a30 * db1_20;
|
||||
mom->m21 = a21 * db1_60;
|
||||
mom->m12 = a12 * db1_60;
|
||||
mom->m03 = a03 * db1_20;
|
||||
|
||||
icvCompleteMomentState( mom );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
|
||||
{
|
||||
const int TILE_SIZE = 256;
|
||||
int type, depth, cn, coi = 0;
|
||||
CvMat stub, *mat = (CvMat*)array;
|
||||
CvContour contourHeader;
|
||||
CvSeq* contour = 0;
|
||||
CvSeqBlock block;
|
||||
if( CV_IS_SEQ( array ))
|
||||
{
|
||||
contour = (CvSeq*)array;
|
||||
if( !CV_IS_SEQ_POINT_SET( contour ))
|
||||
CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" );
|
||||
}
|
||||
|
||||
if( !moments )
|
||||
CV_Error( CV_StsNullPtr, "" );
|
||||
|
||||
memset( mom, 0, sizeof(*mom));
|
||||
|
||||
if( !contour )
|
||||
{
|
||||
|
||||
mat = cvGetMat( mat, &stub, &coi );
|
||||
type = CV_MAT_TYPE( mat->type );
|
||||
|
||||
if( type == CV_32SC2 || type == CV_32FC2 )
|
||||
{
|
||||
contour = cvPointSeqFromMat(
|
||||
CV_SEQ_KIND_CURVE | CV_SEQ_FLAG_CLOSED,
|
||||
mat, &contourHeader, &block );
|
||||
}
|
||||
}
|
||||
if( contour )
|
||||
{
|
||||
icvContourMoments( contour, mom );
|
||||
return;
|
||||
}
|
||||
|
||||
type = CV_MAT_TYPE( mat->type );
|
||||
depth = CV_MAT_DEPTH( type );
|
||||
cn = CV_MAT_CN( type );
|
||||
|
||||
cv::Size size = cvGetMatSize( mat );
|
||||
if( cn > 1 && coi == 0 )
|
||||
CV_Error( CV_StsBadArg, "Invalid image type" );
|
||||
|
||||
if( size.width <= 0 || size.height <= 0 )
|
||||
return;
|
||||
|
||||
cv::Mat src0(mat);
|
||||
cv::ocl::oclMat src(src0);
|
||||
cv::Size tileSize;
|
||||
int blockx,blocky;
|
||||
if(size.width%TILE_SIZE == 0)
|
||||
blockx = size.width/TILE_SIZE;
|
||||
else
|
||||
blockx = size.width/TILE_SIZE + 1;
|
||||
if(size.height%TILE_SIZE == 0)
|
||||
blocky = size.height/TILE_SIZE;
|
||||
else
|
||||
blocky = size.height/TILE_SIZE + 1;
|
||||
cv::ocl::oclMat dst_m00(blocky, blockx, CV_64FC1);
|
||||
cv::ocl::oclMat dst_m10(blocky, blockx, CV_64FC1);
|
||||
cv::ocl::oclMat dst_m01(blocky, blockx, CV_64FC1);
|
||||
cv::ocl::oclMat dst_m20(blocky, blockx, CV_64FC1);
|
||||
cv::ocl::oclMat dst_m11(blocky, blockx, CV_64FC1);
|
||||
cv::ocl::oclMat dst_m02(blocky, blockx, CV_64FC1);
|
||||
cv::ocl::oclMat dst_m30(blocky, blockx, CV_64FC1);
|
||||
cv::ocl::oclMat dst_m21(blocky, blockx, CV_64FC1);
|
||||
cv::ocl::oclMat dst_m12(blocky, blockx, CV_64FC1);
|
||||
cv::ocl::oclMat dst_m03(blocky, blockx, CV_64FC1);
|
||||
cl_mem sum = openCLCreateBuffer(src.clCxt,CL_MEM_READ_WRITE,10*sizeof(double));
|
||||
int tile_width = std::min(size.width,TILE_SIZE);
|
||||
int tile_height = std::min(size.height,TILE_SIZE);
|
||||
size_t localThreads[3] = { tile_height, 1, 1};
|
||||
size_t globalThreads[3] = { size.height, blockx, 1};
|
||||
vector<pair<size_t , const void *> > args,args_sum;
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.width ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.height ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m00.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m10.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m01.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m20.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m11.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m02.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m30.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m21.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m12.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m03.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m00.cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m00.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&type ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&depth ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&cn ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&coi ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&binary ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
|
||||
openCLExecuteKernel(dst_m00.clCxt, &moments, "CvMoments", globalThreads, localThreads, args, -1, depth);
|
||||
|
||||
size_t localThreadss[3] = { 128, 1, 1};
|
||||
size_t globalThreadss[3] = { 128, 1, 1};
|
||||
args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_height ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_width ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&sum ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m00.data ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m10.data ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m01.data ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m20.data ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m11.data ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m02.data ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m30.data ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m21.data ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m12.data ));
|
||||
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m03.data ));
|
||||
openCLExecuteKernel(dst_m00.clCxt, &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1);
|
||||
double* dstsum = new double[10];
|
||||
memset(dstsum,0,10*sizeof(double));
|
||||
openCLReadBuffer(dst_m00.clCxt,sum,(void *)dstsum,10*sizeof(double));
|
||||
mom->m00 = dstsum[0];
|
||||
mom->m10 = dstsum[1];
|
||||
mom->m01 = dstsum[2];
|
||||
mom->m20 = dstsum[3];
|
||||
mom->m11 = dstsum[4];
|
||||
mom->m02 = dstsum[5];
|
||||
mom->m30 = dstsum[6];
|
||||
mom->m21 = dstsum[7];
|
||||
mom->m12 = dstsum[8];
|
||||
mom->m03 = dstsum[9];
|
||||
|
||||
icvCompleteMomentState( mom );
|
||||
}
|
||||
|
||||
Moments ocl_moments( InputArray _array, bool binaryImage )
|
||||
{
|
||||
CvMoments om;
|
||||
Mat arr = _array.getMat();
|
||||
CvMat c_array = arr;
|
||||
ocl_cvMoments(&c_array, &om, binaryImage);
|
||||
return om;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
72
modules/ocl/test/test_moments.cpp
Normal file
72
modules/ocl/test/test_moments.cpp
Normal file
@ -0,0 +1,72 @@
|
||||
#include "precomp.hpp"
|
||||
#include <iomanip>
|
||||
#include "opencv2/imgproc/imgproc_c.h"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace cvtest;
|
||||
using namespace testing;
|
||||
using namespace std;
|
||||
extern string workdir;
|
||||
PARAM_TEST_CASE(MomentsTestBase, MatType, bool)
|
||||
{
|
||||
int type;
|
||||
cv::Mat mat1;
|
||||
bool test_contours;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
test_contours = GET_PARAM(1);
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
cv::Size size(10*MWIDTH, 10*MHEIGHT);
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
}
|
||||
|
||||
void Compare(Moments& cpu, Moments& gpu)
|
||||
{
|
||||
Mat gpu_dst, cpu_dst;
|
||||
HuMoments(cpu, cpu_dst);
|
||||
HuMoments(gpu, gpu_dst);
|
||||
EXPECT_MAT_NEAR(gpu_dst,cpu_dst, .5, NULL);
|
||||
}
|
||||
|
||||
};
|
||||
struct ocl_Moments : MomentsTestBase {};
|
||||
|
||||
TEST_P(ocl_Moments, Mat)
|
||||
{
|
||||
bool binaryImage = 0;
|
||||
SetUp();
|
||||
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
if(test_contours)
|
||||
{
|
||||
Mat src = imread( workdir + "../cpp/pic3.png", 1 );
|
||||
Mat src_gray, canny_output;
|
||||
cvtColor( src, src_gray, CV_BGR2GRAY );
|
||||
vector<vector<Point> > contours;
|
||||
vector<Vec4i> hierarchy;
|
||||
Canny( src_gray, canny_output, 100, 200, 3 );
|
||||
findContours( canny_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) );
|
||||
for( size_t i = 0; i < contours.size(); i++ )
|
||||
{
|
||||
Moments m = moments( contours[i], false );
|
||||
Moments dm = ocl::ocl_moments( contours[i], false );
|
||||
Compare(m, dm);
|
||||
}
|
||||
}
|
||||
cv::_InputArray _array(mat1);
|
||||
cv::Moments CvMom = cv::moments(_array, binaryImage);
|
||||
cv::Moments oclMom = cv::ocl::ocl_moments(_array, binaryImage);
|
||||
|
||||
Compare(CvMom, oclMom);
|
||||
|
||||
}
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(Moments, ocl_Moments, Combine(
|
||||
Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_64FC1), Values(true,false)));
|
||||
#endif // HAVE_OPENCL
|
Loading…
x
Reference in New Issue
Block a user