433 lines
14 KiB
Common Lisp
433 lines
14 KiB
Common Lisp
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
//
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
// If you do not agree to this license, do not download, install,
|
|
// copy or use the software.
|
|
//
|
|
//
|
|
// License Agreement
|
|
// For Open Source Computer Vision Library
|
|
//
|
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
|
// Third party copyrights are property of their respective owners.
|
|
//
|
|
// @Authors
|
|
// Jin Ma, jin@multicorewareinc.com
|
|
// Sen Liu, swjtuls1987@126.com
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
// are permitted provided that the following conditions are met:
|
|
//
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimer.
|
|
//
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
// and/or other materials provided with the distribution.
|
|
//
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
// derived from this software without specific prior written permission.
|
|
//
|
|
// This software is provided by the copyright holders and contributors as is and
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
//
|
|
//M*/
|
|
|
|
#if defined (DOUBLE_SUPPORT)
|
|
#ifdef cl_khr_fp64
|
|
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
|
#elif defined (cl_amd_fp64)
|
|
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
|
#endif
|
|
typedef double T;
|
|
#else
|
|
typedef long T;
|
|
#endif
|
|
|
|
#define DST_ROW_00 0
|
|
#define DST_ROW_10 1
|
|
#define DST_ROW_01 2
|
|
#define DST_ROW_20 3
|
|
#define DST_ROW_11 4
|
|
#define DST_ROW_02 5
|
|
#define DST_ROW_30 6
|
|
#define DST_ROW_21 7
|
|
#define DST_ROW_12 8
|
|
#define DST_ROW_03 9
|
|
|
|
__kernel void icvContourMoments(int contour_total,
|
|
__global float* reader_oclmat_data,
|
|
__global T* dst_a,
|
|
int dst_step)
|
|
{
|
|
T xi_1, yi_1, xi_12, yi_12, xi, yi, xi2, yi2, dxy, xii_1, yii_1;
|
|
int idx = get_global_id(0);
|
|
|
|
if (idx < 0 || idx >= contour_total)
|
|
return;
|
|
|
|
xi_1 = (T)(*(reader_oclmat_data + (get_global_id(0) << 1)));
|
|
yi_1 = (T)(*(reader_oclmat_data + (get_global_id(0) << 1) + 1));
|
|
xi_12 = xi_1 * xi_1;
|
|
yi_12 = yi_1 * yi_1;
|
|
|
|
if(idx == contour_total - 1)
|
|
{
|
|
xi = (T)(*(reader_oclmat_data));
|
|
yi = (T)(*(reader_oclmat_data + 1));
|
|
}
|
|
else
|
|
{
|
|
xi = (T)(*(reader_oclmat_data + (idx + 1) * 2));
|
|
yi = (T)(*(reader_oclmat_data + (idx + 1) * 2 + 1));
|
|
}
|
|
xi2 = xi * xi;
|
|
yi2 = yi * yi;
|
|
dxy = xi_1 * yi - xi * yi_1;
|
|
xii_1 = xi_1 + xi;
|
|
yii_1 = yi_1 + yi;
|
|
|
|
dst_step /= sizeof(T);
|
|
*( dst_a + DST_ROW_00 * dst_step + idx) = dxy;
|
|
*( dst_a + DST_ROW_10 * dst_step + idx) = dxy * xii_1;
|
|
*( dst_a + DST_ROW_01 * dst_step + idx) = dxy * yii_1;
|
|
*( dst_a + DST_ROW_20 * dst_step + idx) = dxy * (xi_1 * xii_1 + xi2);
|
|
*( dst_a + DST_ROW_11 * dst_step + idx) = dxy * (xi_1 * (yii_1 + yi_1) + xi * (yii_1 + yi));
|
|
*( dst_a + DST_ROW_02 * dst_step + idx) = dxy * (yi_1 * yii_1 + yi2);
|
|
*( dst_a + DST_ROW_30 * dst_step + idx) = dxy * xii_1 * (xi_12 + xi2);
|
|
*( dst_a + DST_ROW_03 * dst_step + idx) = dxy * yii_1 * (yi_12 + yi2);
|
|
*( dst_a + DST_ROW_21 * dst_step + idx) =
|
|
dxy * (xi_12 * (3 * yi_1 + yi) + 2 * xi * xi_1 * yii_1 +
|
|
xi2 * (yi_1 + 3 * yi));
|
|
*( dst_a + DST_ROW_12 * dst_step + idx) =
|
|
dxy * (yi_12 * (3 * xi_1 + xi) + 2 * yi * yi_1 * xii_1 +
|
|
yi2 * (xi_1 + 3 * xi));
|
|
}
|
|
|
|
#if defined (DOUBLE_SUPPORT)
|
|
#define WT double
|
|
#define WT4 double4
|
|
#define convert_T4 convert_double4
|
|
#define convert_T convert_double
|
|
#else
|
|
#define WT float
|
|
#define WT4 float4
|
|
#define convert_T4 convert_float4
|
|
#define convert_T convert_float
|
|
#endif
|
|
|
|
#ifdef CV_8UC1
|
|
#define TT uchar
|
|
#elif defined CV_16UC1
|
|
#define TT ushort
|
|
#elif defined CV_16SC1
|
|
#define TT short
|
|
#elif defined CV_32FC1
|
|
#define TT float
|
|
#elif defined CV_64FC1
|
|
#ifdef DOUBLE_SUPPORT
|
|
#define TT double
|
|
#else
|
|
#define TT float
|
|
#endif
|
|
#endif
|
|
__kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int src_step,
|
|
__global WT* dst_m,
|
|
int dst_cols, int dst_step, int binary)
|
|
{
|
|
int dy = get_global_id(1);
|
|
int ly = get_local_id(1);
|
|
int gidx = get_group_id(0);
|
|
int gidy = get_group_id(1);
|
|
int x_rest = src_cols % 256;
|
|
int y_rest = src_rows % 256;
|
|
__local int codxy[256];
|
|
codxy[ly] = ly;
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
WT4 x0 = (WT4)(0.f);
|
|
WT4 x1 = (WT4)(0.f);
|
|
WT4 x2 = (WT4)(0.f);
|
|
WT4 x3 = (WT4)(0.f);
|
|
|
|
__global TT* row = src_data + gidy * src_step + ly * src_step + gidx * 256;
|
|
|
|
WT4 p;
|
|
WT4 x;
|
|
WT4 xp;
|
|
WT4 xxp;
|
|
|
|
WT py = 0.f, sy = 0.f;
|
|
|
|
if(dy < src_rows)
|
|
{
|
|
if((x_rest > 0) && (gidx == ((int)get_num_groups(0) - 1)))
|
|
{
|
|
int i;
|
|
for(i = 0; i < x_rest - 4; i += 4)
|
|
{
|
|
p = convert_T4(vload4(0, row + i));
|
|
x = convert_T4(vload4(0, codxy + i));
|
|
xp = x * p;
|
|
xxp = xp * x;
|
|
|
|
x0 += p;
|
|
x1 += xp;
|
|
x2 += xxp;
|
|
x3 += convert_T4(xxp * x);
|
|
}
|
|
|
|
x0.s0 = x0.s0 + x0.s1 + x0.s2 + x0.s3;
|
|
x1.s0 = x1.s0 + x1.s1 + x1.s2 + x1.s3;
|
|
x2.s0 = x2.s0 + x2.s1 + x2.s2 + x2.s3;
|
|
x3.s0 = x3.s0 + x3.s1 + x3.s2 + x3.s3;
|
|
|
|
WT x0_ = 0;
|
|
WT x1_ = 0;
|
|
WT x2_ = 0;
|
|
WT x3_ = 0;
|
|
|
|
for(; i < x_rest; i++)
|
|
{
|
|
WT p_ = 0;
|
|
p_ = row[i];
|
|
WT x_ = convert_T(codxy[i]);
|
|
|
|
|
|
WT xp_ = x_ * p_;
|
|
WT xxp_ = xp_ * x_;
|
|
|
|
x0_ += p_;
|
|
x1_ += xp_;
|
|
x2_ += xxp_;
|
|
x3_ += xxp_ * x_;
|
|
}
|
|
|
|
x0.s0 += x0_;
|
|
x1.s0 += x1_;
|
|
x2.s0 += x2_;
|
|
x3.s0 += x3_;
|
|
}else
|
|
{
|
|
for(int i = 0; i < 256; i += 4)
|
|
{
|
|
p = convert_T4(vload4(0, row + i));
|
|
x = convert_T4(vload4(0, codxy + i));
|
|
xp = x * p;
|
|
xxp = xp * x;
|
|
|
|
x0 += p;
|
|
x1 += xp;
|
|
x2 += xxp;
|
|
x3 += convert_T4(xxp * x);
|
|
}
|
|
|
|
x0.s0 = x0.s0 + x0.s1 + x0.s2 + x0.s3;
|
|
x1.s0 = x1.s0 + x1.s1 + x1.s2 + x1.s3;
|
|
x2.s0 = x2.s0 + x2.s1 + x2.s2 + x2.s3;
|
|
x3.s0 = x3.s0 + x3.s1 + x3.s2 + x3.s3;
|
|
}
|
|
|
|
py = ly * x0.s0;
|
|
sy = ly * ly;
|
|
}
|
|
__local WT mom[10][256];
|
|
|
|
if((y_rest > 0) && (gidy == ((int)get_num_groups(1) - 1)))
|
|
{
|
|
if(ly < y_rest)
|
|
{
|
|
mom[9][ly] = py * sy;
|
|
mom[8][ly] = x1.s0 * sy;
|
|
mom[7][ly] = x2.s0 * ly;
|
|
mom[6][ly] = x3.s0;
|
|
mom[5][ly] = x0.s0 * sy;
|
|
mom[4][ly] = x1.s0 * ly;
|
|
mom[3][ly] = x2.s0;
|
|
mom[2][ly] = py;
|
|
mom[1][ly] = x1.s0;
|
|
mom[0][ly] = x0.s0;
|
|
}
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
if(ly < 10)
|
|
for(int i = 1; i < y_rest; i++)
|
|
mom[ly][0] = mom[ly][i] + mom[ly][0];
|
|
}
|
|
else
|
|
{
|
|
mom[9][ly] = py * sy;
|
|
mom[8][ly] = x1.s0 * sy;
|
|
mom[7][ly] = x2.s0 * ly;
|
|
mom[6][ly] = x3.s0;
|
|
mom[5][ly] = x0.s0 * sy;
|
|
mom[4][ly] = x1.s0 * ly;
|
|
mom[3][ly] = x2.s0;
|
|
mom[2][ly] = py;
|
|
mom[1][ly] = x1.s0;
|
|
mom[0][ly] = x0.s0;
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
if(ly < 128)
|
|
{
|
|
mom[0][ly] = mom[0][ly] + mom[0][ly + 128];
|
|
mom[1][ly] = mom[1][ly] + mom[1][ly + 128];
|
|
mom[2][ly] = mom[2][ly] + mom[2][ly + 128];
|
|
mom[3][ly] = mom[3][ly] + mom[3][ly + 128];
|
|
mom[4][ly] = mom[4][ly] + mom[4][ly + 128];
|
|
mom[5][ly] = mom[5][ly] + mom[5][ly + 128];
|
|
mom[6][ly] = mom[6][ly] + mom[6][ly + 128];
|
|
mom[7][ly] = mom[7][ly] + mom[7][ly + 128];
|
|
mom[8][ly] = mom[8][ly] + mom[8][ly + 128];
|
|
mom[9][ly] = mom[9][ly] + mom[9][ly + 128];
|
|
}
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
if(ly < 64)
|
|
{
|
|
mom[0][ly] = mom[0][ly] + mom[0][ly + 64];
|
|
mom[1][ly] = mom[1][ly] + mom[1][ly + 64];
|
|
mom[2][ly] = mom[2][ly] + mom[2][ly + 64];
|
|
mom[3][ly] = mom[3][ly] + mom[3][ly + 64];
|
|
mom[4][ly] = mom[4][ly] + mom[4][ly + 64];
|
|
mom[5][ly] = mom[5][ly] + mom[5][ly + 64];
|
|
mom[6][ly] = mom[6][ly] + mom[6][ly + 64];
|
|
mom[7][ly] = mom[7][ly] + mom[7][ly + 64];
|
|
mom[8][ly] = mom[8][ly] + mom[8][ly + 64];
|
|
mom[9][ly] = mom[9][ly] + mom[9][ly + 64];
|
|
}
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
if(ly < 32)
|
|
{
|
|
mom[0][ly] = mom[0][ly] + mom[0][ly + 32];
|
|
mom[1][ly] = mom[1][ly] + mom[1][ly + 32];
|
|
mom[2][ly] = mom[2][ly] + mom[2][ly + 32];
|
|
mom[3][ly] = mom[3][ly] + mom[3][ly + 32];
|
|
mom[4][ly] = mom[4][ly] + mom[4][ly + 32];
|
|
mom[5][ly] = mom[5][ly] + mom[5][ly + 32];
|
|
mom[6][ly] = mom[6][ly] + mom[6][ly + 32];
|
|
mom[7][ly] = mom[7][ly] + mom[7][ly + 32];
|
|
mom[8][ly] = mom[8][ly] + mom[8][ly + 32];
|
|
mom[9][ly] = mom[9][ly] + mom[9][ly + 32];
|
|
}
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
if(ly < 16)
|
|
{
|
|
mom[0][ly] = mom[0][ly] + mom[0][ly + 16];
|
|
mom[1][ly] = mom[1][ly] + mom[1][ly + 16];
|
|
mom[2][ly] = mom[2][ly] + mom[2][ly + 16];
|
|
mom[3][ly] = mom[3][ly] + mom[3][ly + 16];
|
|
mom[4][ly] = mom[4][ly] + mom[4][ly + 16];
|
|
mom[5][ly] = mom[5][ly] + mom[5][ly + 16];
|
|
mom[6][ly] = mom[6][ly] + mom[6][ly + 16];
|
|
mom[7][ly] = mom[7][ly] + mom[7][ly + 16];
|
|
mom[8][ly] = mom[8][ly] + mom[8][ly + 16];
|
|
mom[9][ly] = mom[9][ly] + mom[9][ly + 16];
|
|
}
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
if(ly < 8)
|
|
{
|
|
mom[0][ly] = mom[0][ly] + mom[0][ly + 8];
|
|
mom[1][ly] = mom[1][ly] + mom[1][ly + 8];
|
|
mom[2][ly] = mom[2][ly] + mom[2][ly + 8];
|
|
mom[3][ly] = mom[3][ly] + mom[3][ly + 8];
|
|
mom[4][ly] = mom[4][ly] + mom[4][ly + 8];
|
|
mom[5][ly] = mom[5][ly] + mom[5][ly + 8];
|
|
mom[6][ly] = mom[6][ly] + mom[6][ly + 8];
|
|
mom[7][ly] = mom[7][ly] + mom[7][ly + 8];
|
|
mom[8][ly] = mom[8][ly] + mom[8][ly + 8];
|
|
mom[9][ly] = mom[9][ly] + mom[9][ly + 8];
|
|
}
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
if(ly < 4)
|
|
{
|
|
mom[0][ly] = mom[0][ly] + mom[0][ly + 4];
|
|
mom[1][ly] = mom[1][ly] + mom[1][ly + 4];
|
|
mom[2][ly] = mom[2][ly] + mom[2][ly + 4];
|
|
mom[3][ly] = mom[3][ly] + mom[3][ly + 4];
|
|
mom[4][ly] = mom[4][ly] + mom[4][ly + 4];
|
|
mom[5][ly] = mom[5][ly] + mom[5][ly + 4];
|
|
mom[6][ly] = mom[6][ly] + mom[6][ly + 4];
|
|
mom[7][ly] = mom[7][ly] + mom[7][ly + 4];
|
|
mom[8][ly] = mom[8][ly] + mom[8][ly + 4];
|
|
mom[9][ly] = mom[9][ly] + mom[9][ly + 4];
|
|
}
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
if(ly < 2)
|
|
{
|
|
mom[0][ly] = mom[0][ly] + mom[0][ly + 2];
|
|
mom[1][ly] = mom[1][ly] + mom[1][ly + 2];
|
|
mom[2][ly] = mom[2][ly] + mom[2][ly + 2];
|
|
mom[3][ly] = mom[3][ly] + mom[3][ly + 2];
|
|
mom[4][ly] = mom[4][ly] + mom[4][ly + 2];
|
|
mom[5][ly] = mom[5][ly] + mom[5][ly + 2];
|
|
mom[6][ly] = mom[6][ly] + mom[6][ly + 2];
|
|
mom[7][ly] = mom[7][ly] + mom[7][ly + 2];
|
|
mom[8][ly] = mom[8][ly] + mom[8][ly + 2];
|
|
mom[9][ly] = mom[9][ly] + mom[9][ly + 2];
|
|
}
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
if(ly < 1)
|
|
{
|
|
mom[0][ly] = mom[0][ly] + mom[0][ly + 1];
|
|
mom[1][ly] = mom[1][ly] + mom[1][ly + 1];
|
|
mom[2][ly] = mom[2][ly] + mom[2][ly + 1];
|
|
mom[3][ly] = mom[3][ly] + mom[3][ly + 1];
|
|
mom[4][ly] = mom[4][ly] + mom[4][ly + 1];
|
|
mom[5][ly] = mom[5][ly] + mom[5][ly + 1];
|
|
mom[6][ly] = mom[6][ly] + mom[6][ly + 1];
|
|
mom[7][ly] = mom[7][ly] + mom[7][ly + 1];
|
|
mom[8][ly] = mom[8][ly] + mom[8][ly + 1];
|
|
mom[9][ly] = mom[9][ly] + mom[9][ly + 1];
|
|
}
|
|
}
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
if(binary)
|
|
{
|
|
WT s = 1.0f/255;
|
|
if(ly < 10)
|
|
mom[ly][0] *= s;
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
}
|
|
WT xm = (gidx * 256) * mom[0][0];
|
|
WT ym = (gidy * 256) * mom[0][0];
|
|
|
|
if(ly == 0)
|
|
{
|
|
mom[0][1] = mom[0][0];
|
|
mom[1][1] = mom[1][0] + xm;
|
|
mom[2][1] = mom[2][0] + ym;
|
|
mom[3][1] = mom[3][0] + gidx * 256 * (mom[1][0] * 2 + xm);
|
|
mom[4][1] = mom[4][0] + gidx * 256 * (mom[2][0] + ym) + gidy * 256 * mom[1][0];
|
|
mom[5][1] = mom[5][0] + gidy * 256 * (mom[2][0] * 2 + ym);
|
|
mom[6][1] = mom[6][0] + gidx * 256 * (3 * mom[3][0] + 256 * gidx * (3 * mom[1][0] + xm));
|
|
mom[7][1] = mom[7][0] + gidx * 256 * (2 * (mom[4][0] + 256 * gidy * mom[1][0]) + 256 * gidx * (mom[2][0] + ym)) + 256 * gidy * mom[3][0];
|
|
mom[8][1] = mom[8][0] + gidy * 256 * (2 * (mom[4][0] + 256 * gidx * mom[2][0]) + 256 * gidy * (mom[1][0] + xm)) + 256 * gidx * mom[5][0];
|
|
mom[9][1] = mom[9][0] + gidy * 256 * (3 * mom[5][0] + 256 * gidy * (3 * mom[2][0] + ym));
|
|
}
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
if(ly < 10)
|
|
dst_m[10 * gidy * dst_step + ly * dst_step + gidx] = mom[ly][1];
|
|
}
|