Merge pull request #1519 from ilya-lavrenov:ocl_minMax
This commit is contained in:
commit
154fe4f657
@ -584,7 +584,8 @@ namespace cv
|
||||
|
||||
CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
|
||||
|
||||
CV_EXPORTS void setIdentity(oclMat& src, double val);
|
||||
//! initializes a scaled identity matrix
|
||||
CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
|
||||
|
||||
//////////////////////////////// Filter Engine ////////////////////////////////
|
||||
|
||||
|
@ -64,11 +64,10 @@ namespace cv
|
||||
{
|
||||
//////////////////////////////// OpenCL kernel strings /////////////////////
|
||||
|
||||
extern const char *arithm_absdiff_nonsaturate;
|
||||
extern const char *arithm_nonzero;
|
||||
extern const char *arithm_sum;
|
||||
extern const char *arithm_sum_3;
|
||||
extern const char *arithm_minMax;
|
||||
extern const char *arithm_minMax_mask;
|
||||
extern const char *arithm_minMaxLoc;
|
||||
extern const char *arithm_minMaxLoc_mask;
|
||||
extern const char *arithm_LUT;
|
||||
@ -318,21 +317,28 @@ void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int
|
||||
////////////////////////////////// sum //////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//type = 0 sum,type = 1 absSum,type = 2 sqrSum
|
||||
static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, int type = 0)
|
||||
enum { SUM = 0, ABS_SUM, SQR_SUM };
|
||||
|
||||
static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int groupnum, int type, int ddepth)
|
||||
{
|
||||
vector<pair<size_t , const void *> > args;
|
||||
int all_cols = src.step / (vlen * src.elemSize1());
|
||||
int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1());
|
||||
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1;
|
||||
int ochannels = src.oclchannels();
|
||||
int all_cols = src.step / src.elemSize();
|
||||
int pre_cols = (src.offset % src.step) / src.elemSize();
|
||||
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
|
||||
int invalid_cols = pre_cols + sec_cols;
|
||||
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
|
||||
int offset = src.offset / (vlen * src.elemSize1());
|
||||
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
|
||||
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels();
|
||||
char build_options[512];
|
||||
CV_Assert(type == 0 || type == 1 || type == 2);
|
||||
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d -D FUNC_TYPE_%d", src.depth(), repeat_s, repeat_e, type);
|
||||
int offset = src.offset / src.elemSize();
|
||||
|
||||
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
||||
const char * const funcMap[] = { "FUNC_SUM", "FUNC_ABS_SUM", "FUNC_SQR_SUM" };
|
||||
const char * const channelMap[] = { " ", " ", "2", "4", "4" };
|
||||
string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s -D %s",
|
||||
typeMap[src.depth()], channelMap[ochannels],
|
||||
typeMap[ddepth], channelMap[ochannels],
|
||||
typeMap[ddepth], channelMap[ochannels],
|
||||
funcMap[type]);
|
||||
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset));
|
||||
@ -340,55 +346,63 @@ static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen ,
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
|
||||
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
|
||||
if (src.oclchannels() != 3)
|
||||
openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", gt, lt, args, -1, -1, build_options);
|
||||
else
|
||||
openCLExecuteKernel(src.clCxt, &arithm_sum_3, "arithm_op_sum_3", gt, lt, args, -1, -1, build_options);
|
||||
size_t globalThreads[3] = { groupnum * 256, 1, 1 };
|
||||
size_t localThreads[3] = { 256, 1, 1 };
|
||||
|
||||
openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", globalThreads, localThreads,
|
||||
args, -1, -1, buildOptions.c_str());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Scalar arithmetic_sum(const oclMat &src, int type = 0)
|
||||
Scalar arithmetic_sum(const oclMat &src, int type, int ddepth)
|
||||
{
|
||||
CV_Assert(src.step % src.elemSize() == 0);
|
||||
|
||||
size_t groupnum = src.clCxt->computeUnits();
|
||||
CV_Assert(groupnum != 0);
|
||||
int vlen = src.oclchannels() == 3 ? 12 : 8, dbsize = groupnum * vlen;
|
||||
|
||||
int dbsize = groupnum * src.oclchannels();
|
||||
Context *clCxt = src.clCxt;
|
||||
|
||||
AutoBuffer<T> _buf(dbsize);
|
||||
T *p = (T*)_buf;
|
||||
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T));
|
||||
Scalar s = Scalar::all(0.0);
|
||||
arithmetic_sum_buffer_run(src, dstBuffer, vlen, groupnum, type);
|
||||
|
||||
memset(p, 0, dbsize * sizeof(T));
|
||||
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T));
|
||||
for (int i = 0; i < dbsize;)
|
||||
{
|
||||
for (int j = 0; j < src.oclchannels(); j++, i++)
|
||||
s.val[j] += p[i];
|
||||
}
|
||||
|
||||
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T));
|
||||
arithmetic_sum_buffer_run(src, dstBuffer, groupnum, type, ddepth);
|
||||
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T));
|
||||
openCLFree(dstBuffer);
|
||||
|
||||
Scalar s = Scalar::all(0.0);
|
||||
for (int i = 0; i < dbsize;)
|
||||
for (int j = 0; j < src.oclchannels(); j++, i++)
|
||||
s.val[j] += p[i];
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
typedef Scalar (*sumFunc)(const oclMat &src, int type);
|
||||
typedef Scalar (*sumFunc)(const oclMat &src, int type, int ddepth);
|
||||
|
||||
Scalar cv::ocl::sum(const oclMat &src)
|
||||
{
|
||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||
{
|
||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
||||
}
|
||||
static sumFunc functab[2] =
|
||||
static sumFunc functab[3] =
|
||||
{
|
||||
arithmetic_sum<int>,
|
||||
arithmetic_sum<float>,
|
||||
arithmetic_sum<double>
|
||||
};
|
||||
|
||||
sumFunc func;
|
||||
func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)];
|
||||
return func(src, 0);
|
||||
bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
|
||||
int ddepth = std::max(src.depth(), CV_32S);
|
||||
if (!hasDouble && ddepth == CV_64F)
|
||||
ddepth = CV_32F;
|
||||
|
||||
sumFunc func = functab[ddepth - CV_32S];
|
||||
return func(src, SUM, ddepth);
|
||||
}
|
||||
|
||||
Scalar cv::ocl::absSum(const oclMat &src)
|
||||
@ -397,15 +411,20 @@ Scalar cv::ocl::absSum(const oclMat &src)
|
||||
{
|
||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
||||
}
|
||||
static sumFunc functab[2] =
|
||||
static sumFunc functab[3] =
|
||||
{
|
||||
arithmetic_sum<int>,
|
||||
arithmetic_sum<float>,
|
||||
arithmetic_sum<double>
|
||||
};
|
||||
|
||||
sumFunc func;
|
||||
func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)];
|
||||
return func(src, 1);
|
||||
bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
|
||||
int ddepth = std::max(src.depth(), CV_32S);
|
||||
if (!hasDouble && ddepth == CV_64F)
|
||||
ddepth = CV_32F;
|
||||
|
||||
sumFunc func = functab[ddepth - CV_32S];
|
||||
return func(src, ABS_SUM, ddepth);
|
||||
}
|
||||
|
||||
Scalar cv::ocl::sqrSum(const oclMat &src)
|
||||
@ -414,15 +433,18 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
|
||||
{
|
||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
||||
}
|
||||
static sumFunc functab[2] =
|
||||
static sumFunc functab[3] =
|
||||
{
|
||||
arithmetic_sum<float>,
|
||||
arithmetic_sum<int>,
|
||||
arithmetic_sum<double>,
|
||||
arithmetic_sum<double>
|
||||
};
|
||||
|
||||
sumFunc func;
|
||||
func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)];
|
||||
return func(src, 2);
|
||||
bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
|
||||
int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F);
|
||||
|
||||
sumFunc func = functab[ddepth - CV_32S];
|
||||
return func(src, SQR_SUM, ddepth);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -431,23 +453,15 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
|
||||
|
||||
void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
|
||||
{
|
||||
CV_Assert(src.depth() <= CV_32S);
|
||||
cv::Size sz(1, 1);
|
||||
int channels = src.oclchannels();
|
||||
Mat m1(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)),
|
||||
m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0));
|
||||
oclMat dst1(m1), dst2(m2);
|
||||
double total = 1.0 / src.size().area();
|
||||
|
||||
//arithmetic_sum_run(src, dst1,"arithm_op_sum");
|
||||
//arithmetic_sum_run(src, dst2,"arithm_op_squares_sum");
|
||||
mean = sum(src);
|
||||
stddev = sqrSum(src);
|
||||
|
||||
m1 = (Mat)dst1;
|
||||
m2 = (Mat)dst2;
|
||||
int i = 0, *p = (int *)m1.data, *q = (int *)m2.data;
|
||||
for (; i < channels; i++)
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
mean.val[i] = (double)p[i] / (src.cols * src.rows);
|
||||
stddev.val[i] = std::sqrt(std::max((double) q[i] / (src.cols * src.rows) - mean.val[i] * mean.val[i] , 0.));
|
||||
mean[i] *= total;
|
||||
stddev[i] = std::sqrt(std::max(stddev[i] * total - mean.val[i] * mean.val[i] , 0.));
|
||||
}
|
||||
}
|
||||
|
||||
@ -455,139 +469,120 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
|
||||
//////////////////////////////////// minMax /////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen , int groupnum, string kernelName)
|
||||
template <typename T, typename WT>
|
||||
static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem &dst, int groupnum, string kernelName)
|
||||
{
|
||||
vector<pair<size_t , const void *> > args;
|
||||
int all_cols = src.step / (vlen * src.elemSize1());
|
||||
int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1());
|
||||
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1;
|
||||
int all_cols = src.step / src.elemSize();
|
||||
int pre_cols = (src.offset % src.step) / src.elemSize();
|
||||
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
|
||||
int invalid_cols = pre_cols + sec_cols;
|
||||
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
|
||||
int offset = src.offset / (vlen * src.elemSize1());
|
||||
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
|
||||
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels();
|
||||
char build_options[50];
|
||||
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e);
|
||||
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
|
||||
int offset = src.offset / src.elemSize();
|
||||
|
||||
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
||||
const char * const channelMap[] = { " ", " ", "2", "4", "4" };
|
||||
|
||||
ostringstream stream;
|
||||
stream << "-D T=" << typeMap[src.depth()] << channelMap[src.channels()];
|
||||
stream << " -D MAX_VAL=" << (WT)numeric_limits<T>::max();
|
||||
stream << " -D MIN_VAL=" << (WT)numeric_limits<T>::min();
|
||||
string buildOptions = stream.str();
|
||||
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
|
||||
int minvalid_cols = 0, moffset = 0;
|
||||
if (!mask.empty())
|
||||
{
|
||||
int mall_cols = mask.step / (vlen * mask.elemSize1());
|
||||
int mpre_cols = (mask.offset % mask.step) / (vlen * mask.elemSize1());
|
||||
int msec_cols = mall_cols - (mask.offset % mask.step + mask.cols * mask.elemSize() - 1) / (vlen * mask.elemSize1()) - 1;
|
||||
int minvalid_cols = mpre_cols + msec_cols;
|
||||
int moffset = mask.offset / (vlen * mask.elemSize1());
|
||||
int mall_cols = mask.step / mask.elemSize();
|
||||
int mpre_cols = (mask.offset % mask.step) / mask.elemSize();
|
||||
int msec_cols = mall_cols - (mask.offset % mask.step + mask.cols * mask.elemSize() - 1) / mask.elemSize() - 1;
|
||||
minvalid_cols = mpre_cols + msec_cols;
|
||||
moffset = mask.offset / mask.elemSize();
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&minvalid_cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
|
||||
|
||||
kernelName += "_mask";
|
||||
}
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
|
||||
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
|
||||
openCLExecuteKernel(src.clCxt, &arithm_minMax, kernelName, gt, lt, args, -1, -1, build_options);
|
||||
|
||||
size_t globalThreads[3] = {groupnum * 256, 1, 1};
|
||||
size_t localThreads[3] = {256, 1, 1};
|
||||
|
||||
openCLExecuteKernel(src.clCxt, &arithm_minMax, kernelName, globalThreads, localThreads,
|
||||
args, -1, -1, buildOptions.c_str());
|
||||
}
|
||||
|
||||
|
||||
static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen, int groupnum, string kernelName)
|
||||
{
|
||||
vector<pair<size_t , const void *> > args;
|
||||
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
|
||||
char build_options[50];
|
||||
if (src.oclchannels() == 1)
|
||||
{
|
||||
int cols = (src.cols - 1) / vlen + 1;
|
||||
int invalid_cols = src.step / (vlen * src.elemSize1()) - cols;
|
||||
int offset = src.offset / src.elemSize1();
|
||||
int repeat_me = vlen - (mask.cols % vlen == 0 ? vlen : mask.cols % vlen);
|
||||
int minvalid_cols = mask.step / (vlen * mask.elemSize1()) - cols;
|
||||
int moffset = mask.offset / mask.elemSize1();
|
||||
int elemnum = cols * src.rows;
|
||||
sprintf(build_options, "-D DEPTH_%d -D REPEAT_E%d", src.depth(), repeat_me);
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&minvalid_cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
|
||||
openCLExecuteKernel(src.clCxt, &arithm_minMax_mask, kernelName, gt, lt, args, -1, -1, build_options);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
|
||||
template <typename T, typename WT>
|
||||
void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
|
||||
const oclMat &mask, oclMat &buf)
|
||||
{
|
||||
size_t groupnum = src.clCxt->computeUnits();
|
||||
CV_Assert(groupnum != 0);
|
||||
groupnum = groupnum * 2;
|
||||
int vlen = 8;
|
||||
int dbsize = groupnum * 2 * vlen * sizeof(T) ;
|
||||
|
||||
int dbsize = groupnum * 2 * src.elemSize();
|
||||
ensureSizeIsEnough(1, dbsize, CV_8UC1, buf);
|
||||
|
||||
cl_mem buf_data = reinterpret_cast<cl_mem>(buf.data);
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax");
|
||||
}
|
||||
else
|
||||
{
|
||||
arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask");
|
||||
}
|
||||
arithmetic_minMax_run<T, WT>(src, mask, buf_data, groupnum, "arithm_op_minMax");
|
||||
|
||||
Mat matbuf = Mat(buf);
|
||||
T *p = matbuf.ptr<T>();
|
||||
if (minVal != NULL)
|
||||
{
|
||||
*minVal = std::numeric_limits<double>::max();
|
||||
for (int i = 0; i < vlen * (int)groupnum; i++)
|
||||
{
|
||||
for (int i = 0, end = src.oclchannels() * (int)groupnum; i < end; i++)
|
||||
*minVal = *minVal < p[i] ? *minVal : p[i];
|
||||
}
|
||||
}
|
||||
if (maxVal != NULL)
|
||||
{
|
||||
*maxVal = -std::numeric_limits<double>::max();
|
||||
for (int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
|
||||
{
|
||||
for (int i = src.oclchannels() * (int)groupnum, end = i << 1; i < end; i++)
|
||||
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf);
|
||||
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
|
||||
{
|
||||
oclMat buf;
|
||||
minMax_buf(src, minVal, maxVal, mask, buf);
|
||||
}
|
||||
|
||||
typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf);
|
||||
|
||||
void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf)
|
||||
{
|
||||
CV_Assert(src.oclchannels() == 1);
|
||||
CV_Assert(src.channels() == 1);
|
||||
CV_Assert(src.size() == mask.size() || mask.empty());
|
||||
CV_Assert(src.step % src.elemSize() == 0);
|
||||
|
||||
if (minVal == NULL && maxVal == NULL)
|
||||
return;
|
||||
|
||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||
{
|
||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
||||
}
|
||||
|
||||
static minMaxFunc functab[8] =
|
||||
{
|
||||
arithmetic_minMax<uchar>,
|
||||
arithmetic_minMax<char>,
|
||||
arithmetic_minMax<ushort>,
|
||||
arithmetic_minMax<short>,
|
||||
arithmetic_minMax<int>,
|
||||
arithmetic_minMax<float>,
|
||||
arithmetic_minMax<double>,
|
||||
arithmetic_minMax<uchar, int>,
|
||||
arithmetic_minMax<char, int>,
|
||||
arithmetic_minMax<ushort, int>,
|
||||
arithmetic_minMax<short, int>,
|
||||
arithmetic_minMax<int, int>,
|
||||
arithmetic_minMax<float, float>,
|
||||
arithmetic_minMax<double, double>,
|
||||
0
|
||||
};
|
||||
|
||||
minMaxFunc func;
|
||||
func = functab[src.depth()];
|
||||
func(src, minVal, maxVal, mask, buf);
|
||||
@ -599,57 +594,102 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons
|
||||
|
||||
double cv::ocl::norm(const oclMat &src1, int normType)
|
||||
{
|
||||
return norm(src1, oclMat(src1.size(), src1.type(), Scalar::all(0)), normType);
|
||||
CV_Assert((normType & NORM_RELATIVE) == 0);
|
||||
return norm(src1, oclMat(), normType);
|
||||
}
|
||||
|
||||
static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & src2, oclMat & diff)
|
||||
{
|
||||
CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0));
|
||||
Context *clCxt = src1.clCxt;
|
||||
|
||||
int ddepth = CV_64F;
|
||||
diff.create(src1.size(), CV_MAKE_TYPE(ddepth, src1.channels()));
|
||||
|
||||
int oclChannels = src1.oclchannels(), sdepth = src1.depth();
|
||||
int src1step1 = src1.step / src1.elemSize(), src1offset1 = src1.offset / src1.elemSize();
|
||||
int src2step1 = src2.step / src2.elemSize(), src2offset1 = src2.offset / src2.elemSize();
|
||||
int diffstep1 = diff.step / diff.elemSize(), diffoffset1 = diff.offset / diff.elemSize();
|
||||
|
||||
string kernelName = "arithm_absdiff_nonsaturate";
|
||||
size_t localThreads[3] = { 16, 16, 1 };
|
||||
size_t globalThreads[3] = { diff.cols, diff.rows, 1 };
|
||||
|
||||
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
||||
const char * const channelMap[] = { "", "", "2", "4", "4" };
|
||||
|
||||
std::string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s",
|
||||
typeMap[sdepth], channelMap[oclChannels],
|
||||
typeMap[ddepth], channelMap[oclChannels],
|
||||
typeMap[ddepth], channelMap[oclChannels]);
|
||||
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 ));
|
||||
|
||||
if (!src2.empty())
|
||||
{
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 ));
|
||||
|
||||
kernelName += "_binary";
|
||||
}
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&diff.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&diffstep1 ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&diffoffset1 ));
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
|
||||
|
||||
openCLExecuteKernel(clCxt, &arithm_absdiff_nonsaturate,
|
||||
kernelName, globalThreads, localThreads,
|
||||
args, -1, -1, buildOptions.c_str());
|
||||
}
|
||||
|
||||
double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
|
||||
{
|
||||
CV_Assert(!src1.empty());
|
||||
CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size()));
|
||||
|
||||
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F)
|
||||
{
|
||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
||||
}
|
||||
|
||||
bool isRelative = (normType & NORM_RELATIVE) != 0;
|
||||
normType &= 7;
|
||||
CV_Assert(src1.depth() <= CV_32S && src1.type() == src2.type() && ( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2));
|
||||
int channels = src1.oclchannels(), i = 0, *p;
|
||||
normType &= NORM_TYPE_MASK;
|
||||
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
|
||||
|
||||
Scalar s;
|
||||
int cn = src1.channels();
|
||||
double r = 0;
|
||||
oclMat gm1(src1.size(), src1.type());
|
||||
int min_int = (normType == NORM_INF ? CL_INT_MIN : 0);
|
||||
Mat m(1, 1, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(min_int));
|
||||
oclMat gm2(m), emptyMat;
|
||||
switch(normType)
|
||||
oclMat diff;
|
||||
arithm_absdiff_nonsaturate_run(src1, src2, diff);
|
||||
|
||||
switch (normType)
|
||||
{
|
||||
case NORM_INF:
|
||||
// arithmetic_run(src1, src2, gm1, "arithm_op_absdiff");
|
||||
//arithmetic_minMax_run(gm1,emptyMat, gm2,"arithm_op_max");
|
||||
m = (gm2);
|
||||
p = (int *)m.data;
|
||||
r = -std::numeric_limits<double>::max();
|
||||
for (i = 0; i < channels; i++)
|
||||
{
|
||||
r = std::max(r, (double)p[i]);
|
||||
}
|
||||
diff = diff.reshape(1);
|
||||
minMax(diff, NULL, &r);
|
||||
break;
|
||||
case NORM_L1:
|
||||
//arithmetic_run(src1, src2, gm1, "arithm_op_absdiff");
|
||||
//arithmetic_sum_run(gm1, gm2,"arithm_op_sum");
|
||||
m = (gm2);
|
||||
p = (int *)m.data;
|
||||
for (i = 0; i < channels; i++)
|
||||
{
|
||||
r = r + (double)p[i];
|
||||
}
|
||||
s = sum(diff);
|
||||
for (int i = 0; i < cn; ++i)
|
||||
r += s[i];
|
||||
break;
|
||||
case NORM_L2:
|
||||
//arithmetic_run(src1, src2, gm1, "arithm_op_absdiff");
|
||||
//arithmetic_sum_run(gm1, gm2,"arithm_op_squares_sum");
|
||||
m = (gm2);
|
||||
p = (int *)m.data;
|
||||
for (i = 0; i < channels; i++)
|
||||
{
|
||||
r = r + (double)p[i];
|
||||
}
|
||||
s = sqrSum(diff);
|
||||
for (int i = 0; i < cn; ++i)
|
||||
r += s[i];
|
||||
r = std::sqrt(r);
|
||||
break;
|
||||
}
|
||||
if (isRelative)
|
||||
r = r / norm(src2, normType);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -923,47 +963,38 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
|
||||
return;
|
||||
}
|
||||
|
||||
CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && src1.rows == src2.rows && src2.rows == dst.rows);
|
||||
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
|
||||
|
||||
Context *clCxt = src1.clCxt;
|
||||
int channels = dst.oclchannels();
|
||||
int depth = dst.depth();
|
||||
|
||||
size_t vector_length = 1;
|
||||
int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
|
||||
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
|
||||
int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels();
|
||||
int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1();
|
||||
int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1();
|
||||
int dststep1 = dst.step / dst.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1();
|
||||
|
||||
size_t localThreads[3] = { 64, 4, 1 };
|
||||
size_t globalThreads[3] = { cols, dst.rows, 1 };
|
||||
size_t globalThreads[3] = { cols1, dst.rows, 1 };
|
||||
|
||||
int dst_step1 = dst.cols * dst.elemSize();
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
||||
|
||||
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
|
||||
}
|
||||
|
||||
void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angleInDegrees)
|
||||
void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees)
|
||||
{
|
||||
CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F));
|
||||
CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0);
|
||||
|
||||
Angle.create(x.size(), x.type());
|
||||
string kernelName = angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians";
|
||||
if (angleInDegrees)
|
||||
arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
|
||||
else
|
||||
arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
|
||||
arithmetic_phase_run(x, y, Angle, angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians", &arithm_phase);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -1228,21 +1259,22 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
|
||||
///////////////////////////// countNonZero ///////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, string kernelName)
|
||||
static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int groupnum, string kernelName)
|
||||
{
|
||||
vector<pair<size_t , const void *> > args;
|
||||
int all_cols = src.step / (vlen * src.elemSize1());
|
||||
int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1());
|
||||
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1;
|
||||
int ochannels = src.oclchannels();
|
||||
int all_cols = src.step / src.elemSize();
|
||||
int pre_cols = (src.offset % src.step) / src.elemSize();
|
||||
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
|
||||
int invalid_cols = pre_cols + sec_cols;
|
||||
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
|
||||
int offset = src.offset / (vlen * src.elemSize1());
|
||||
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
|
||||
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels();
|
||||
int offset = src.offset / src.elemSize();
|
||||
|
||||
char build_options[50];
|
||||
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e);
|
||||
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
||||
const char * const channelMap[] = { " ", " ", "2", "4", "4" };
|
||||
string buildOptions = format("-D srcT=%s%s -D dstT=int%s", typeMap[src.depth()], channelMap[ochannels],
|
||||
channelMap[ochannels]);
|
||||
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset));
|
||||
@ -1250,33 +1282,44 @@ static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen
|
||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
|
||||
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
|
||||
openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, gt, lt, args, -1, -1, build_options);
|
||||
|
||||
size_t globalThreads[3] = { groupnum * 256, 1, 1 };
|
||||
size_t localThreads[3] = { 256, 1, 1 };
|
||||
|
||||
openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, globalThreads, localThreads,
|
||||
args, -1, -1, buildOptions.c_str());
|
||||
}
|
||||
|
||||
int cv::ocl::countNonZero(const oclMat &src)
|
||||
{
|
||||
size_t groupnum = src.clCxt->computeUnits();
|
||||
CV_Assert(src.step % src.elemSize() == 0);
|
||||
CV_Assert(src.channels() == 1);
|
||||
|
||||
Context *clCxt = src.clCxt;
|
||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||
{
|
||||
CV_Error(CV_GpuNotSupported, "selected device doesn't support double");
|
||||
}
|
||||
|
||||
size_t groupnum = src.clCxt->computeUnits();
|
||||
CV_Assert(groupnum != 0);
|
||||
int vlen = 8 , dbsize = groupnum * vlen;
|
||||
Context *clCxt = src.clCxt;
|
||||
int dbsize = groupnum;
|
||||
|
||||
string kernelName = "arithm_op_nonzero";
|
||||
|
||||
AutoBuffer<int> _buf(dbsize);
|
||||
int *p = (int*)_buf, nonzero = 0;
|
||||
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int));
|
||||
arithmetic_countNonZero_run(src, dstBuffer, vlen, groupnum, kernelName);
|
||||
|
||||
memset(p, 0, dbsize * sizeof(int));
|
||||
|
||||
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int));
|
||||
arithmetic_countNonZero_run(src, dstBuffer, groupnum, kernelName);
|
||||
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(int));
|
||||
|
||||
for (int i = 0; i < dbsize; i++)
|
||||
nonzero += p[i];
|
||||
|
||||
openCLSafeCall(clReleaseMemObject(dstBuffer));
|
||||
|
||||
return nonzero;
|
||||
}
|
||||
|
||||
@ -1522,8 +1565,8 @@ oclMatExpr::operator oclMat() const
|
||||
/////////////////////////////// transpose ////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define TILE_DIM (32)
|
||||
#define BLOCK_ROWS (256/TILE_DIM)
|
||||
#define TILE_DIM (32)
|
||||
#define BLOCK_ROWS (256 / TILE_DIM)
|
||||
|
||||
static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false)
|
||||
{
|
||||
@ -1702,63 +1745,35 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
|
||||
/////////////////////////////// setIdentity //////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void cv::ocl::setIdentity(oclMat& src, double scalar)
|
||||
void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar)
|
||||
{
|
||||
CV_Assert(src.empty() == false && src.rows == src.cols);
|
||||
CV_Assert(src.type() == CV_32SC1 || src.type() == CV_32FC1);
|
||||
int src_step = src.step/src.elemSize();
|
||||
Context *clCxt = Context::getContext();
|
||||
size_t local_threads[] = {16, 16, 1};
|
||||
size_t global_threads[] = {src.cols, src.rows, 1};
|
||||
|
||||
string kernelName = "setIdentityKernel";
|
||||
if (src.type() == CV_32FC1)
|
||||
kernelName += "_F1";
|
||||
else if (src.type() == CV_32SC1)
|
||||
kernelName += "_I1";
|
||||
else
|
||||
if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||
{
|
||||
kernelName += "_D1";
|
||||
if (!(clCxt->supportsFeature(Context::CL_DOUBLE)))
|
||||
{
|
||||
oclMat temp;
|
||||
src.convertTo(temp, CV_32FC1);
|
||||
temp.copyTo(src);
|
||||
}
|
||||
|
||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||
return;
|
||||
}
|
||||
|
||||
CV_Assert(src.step % src.elemSize() == 0);
|
||||
|
||||
int src_step1 = src.step / src.elemSize(), src_offset1 = src.offset / src.elemSize();
|
||||
size_t local_threads[] = { 16, 16, 1 };
|
||||
size_t global_threads[] = { src.cols, src.rows, 1 };
|
||||
|
||||
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
||||
const char * const channelMap[] = { "", "", "2", "4", "4" };
|
||||
string buildOptions = format("-D T=%s%s", typeMap[src.depth()], channelMap[src.oclchannels()]);
|
||||
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step1 ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset1 ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
||||
|
||||
int scalar_i = 0;
|
||||
float scalar_f = 0.0f;
|
||||
if (clCxt->supportsFeature(Context::CL_DOUBLE))
|
||||
{
|
||||
if (src.type() == CV_32SC1)
|
||||
{
|
||||
scalar_i = (int)scalar;
|
||||
args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i));
|
||||
}
|
||||
else
|
||||
args.push_back(make_pair(sizeof(cl_double), (void*)&scalar));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (src.type() == CV_32SC1)
|
||||
{
|
||||
scalar_i = (int)scalar;
|
||||
args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i));
|
||||
}
|
||||
else
|
||||
{
|
||||
scalar_f = (float)scalar;
|
||||
args.push_back(make_pair(sizeof(cl_float), (void*)&scalar_f));
|
||||
}
|
||||
}
|
||||
oclMat sc(1, 1, src.type(), scalar);
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&sc.data ));
|
||||
|
||||
openCLExecuteKernel(clCxt, &arithm_setidentity, kernelName, global_threads, local_threads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &arithm_setidentity, "setIdentity", global_threads, local_threads,
|
||||
args, -1, -1, buildOptions.c_str());
|
||||
}
|
||||
|
93
modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
Normal file
93
modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
Normal file
@ -0,0 +1,93 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
__kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset,
|
||||
__global srcT *src2, int src2_step, int src2_offset,
|
||||
__global dstT *dst, int dst_step, int dst_offset,
|
||||
int cols, int rows)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
dstT t1 = convertToDstT(src2[src2_index]);
|
||||
dstT t2 = t0 - t1;
|
||||
|
||||
dst[dst_index] = t2 >= 0 ? t2 : -t2;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset,
|
||||
__global dstT *dst, int dst_step, int dst_offset,
|
||||
int cols, int rows)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
|
||||
dst[dst_index] = t0 >= 0 ? t0 : -t0;
|
||||
}
|
||||
}
|
@ -53,169 +53,117 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined (DEPTH_0)
|
||||
#define VEC_TYPE uchar8
|
||||
#define CONVERT_TYPE convert_uchar8
|
||||
#define MIN_VAL 0
|
||||
#define MAX_VAL 255
|
||||
#endif
|
||||
#if defined (DEPTH_1)
|
||||
#define VEC_TYPE char8
|
||||
#define CONVERT_TYPE convert_char8
|
||||
#define MIN_VAL -128
|
||||
#define MAX_VAL 127
|
||||
#endif
|
||||
#if defined (DEPTH_2)
|
||||
#define VEC_TYPE ushort8
|
||||
#define CONVERT_TYPE convert_ushort8
|
||||
#define MIN_VAL 0
|
||||
#define MAX_VAL 65535
|
||||
#endif
|
||||
#if defined (DEPTH_3)
|
||||
#define VEC_TYPE short8
|
||||
#define CONVERT_TYPE convert_short8
|
||||
#define MIN_VAL -32768
|
||||
#define MAX_VAL 32767
|
||||
#endif
|
||||
#if defined (DEPTH_4)
|
||||
#define VEC_TYPE int8
|
||||
#define CONVERT_TYPE convert_int8
|
||||
#define MIN_VAL INT_MIN
|
||||
#define MAX_VAL INT_MAX
|
||||
#endif
|
||||
#if defined (DEPTH_5)
|
||||
#define VEC_TYPE float8
|
||||
#define CONVERT_TYPE convert_float8
|
||||
#define MIN_VAL (-FLT_MAX)
|
||||
#define MAX_VAL FLT_MAX
|
||||
#endif
|
||||
#if defined (DEPTH_6)
|
||||
#define VEC_TYPE double8
|
||||
#define CONVERT_TYPE convert_double8
|
||||
#define MIN_VAL (-DBL_MAX)
|
||||
#define MAX_VAL DBL_MAX
|
||||
#endif
|
||||
|
||||
#if defined (REPEAT_S0)
|
||||
#define repeat_s(a) a = a;
|
||||
#endif
|
||||
#if defined (REPEAT_S1)
|
||||
#define repeat_s(a) a.s0 = a.s1;
|
||||
#endif
|
||||
#if defined (REPEAT_S2)
|
||||
#define repeat_s(a) a.s0 = a.s2;a.s1 = a.s2;
|
||||
#endif
|
||||
#if defined (REPEAT_S3)
|
||||
#define repeat_s(a) a.s0 = a.s3;a.s1 = a.s3;a.s2 = a.s3;
|
||||
#endif
|
||||
#if defined (REPEAT_S4)
|
||||
#define repeat_s(a) a.s0 = a.s4;a.s1 = a.s4;a.s2 = a.s4;a.s3 = a.s4;
|
||||
#endif
|
||||
#if defined (REPEAT_S5)
|
||||
#define repeat_s(a) a.s0 = a.s5;a.s1 = a.s5;a.s2 = a.s5;a.s3 = a.s5;a.s4 = a.s5;
|
||||
#endif
|
||||
#if defined (REPEAT_S6)
|
||||
#define repeat_s(a) a.s0 = a.s6;a.s1 = a.s6;a.s2 = a.s6;a.s3 = a.s6;a.s4 = a.s6;a.s5 = a.s6;
|
||||
#endif
|
||||
#if defined (REPEAT_S7)
|
||||
#define repeat_s(a) a.s0 = a.s7;a.s1 = a.s7;a.s2 = a.s7;a.s3 = a.s7;a.s4 = a.s7;a.s5 = a.s7;a.s6 = a.s7;
|
||||
#endif
|
||||
|
||||
#if defined (REPEAT_E0)
|
||||
#define repeat_e(a) a = a;
|
||||
#endif
|
||||
#if defined (REPEAT_E1)
|
||||
#define repeat_e(a) a.s7 = a.s6;
|
||||
#endif
|
||||
#if defined (REPEAT_E2)
|
||||
#define repeat_e(a) a.s7 = a.s5;a.s6 = a.s5;
|
||||
#endif
|
||||
#if defined (REPEAT_E3)
|
||||
#define repeat_e(a) a.s7 = a.s4;a.s6 = a.s4;a.s5 = a.s4;
|
||||
#endif
|
||||
#if defined (REPEAT_E4)
|
||||
#define repeat_e(a) a.s7 = a.s3;a.s6 = a.s3;a.s5 = a.s3;a.s4 = a.s3;
|
||||
#endif
|
||||
#if defined (REPEAT_E5)
|
||||
#define repeat_e(a) a.s7 = a.s2;a.s6 = a.s2;a.s5 = a.s2;a.s4 = a.s2;a.s3 = a.s2;
|
||||
#endif
|
||||
#if defined (REPEAT_E6)
|
||||
#define repeat_e(a) a.s7 = a.s1;a.s6 = a.s1;a.s5 = a.s1;a.s4 = a.s1;a.s3 = a.s1;a.s2 = a.s1;
|
||||
#endif
|
||||
#if defined (REPEAT_E7)
|
||||
#define repeat_e(a) a.s7 = a.s0;a.s6 = a.s0;a.s5 = a.s0;a.s4 = a.s0;a.s3 = a.s0;a.s2 = a.s0;a.s1 = a.s0;
|
||||
#endif
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
|
||||
|
||||
/**************************************Array minMax**************************************/
|
||||
__kernel void arithm_op_minMax (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
|
||||
__global VEC_TYPE *src, __global VEC_TYPE *dst)
|
||||
|
||||
__kernel void arithm_op_minMax(__global const T * src, __global T * dst,
|
||||
int cols, int invalid_cols, int offset, int elemnum, int groupnum)
|
||||
{
|
||||
unsigned int lid = get_local_id(0);
|
||||
unsigned int gid = get_group_id(0);
|
||||
unsigned int id = get_global_id(0);
|
||||
unsigned int id = get_global_id(0);
|
||||
|
||||
unsigned int idx = offset + id + (id / cols) * invalid_cols;
|
||||
__local VEC_TYPE localmem_max[128],localmem_min[128];
|
||||
VEC_TYPE minval,maxval,temp;
|
||||
if(id < elemnum)
|
||||
{
|
||||
temp = src[idx];
|
||||
if(id % cols == 0 )
|
||||
{
|
||||
repeat_s(temp);
|
||||
}
|
||||
if(id % cols == cols - 1)
|
||||
{
|
||||
repeat_e(temp);
|
||||
}
|
||||
minval = temp;
|
||||
maxval = temp;
|
||||
}
|
||||
else
|
||||
{
|
||||
minval = MAX_VAL;
|
||||
maxval = MIN_VAL;
|
||||
}
|
||||
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
|
||||
|
||||
__local T localmem_max[128], localmem_min[128];
|
||||
T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
|
||||
|
||||
for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
|
||||
{
|
||||
idx = offset + id + (id / cols) * invalid_cols;
|
||||
temp = src[idx];
|
||||
if(id % cols == 0 )
|
||||
{
|
||||
repeat_s(temp);
|
||||
}
|
||||
if(id % cols == cols - 1)
|
||||
{
|
||||
repeat_e(temp);
|
||||
}
|
||||
minval = min(minval,temp);
|
||||
maxval = max(maxval,temp);
|
||||
minval = min(minval, temp);
|
||||
maxval = max(maxval, temp);
|
||||
}
|
||||
|
||||
if(lid > 127)
|
||||
{
|
||||
localmem_min[lid - 128] = minval;
|
||||
localmem_max[lid - 128] = maxval;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(lid < 128)
|
||||
{
|
||||
localmem_min[lid] = min(minval,localmem_min[lid]);
|
||||
localmem_max[lid] = max(maxval,localmem_max[lid]);
|
||||
localmem_min[lid] = min(minval, localmem_min[lid]);
|
||||
localmem_max[lid] = max(maxval, localmem_max[lid]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for(int lsize = 64; lsize > 0; lsize >>= 1)
|
||||
|
||||
for (int lsize = 64; lsize > 0; lsize >>= 1)
|
||||
{
|
||||
if(lid < lsize)
|
||||
if (lid < lsize)
|
||||
{
|
||||
int lid2 = lsize + lid;
|
||||
localmem_min[lid] = min(localmem_min[lid] , localmem_min[lid2]);
|
||||
localmem_max[lid] = max(localmem_max[lid] , localmem_max[lid2]);
|
||||
localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
|
||||
localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if( lid == 0)
|
||||
|
||||
if (lid == 0)
|
||||
{
|
||||
dst[gid] = localmem_min[0];
|
||||
dst[gid + groupnum] = localmem_max[0];
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst,
|
||||
int cols, int invalid_cols, int offset,
|
||||
int elemnum, int groupnum,
|
||||
const __global uchar * mask, int minvalid_cols, int moffset)
|
||||
{
|
||||
unsigned int lid = get_local_id(0);
|
||||
unsigned int gid = get_group_id(0);
|
||||
unsigned int id = get_global_id(0);
|
||||
|
||||
unsigned int idx = offset + id + (id / cols) * invalid_cols;
|
||||
unsigned int midx = moffset + id + (id / cols) * minvalid_cols;
|
||||
|
||||
__local T localmem_max[128], localmem_min[128];
|
||||
T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
|
||||
|
||||
for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
|
||||
{
|
||||
idx = offset + id + (id / cols) * invalid_cols;
|
||||
midx = moffset + id + (id / cols) * minvalid_cols;
|
||||
|
||||
if (mask[midx])
|
||||
{
|
||||
temp = src[idx];
|
||||
minval = min(minval, temp);
|
||||
maxval = max(maxval, temp);
|
||||
}
|
||||
}
|
||||
|
||||
if(lid > 127)
|
||||
{
|
||||
localmem_min[lid - 128] = minval;
|
||||
localmem_max[lid - 128] = maxval;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(lid < 128)
|
||||
{
|
||||
localmem_min[lid] = min(minval, localmem_min[lid]);
|
||||
localmem_max[lid] = max(maxval, localmem_max[lid]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (int lsize = 64; lsize > 0; lsize >>= 1)
|
||||
{
|
||||
if (lid < lsize)
|
||||
{
|
||||
int lid2 = lsize + lid;
|
||||
localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
|
||||
localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (lid == 0)
|
||||
{
|
||||
dst[gid] = localmem_min[0];
|
||||
dst[gid + groupnum] = localmem_max[0];
|
||||
|
@ -41,151 +41,53 @@
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
///
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
#if defined (DEPTH_0)
|
||||
#define VEC_TYPE uchar8
|
||||
#endif
|
||||
#if defined (DEPTH_1)
|
||||
#define VEC_TYPE char8
|
||||
#endif
|
||||
#if defined (DEPTH_2)
|
||||
#define VEC_TYPE ushort8
|
||||
#endif
|
||||
#if defined (DEPTH_3)
|
||||
#define VEC_TYPE short8
|
||||
#endif
|
||||
#if defined (DEPTH_4)
|
||||
#define VEC_TYPE int8
|
||||
#endif
|
||||
#if defined (DEPTH_5)
|
||||
#define VEC_TYPE float8
|
||||
#endif
|
||||
#if defined (DEPTH_6)
|
||||
#define VEC_TYPE double8
|
||||
#endif
|
||||
|
||||
#if defined (REPEAT_S0)
|
||||
#define repeat_s(a) a = a;
|
||||
#endif
|
||||
#if defined (REPEAT_S1)
|
||||
#define repeat_s(a) a.s0 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S2)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S3)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S4)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S5)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S6)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S7)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0;
|
||||
#endif
|
||||
|
||||
#if defined (REPEAT_E0)
|
||||
#define repeat_e(a) a = a;
|
||||
#endif
|
||||
#if defined (REPEAT_E1)
|
||||
#define repeat_e(a) a.s7 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E2)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E3)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E4)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E5)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E6)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E7)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0;
|
||||
#endif
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
|
||||
|
||||
/**************************************Count NonZero**************************************/
|
||||
__kernel void arithm_op_nonzero (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
|
||||
__global VEC_TYPE *src, __global int8 *dst)
|
||||
|
||||
__kernel void arithm_op_nonzero(int cols, int invalid_cols, int offset, int elemnum, int groupnum,
|
||||
__global srcT *src, __global dstT *dst)
|
||||
{
|
||||
unsigned int lid = get_local_id(0);
|
||||
unsigned int gid = get_group_id(0);
|
||||
unsigned int id = get_global_id(0);
|
||||
unsigned int idx = offset + id + (id / cols) * invalid_cols;
|
||||
__local int8 localmem_nonzero[128];
|
||||
int8 nonzero;
|
||||
VEC_TYPE zero=0,one=1,temp;
|
||||
if(id < elemnum)
|
||||
{
|
||||
temp = src[idx];
|
||||
if(id % cols == 0 )
|
||||
{
|
||||
repeat_s(temp);
|
||||
}
|
||||
if(id % cols == cols - 1)
|
||||
{
|
||||
repeat_e(temp);
|
||||
}
|
||||
nonzero = convert_int8(temp == zero ? zero:one);
|
||||
}
|
||||
else
|
||||
{
|
||||
nonzero = 0;
|
||||
}
|
||||
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
|
||||
{
|
||||
idx = offset + id + (id / cols) * invalid_cols;
|
||||
temp = src[idx];
|
||||
if(id % cols == 0 )
|
||||
{
|
||||
repeat_s(temp);
|
||||
}
|
||||
if(id % cols == cols - 1)
|
||||
{
|
||||
repeat_e(temp);
|
||||
}
|
||||
nonzero = nonzero + convert_int8(temp == zero ? zero:one);
|
||||
}
|
||||
if(lid > 127)
|
||||
{
|
||||
localmem_nonzero[lid - 128] = nonzero;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lid < 128)
|
||||
{
|
||||
localmem_nonzero[lid] = nonzero + localmem_nonzero[lid];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for(int lsize = 64; lsize > 0; lsize >>= 1)
|
||||
{
|
||||
if(lid < lsize)
|
||||
{
|
||||
unsigned int lid = get_local_id(0);
|
||||
unsigned int gid = get_group_id(0);
|
||||
unsigned int id = get_global_id(0);
|
||||
|
||||
unsigned int idx = offset + id + (id / cols) * invalid_cols;
|
||||
__local dstT localmem_nonzero[128];
|
||||
dstT nonzero = (dstT)(0);
|
||||
srcT zero = (srcT)(0), one = (srcT)(1);
|
||||
|
||||
for (int grain = groupnum << 8; id < elemnum; id += grain)
|
||||
{
|
||||
idx = offset + id + (id / cols) * invalid_cols;
|
||||
nonzero += src[idx] == zero ? zero : one;
|
||||
}
|
||||
|
||||
if (lid > 127)
|
||||
localmem_nonzero[lid - 128] = nonzero;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lid < 128)
|
||||
localmem_nonzero[lid] = nonzero + localmem_nonzero[lid];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (int lsize = 64; lsize > 0; lsize >>= 1)
|
||||
{
|
||||
if (lid < lsize)
|
||||
{
|
||||
int lid2 = lsize + lid;
|
||||
localmem_nonzero[lid] = localmem_nonzero[lid] + localmem_nonzero[lid2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if( lid == 0)
|
||||
{
|
||||
dst[gid] = localmem_nonzero[0];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (lid == 0)
|
||||
dst[gid] = localmem_nonzero[0];
|
||||
}
|
||||
|
@ -45,110 +45,125 @@
|
||||
//
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
#define CV_PI 3.1415926535898
|
||||
/**************************************phase inradians**************************************/
|
||||
__kernel void arithm_phase_inradians_D5 (__global float *src1, int src1_step, int src1_offset,
|
||||
__global float *src2, int src2_step, int src2_offset,
|
||||
__global float *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, int dst_step1)
|
||||
{
|
||||
|
||||
#define CV_PI 3.1415926535898
|
||||
#define CV_2PI 2*3.1415926535898
|
||||
|
||||
/**************************************phase inradians**************************************/
|
||||
|
||||
__kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1,
|
||||
__global float *src2, int src2_step1, int src2_offset1,
|
||||
__global float *dst, int dst_step1, int dst_offset1,
|
||||
int cols, int rows)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < cols && y < rows)
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
|
||||
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
|
||||
int src1_index = mad24(y, src1_step1, x + src1_offset1);
|
||||
int src2_index = mad24(y, src2_step1, x + src2_offset1);
|
||||
int dst_index = mad24(y, dst_step1, x + dst_offset1);
|
||||
|
||||
float data1 = *((__global float *)((__global char *)src1 + src1_index));
|
||||
float data2 = *((__global float *)((__global char *)src2 + src2_index));
|
||||
float tmp = atan2(data2,data1);
|
||||
float data1 = src1[src1_index];
|
||||
float data2 = src2[src2_index];
|
||||
float tmp = atan2(data2, data1);
|
||||
|
||||
*((__global float *)((__global char *)dst + dst_index)) = tmp;
|
||||
if (tmp < 0)
|
||||
tmp += CV_2PI;
|
||||
|
||||
dst[dst_index] = tmp;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_phase_inradians_D6 (__global double *src1, int src1_step, int src1_offset,
|
||||
__global double *src2, int src2_step, int src2_offset,
|
||||
__global double *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, int dst_step1)
|
||||
__kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1,
|
||||
__global double *src2, int src2_step1, int src2_offset1,
|
||||
__global double *dst, int dst_step1, int dst_offset1,
|
||||
int cols, int rows)
|
||||
{
|
||||
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < cols && y < rows)
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
|
||||
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
|
||||
int src1_index = mad24(y, src1_step1, x + src1_offset1);
|
||||
int src2_index = mad24(y, src2_step1, x + src2_offset1);
|
||||
int dst_index = mad24(y, dst_step1, x + dst_offset1);
|
||||
|
||||
double data1 = *((__global double *)((__global char *)src1 + src1_index));
|
||||
double data2 = *((__global double *)((__global char *)src2 + src2_index));
|
||||
double data1 = src1[src1_index];
|
||||
double data2 = src2[src2_index];
|
||||
double tmp = atan2(data2, data1);
|
||||
|
||||
*((__global double *)((__global char *)dst + dst_index)) = atan2(data2,data1);
|
||||
if (tmp < 0)
|
||||
tmp += CV_2PI;
|
||||
|
||||
dst[dst_index] = tmp;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**************************************phase indegrees**************************************/
|
||||
__kernel void arithm_phase_indegrees_D5 (__global float *src1, int src1_step, int src1_offset,
|
||||
__global float *src2, int src2_step, int src2_offset,
|
||||
__global float *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, int dst_step1)
|
||||
{
|
||||
|
||||
__kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1,
|
||||
__global float *src2, int src2_step1, int src2_offset1,
|
||||
__global float *dst, int dst_step1, int dst_offset1,
|
||||
int cols, int rows)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < cols && y < rows)
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
|
||||
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
|
||||
int src1_index = mad24(y, src1_step1, x + src1_offset1);
|
||||
int src2_index = mad24(y, src2_step1, x + src2_offset1);
|
||||
int dst_index = mad24(y, dst_step1, x + dst_offset1);
|
||||
|
||||
float data1 = *((__global float *)((__global char *)src1 + src1_index));
|
||||
float data2 = *((__global float *)((__global char *)src2 + src2_index));
|
||||
float tmp = atan2(data2,data1);
|
||||
float tmp_data = 180*tmp/CV_PI;
|
||||
float data1 = src1[src1_index];
|
||||
float data2 = src2[src2_index];
|
||||
float tmp = atan2(data2, data1);
|
||||
tmp = 180 * tmp / CV_PI;
|
||||
|
||||
*((__global float *)((__global char *)dst + dst_index)) = tmp_data;
|
||||
if (tmp < 0)
|
||||
tmp += 360;
|
||||
|
||||
dst[dst_index] = tmp;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step, int src1_offset,
|
||||
__global double *src2, int src2_step, int src2_offset,
|
||||
__global double *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, int dst_step1)
|
||||
__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1,
|
||||
__global double *src2, int src2_step1, int src2_offset1,
|
||||
__global double *dst, int dst_step1, int dst_offset1,
|
||||
int cols, int rows)
|
||||
{
|
||||
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < cols && y < rows)
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
|
||||
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
|
||||
int src1_index = mad24(y, src1_step1, x + src1_offset1);
|
||||
int src2_index = mad24(y, src2_step1, x + src2_offset1);
|
||||
int dst_index = mad24(y, dst_step1, x + dst_offset1);
|
||||
|
||||
double data1 = *((__global double *)((__global char *)src1 + src1_index));
|
||||
double data2 = *((__global double *)((__global char *)src2 + src2_index));
|
||||
double tmp = atan2(data2,data1);
|
||||
double tmp_data = 180*tmp/CV_PI;
|
||||
double data1 = src1[src1_index];
|
||||
double data2 = src2[src2_index];
|
||||
double tmp = atan2(src2[src2_index], src1[src1_index]);
|
||||
|
||||
*((__global double *)((__global char *)dst + dst_index)) = tmp_data;
|
||||
tmp = 180 * tmp / CV_PI;
|
||||
if (tmp < 0)
|
||||
tmp += 360;
|
||||
|
||||
dst[dst_index] = tmp;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -42,6 +42,7 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
@ -50,51 +51,19 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#define DATA_TYPE double
|
||||
#else
|
||||
#define DATA_TYPE float
|
||||
#endif
|
||||
|
||||
__kernel void setIdentityKernel_F1(__global float* src, int src_row, int src_col, int src_step, DATA_TYPE scalar)
|
||||
__kernel void setIdentity(__global T * src, int src_step, int src_offset,
|
||||
int cols, int rows, __global const T * scalar)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < src_col && y < src_row)
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
if(x == y)
|
||||
src[y * src_step + x] = scalar;
|
||||
int src_index = mad24(y, src_step, src_offset + x);
|
||||
|
||||
if (x == y)
|
||||
src[src_index] = *scalar;
|
||||
else
|
||||
src[y * src_step + x] = 0 * scalar;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void setIdentityKernel_D1(__global DATA_TYPE* src, int src_row, int src_col, int src_step, DATA_TYPE scalar)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < src_col && y < src_row)
|
||||
{
|
||||
if(x == y)
|
||||
src[y * src_step + x] = scalar;
|
||||
else
|
||||
src[y * src_step + x] = 0 * scalar;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void setIdentityKernel_I1(__global int* src, int src_row, int src_col, int src_step, int scalar)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < src_col && y < src_row)
|
||||
{
|
||||
if(x == y)
|
||||
src[y * src_step + x] = scalar;
|
||||
else
|
||||
src[y * src_step + x] = 0 * scalar;
|
||||
src[src_index] = 0;
|
||||
}
|
||||
}
|
||||
|
@ -43,163 +43,62 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#define RES_TYPE double8
|
||||
#define CONVERT_RES_TYPE convert_double8
|
||||
#else
|
||||
#define RES_TYPE float8
|
||||
#define CONVERT_RES_TYPE convert_float8
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined (DEPTH_0)
|
||||
#define VEC_TYPE uchar8
|
||||
#if defined (FUNC_SUM)
|
||||
#define FUNC(a, b) b += a;
|
||||
#endif
|
||||
#if defined (DEPTH_1)
|
||||
#define VEC_TYPE char8
|
||||
#if defined (FUNC_ABS_SUM)
|
||||
#define FUNC(a, b) b += a >= 0 ? a : -a;
|
||||
#endif
|
||||
#if defined (DEPTH_2)
|
||||
#define VEC_TYPE ushort8
|
||||
#if defined (FUNC_SQR_SUM)
|
||||
#define FUNC(a, b) b += a * a;
|
||||
#endif
|
||||
#if defined (DEPTH_3)
|
||||
#define VEC_TYPE short8
|
||||
#endif
|
||||
#if defined (DEPTH_4)
|
||||
#define VEC_TYPE int8
|
||||
#endif
|
||||
#if defined (DEPTH_5)
|
||||
#define VEC_TYPE float8
|
||||
#endif
|
||||
#if defined (DEPTH_6)
|
||||
#define VEC_TYPE double8
|
||||
#endif
|
||||
|
||||
#if defined (FUNC_TYPE_0)
|
||||
#define FUNC(a,b) b += a;
|
||||
#endif
|
||||
#if defined (FUNC_TYPE_1)
|
||||
#define FUNC(a,b) b = b + (a >= 0 ? a : -a);
|
||||
#endif
|
||||
#if defined (FUNC_TYPE_2)
|
||||
#define FUNC(a,b) b = b + a * a;
|
||||
#endif
|
||||
|
||||
#if defined (REPEAT_S0)
|
||||
#define repeat_s(a) a = a;
|
||||
#endif
|
||||
#if defined (REPEAT_S1)
|
||||
#define repeat_s(a) a.s0 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S2)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S3)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S4)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S5)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S6)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_S7)
|
||||
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0;
|
||||
#endif
|
||||
|
||||
#if defined (REPEAT_E0)
|
||||
#define repeat_e(a) a = a;
|
||||
#endif
|
||||
#if defined (REPEAT_E1)
|
||||
#define repeat_e(a) a.s7 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E2)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E3)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E4)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E5)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E6)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;
|
||||
#endif
|
||||
#if defined (REPEAT_E7)
|
||||
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0;
|
||||
#endif
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
|
||||
|
||||
/**************************************Array buffer SUM**************************************/
|
||||
__kernel void arithm_op_sum (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
|
||||
__global VEC_TYPE *src, __global RES_TYPE *dst)
|
||||
|
||||
__kernel void arithm_op_sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum,
|
||||
__global srcT *src, __global dstT *dst)
|
||||
{
|
||||
unsigned int lid = get_local_id(0);
|
||||
unsigned int gid = get_group_id(0);
|
||||
unsigned int id = get_global_id(0);
|
||||
unsigned int id = get_global_id(0);
|
||||
unsigned int idx = offset + id + (id / cols) * invalid_cols;
|
||||
__local RES_TYPE localmem_sum[128];
|
||||
RES_TYPE sum = 0,temp;
|
||||
if(id < elemnum)
|
||||
{
|
||||
temp = CONVERT_RES_TYPE(src[idx]);
|
||||
if(id % cols == 0 )
|
||||
{
|
||||
repeat_s(temp);
|
||||
}
|
||||
if(id % cols == cols - 1)
|
||||
{
|
||||
repeat_e(temp);
|
||||
}
|
||||
FUNC(temp,sum);
|
||||
}
|
||||
else
|
||||
{
|
||||
sum = 0;
|
||||
}
|
||||
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
|
||||
|
||||
__local dstT localmem_sum[128];
|
||||
dstT sum = (dstT)(0), temp;
|
||||
|
||||
for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
|
||||
{
|
||||
idx = offset + id + (id / cols) * invalid_cols;
|
||||
temp = CONVERT_RES_TYPE(src[idx]);
|
||||
if(id % cols == 0 )
|
||||
{
|
||||
repeat_s(temp);
|
||||
}
|
||||
if(id % cols == cols - 1)
|
||||
{
|
||||
repeat_e(temp);
|
||||
}
|
||||
FUNC(temp,sum);
|
||||
temp = convertToDstT(src[idx]);
|
||||
FUNC(temp, sum);
|
||||
}
|
||||
if(lid > 127)
|
||||
{
|
||||
|
||||
if (lid > 127)
|
||||
localmem_sum[lid - 128] = sum;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lid < 128)
|
||||
{
|
||||
|
||||
if (lid < 128)
|
||||
localmem_sum[lid] = sum + localmem_sum[lid];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for(int lsize = 64; lsize > 0; lsize >>= 1)
|
||||
|
||||
for (int lsize = 64; lsize > 0; lsize >>= 1)
|
||||
{
|
||||
if(lid < lsize)
|
||||
if (lid < lsize)
|
||||
{
|
||||
int lid2 = lsize + lid;
|
||||
localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if( lid == 0)
|
||||
{
|
||||
|
||||
if (lid == 0)
|
||||
dst[gid] = localmem_sum[0];
|
||||
}
|
||||
}
|
||||
|
@ -1,247 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Shengen Yan,yanshengen@gmail.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#define RES_TYPE double4
|
||||
#define CONVERT_RES_TYPE convert_double4
|
||||
#else
|
||||
#define RES_TYPE float4
|
||||
#define CONVERT_RES_TYPE convert_float4
|
||||
#endif
|
||||
|
||||
#if defined (DEPTH_0)
|
||||
#define VEC_TYPE uchar4
|
||||
#endif
|
||||
#if defined (DEPTH_1)
|
||||
#define VEC_TYPE char4
|
||||
#endif
|
||||
#if defined (DEPTH_2)
|
||||
#define VEC_TYPE ushort4
|
||||
#endif
|
||||
#if defined (DEPTH_3)
|
||||
#define VEC_TYPE short4
|
||||
#endif
|
||||
#if defined (DEPTH_4)
|
||||
#define VEC_TYPE int4
|
||||
#endif
|
||||
#if defined (DEPTH_5)
|
||||
#define VEC_TYPE float4
|
||||
#endif
|
||||
#if defined (DEPTH_6)
|
||||
#define VEC_TYPE double4
|
||||
#endif
|
||||
|
||||
#if defined (FUNC_TYPE_0)
|
||||
#define FUNC(a,b) b += a;
|
||||
#endif
|
||||
#if defined (FUNC_TYPE_1)
|
||||
#define FUNC(a,b) b = b + (a >= 0 ? a : -a);
|
||||
#endif
|
||||
#if defined (FUNC_TYPE_2)
|
||||
#define FUNC(a,b) b = b + a * a;
|
||||
#endif
|
||||
|
||||
#if defined (REPEAT_S0)
|
||||
#define repeat_s(a,b,c) a=a; b =b; c=c;
|
||||
#endif
|
||||
#if defined (REPEAT_S1)
|
||||
#define repeat_s(a,b,c) a.s0=0; b=b; c=c;
|
||||
#endif
|
||||
#if defined (REPEAT_S2)
|
||||
#define repeat_s(a,b,c) a.s0=0; a.s1=0; b=b; c=c;
|
||||
#endif
|
||||
#if defined (REPEAT_S3)
|
||||
#define repeat_s(a,b,c) a.s0=0; a.s1=0; a.s2=0; b=b; c=c;
|
||||
#endif
|
||||
#if defined (REPEAT_S4)
|
||||
#define repeat_s(a,b,c) a=0;b=b; c=c;
|
||||
#endif
|
||||
#if defined (REPEAT_S5)
|
||||
#define repeat_s(a,b,c) a=0; b.s0=0;c=c;
|
||||
#endif
|
||||
#if defined (REPEAT_S6)
|
||||
#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; c=c;
|
||||
#endif
|
||||
#if defined (REPEAT_S7)
|
||||
#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; b.s2=0; c=c;
|
||||
#endif
|
||||
#if defined (REPEAT_S8)
|
||||
#define repeat_s(a,b,c) a=0; b=0; c=c;
|
||||
#endif
|
||||
#if defined (REPEAT_S9)
|
||||
#define repeat_s(a,b,c) a=0; b=0; c.s0=0;
|
||||
#endif
|
||||
#if defined (REPEAT_S10)
|
||||
#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0;
|
||||
#endif
|
||||
#if defined (REPEAT_S11)
|
||||
#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0; c.s2=0;
|
||||
#endif
|
||||
|
||||
#if defined (REPEAT_E0)
|
||||
#define repeat_e(a,b,c) a=a; b =b; c=c;
|
||||
#endif
|
||||
#if defined (REPEAT_E1)
|
||||
#define repeat_e(a,b,c) a=a; b=b; c.s3=0;
|
||||
#endif
|
||||
#if defined (REPEAT_E2)
|
||||
#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0;
|
||||
#endif
|
||||
#if defined (REPEAT_E3)
|
||||
#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0; c.s1=0;
|
||||
#endif
|
||||
#if defined (REPEAT_E4)
|
||||
#define repeat_e(a,b,c) a=a; b=b; c=0;
|
||||
#endif
|
||||
#if defined (REPEAT_E5)
|
||||
#define repeat_e(a,b,c) a=a; b.s3=0; c=0;
|
||||
#endif
|
||||
#if defined (REPEAT_E6)
|
||||
#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; c=0;
|
||||
#endif
|
||||
#if defined (REPEAT_E7)
|
||||
#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; b.s1=0; c=0;
|
||||
#endif
|
||||
#if defined (REPEAT_E8)
|
||||
#define repeat_e(a,b,c) a=a; b=0; c=0;
|
||||
#endif
|
||||
#if defined (REPEAT_E9)
|
||||
#define repeat_e(a,b,c) a.s3=0; b=0; c=0;
|
||||
#endif
|
||||
#if defined (REPEAT_E10)
|
||||
#define repeat_e(a,b,c) a.s3=0; a.s2=0; b=0; c=0;
|
||||
#endif
|
||||
#if defined (REPEAT_E11)
|
||||
#define repeat_e(a,b,c) a.s3=0; a.s2=0; a.s1=0; b=0; c=0;
|
||||
#endif
|
||||
|
||||
__kernel void arithm_op_sum_3 (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
|
||||
__global VEC_TYPE *src, __global RES_TYPE *dst)
|
||||
{
|
||||
unsigned int lid = get_local_id(0);
|
||||
unsigned int gid = get_group_id(0);
|
||||
unsigned int id = get_global_id(0);
|
||||
unsigned int idx = offset + id + (id / cols) * invalid_cols;
|
||||
idx = idx * 3;
|
||||
__local RES_TYPE localmem_sum1[128];
|
||||
__local RES_TYPE localmem_sum2[128];
|
||||
__local RES_TYPE localmem_sum3[128];
|
||||
RES_TYPE sum1 = 0,sum2 = 0,sum3 = 0,temp1,temp2,temp3;
|
||||
if(id < elemnum)
|
||||
{
|
||||
temp1 = CONVERT_RES_TYPE(src[idx]);
|
||||
temp2 = CONVERT_RES_TYPE(src[idx+1]);
|
||||
temp3 = CONVERT_RES_TYPE(src[idx+2]);
|
||||
if(id % cols == 0 )
|
||||
{
|
||||
repeat_s(temp1,temp2,temp3);
|
||||
}
|
||||
if(id % cols == cols - 1)
|
||||
{
|
||||
repeat_e(temp1,temp2,temp3);
|
||||
}
|
||||
FUNC(temp1,sum1);
|
||||
FUNC(temp2,sum2);
|
||||
FUNC(temp3,sum3);
|
||||
}
|
||||
else
|
||||
{
|
||||
sum1 = 0;
|
||||
sum2 = 0;
|
||||
sum3 = 0;
|
||||
}
|
||||
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
|
||||
{
|
||||
idx = offset + id + (id / cols) * invalid_cols;
|
||||
idx = idx * 3;
|
||||
temp1 = CONVERT_RES_TYPE(src[idx]);
|
||||
temp2 = CONVERT_RES_TYPE(src[idx+1]);
|
||||
temp3 = CONVERT_RES_TYPE(src[idx+2]);
|
||||
if(id % cols == 0 )
|
||||
{
|
||||
repeat_s(temp1,temp2,temp3);
|
||||
}
|
||||
if(id % cols == cols - 1)
|
||||
{
|
||||
repeat_e(temp1,temp2,temp3);
|
||||
}
|
||||
FUNC(temp1,sum1);
|
||||
FUNC(temp2,sum2);
|
||||
FUNC(temp3,sum3);
|
||||
}
|
||||
if(lid > 127)
|
||||
{
|
||||
localmem_sum1[lid - 128] = sum1;
|
||||
localmem_sum2[lid - 128] = sum2;
|
||||
localmem_sum3[lid - 128] = sum3;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lid < 128)
|
||||
{
|
||||
localmem_sum1[lid] = sum1 + localmem_sum1[lid];
|
||||
localmem_sum2[lid] = sum2 + localmem_sum2[lid];
|
||||
localmem_sum3[lid] = sum3 + localmem_sum3[lid];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for(int lsize = 64; lsize > 0; lsize >>= 1)
|
||||
{
|
||||
if(lid < lsize)
|
||||
{
|
||||
int lid2 = lsize + lid;
|
||||
localmem_sum1[lid] = localmem_sum1[lid] + localmem_sum1[lid2];
|
||||
localmem_sum2[lid] = localmem_sum2[lid] + localmem_sum2[lid2];
|
||||
localmem_sum3[lid] = localmem_sum3[lid] + localmem_sum3[lid2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if( lid == 0)
|
||||
{
|
||||
dst[gid*3] = localmem_sum1[0];
|
||||
dst[gid*3+1] = localmem_sum2[0];
|
||||
dst[gid*3+2] = localmem_sum3[0];
|
||||
}
|
||||
}
|
@ -220,8 +220,8 @@ PARAM_TEST_CASE(ArithmTestBase, int, int, bool)
|
||||
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
|
||||
src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false);
|
||||
src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -15440, 14450, false);
|
||||
src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 2, 11, false);
|
||||
src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -1540, 1740, false);
|
||||
dst1 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false);
|
||||
dst2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false);
|
||||
mask = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), CV_8UC1, 0, 2, false);
|
||||
@ -464,7 +464,6 @@ TEST_P(Mul, Scalar)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_P(Mul, Mat_Scalar)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
@ -507,7 +506,6 @@ TEST_P(Div, Scalar)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_P(Div, Mat_Scalar)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
@ -753,7 +751,7 @@ TEST_P(MinMax, MAT)
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(MinMax, DISABLED_MASK)
|
||||
TEST_P(MinMax, MASK)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
@ -1022,7 +1020,7 @@ TEST_P(MinMaxLoc, MASK)
|
||||
|
||||
typedef ArithmTestBase Sum;
|
||||
|
||||
TEST_P(Sum, DISABLED_MAT)
|
||||
TEST_P(Sum, MAT)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
@ -1031,7 +1029,121 @@ TEST_P(Sum, DISABLED_MAT)
|
||||
Scalar cpures = cv::sum(src1_roi);
|
||||
Scalar gpures = cv::ocl::sum(gsrc1);
|
||||
|
||||
//check results
|
||||
// check results
|
||||
EXPECT_NEAR(cpures[0], gpures[0], 0.1);
|
||||
EXPECT_NEAR(cpures[1], gpures[1], 0.1);
|
||||
EXPECT_NEAR(cpures[2], gpures[2], 0.1);
|
||||
EXPECT_NEAR(cpures[3], gpures[3], 0.1);
|
||||
}
|
||||
}
|
||||
|
||||
typedef ArithmTestBase SqrSum;
|
||||
|
||||
template <typename T, typename WT>
|
||||
static Scalar sqrSum(const Mat & src)
|
||||
{
|
||||
Scalar sum = Scalar::all(0);
|
||||
int cn = src.channels();
|
||||
WT data[4] = { 0, 0, 0, 0 };
|
||||
|
||||
int cols = src.cols * cn;
|
||||
for (int y = 0; y < src.rows; ++y)
|
||||
{
|
||||
const T * const sdata = src.ptr<T>(y);
|
||||
for (int x = 0; x < cols; )
|
||||
for (int i = 0; i < cn; ++i, ++x)
|
||||
{
|
||||
WT t = static_cast<WT>(sdata[x]);
|
||||
data[i] += t * t;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < cn; ++i)
|
||||
sum[i] = static_cast<double>(data[i]);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
typedef Scalar (*sumFunc)(const Mat &);
|
||||
|
||||
TEST_P(SqrSum, MAT)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
static sumFunc funcs[] = { sqrSum<uchar, int>,
|
||||
sqrSum<char, int>,
|
||||
sqrSum<ushort, int>,
|
||||
sqrSum<short, int>,
|
||||
sqrSum<int, int>,
|
||||
sqrSum<float, double>,
|
||||
sqrSum<double, double>,
|
||||
0 };
|
||||
|
||||
sumFunc func = funcs[src1_roi.depth()];
|
||||
CV_Assert(func != 0);
|
||||
|
||||
Scalar cpures = func(src1_roi);
|
||||
Scalar gpures = cv::ocl::sqrSum(gsrc1);
|
||||
|
||||
// check results
|
||||
EXPECT_NEAR(cpures[0], gpures[0], 1.0);
|
||||
EXPECT_NEAR(cpures[1], gpures[1], 1.0);
|
||||
EXPECT_NEAR(cpures[2], gpures[2], 1.0);
|
||||
EXPECT_NEAR(cpures[3], gpures[3], 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
typedef ArithmTestBase AbsSum;
|
||||
|
||||
template <typename T, typename WT>
|
||||
static Scalar absSum(const Mat & src)
|
||||
{
|
||||
Scalar sum = Scalar::all(0);
|
||||
int cn = src.channels();
|
||||
WT data[4] = { 0, 0, 0, 0 };
|
||||
|
||||
int cols = src.cols * cn;
|
||||
for (int y = 0; y < src.rows; ++y)
|
||||
{
|
||||
const T * const sdata = src.ptr<T>(y);
|
||||
for (int x = 0; x < cols; )
|
||||
for (int i = 0; i < cn; ++i, ++x)
|
||||
{
|
||||
WT t = static_cast<WT>(sdata[x]);
|
||||
data[i] += t >= 0 ? t : -t;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < cn; ++i)
|
||||
sum[i] = static_cast<double>(data[i]);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
TEST_P(AbsSum, MAT)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
static sumFunc funcs[] = { absSum<uchar, int>,
|
||||
absSum<char, int>,
|
||||
absSum<ushort, int>,
|
||||
absSum<short, int>,
|
||||
absSum<int, int>,
|
||||
absSum<float, double>,
|
||||
absSum<double, double>,
|
||||
0 };
|
||||
|
||||
sumFunc func = funcs[src1_roi.depth()];
|
||||
CV_Assert(func != 0);
|
||||
|
||||
Scalar cpures = func(src1_roi);
|
||||
Scalar gpures = cv::ocl::absSum(gsrc1);
|
||||
|
||||
// check results
|
||||
EXPECT_NEAR(cpures[0], gpures[0], 0.1);
|
||||
EXPECT_NEAR(cpures[1], gpures[1], 0.1);
|
||||
EXPECT_NEAR(cpures[2], gpures[2], 0.1);
|
||||
@ -1059,17 +1171,27 @@ TEST_P(CountNonZero, MAT)
|
||||
|
||||
typedef ArithmTestBase Phase;
|
||||
|
||||
TEST_P(Phase, DISABLED_Mat)
|
||||
TEST_P(Phase, angleInDegrees)
|
||||
{
|
||||
for (int angelInDegrees = 0; angelInDegrees < 2; angelInDegrees++)
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
cv::phase(src1_roi, src2_roi, dst1_roi, angelInDegrees ? true : false);
|
||||
cv::ocl::phase(gsrc1, gsrc2, gdst1, angelInDegrees ? true : false);
|
||||
Near(1e-2);
|
||||
}
|
||||
random_roi();
|
||||
cv::phase(src1_roi, src2_roi, dst1_roi, true);
|
||||
cv::ocl::phase(gsrc1, gsrc2, gdst1, true);
|
||||
|
||||
Near(1e-2);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Phase, angleInRadians)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
cv::phase(src1_roi, src2_roi, dst1_roi);
|
||||
cv::ocl::phase(gsrc1, gsrc2, gdst1);
|
||||
|
||||
Near(1e-2);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1301,32 +1423,136 @@ TEST_P(AddWeighted, Mat)
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////// setIdentity /////////////////////////////////////////////////
|
||||
|
||||
typedef ArithmTestBase SetIdentity;
|
||||
|
||||
TEST_P(SetIdentity, Mat)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
cv::setIdentity(dst1_roi, val);
|
||||
cv::ocl::setIdentity(gdst1, val);
|
||||
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////// meanStdDev /////////////////////////////////////////////////
|
||||
|
||||
typedef ArithmTestBase MeanStdDev;
|
||||
|
||||
TEST_P(MeanStdDev, Mat)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
Scalar cpu_mean, cpu_stddev;
|
||||
Scalar gpu_mean, gpu_stddev;
|
||||
|
||||
cv::meanStdDev(src1_roi, cpu_mean, cpu_stddev);
|
||||
cv::ocl::meanStdDev(gsrc1, gpu_mean, gpu_stddev);
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 0.1);
|
||||
EXPECT_NEAR(cpu_stddev[i], gpu_stddev[i], 0.1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////// Norm /////////////////////////////////////////////////
|
||||
|
||||
typedef ArithmTestBase Norm;
|
||||
|
||||
TEST_P(Norm, NORM_INF)
|
||||
{
|
||||
for (int relative = 0; relative < 2; ++relative)
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
int type = NORM_INF;
|
||||
if (relative == 1)
|
||||
type |= NORM_RELATIVE;
|
||||
|
||||
const double cpuRes = cv::norm(src1_roi, src2_roi, type);
|
||||
const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type);
|
||||
|
||||
EXPECT_NEAR(cpuRes, gpuRes, 0.1);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Norm, NORM_L1)
|
||||
{
|
||||
for (int relative = 0; relative < 2; ++relative)
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
int type = NORM_L1;
|
||||
if (relative == 1)
|
||||
type |= NORM_RELATIVE;
|
||||
|
||||
const double cpuRes = cv::norm(src1_roi, src2_roi, type);
|
||||
const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type);
|
||||
|
||||
EXPECT_NEAR(cpuRes, gpuRes, 0.1);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Norm, NORM_L2)
|
||||
{
|
||||
for (int relative = 0; relative < 2; ++relative)
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
int type = NORM_L2;
|
||||
if (relative == 1)
|
||||
type |= NORM_RELATIVE;
|
||||
|
||||
const double cpuRes = cv::norm(src1_roi, src2_roi, type);
|
||||
const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type);
|
||||
|
||||
EXPECT_NEAR(cpuRes, gpuRes, 0.1);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////// Instantiation /////////////////////////////////////////
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // +
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, SqrSum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, AbsSum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, SetIdentity, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
@ -1,63 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
|
||||
typedef ::testing::TestWithParam<cv::Size> normFixture;
|
||||
|
||||
TEST_P(normFixture, DISABLED_accuracy)
|
||||
{
|
||||
const cv::Size srcSize = GetParam();
|
||||
|
||||
cv::Mat src1(srcSize, CV_8UC1), src2(srcSize, CV_8UC1);
|
||||
cv::randu(src1, 0, 2);
|
||||
cv::randu(src2, 0, 2);
|
||||
|
||||
cv::ocl::oclMat oclSrc1(src1), oclSrc2(src2);
|
||||
|
||||
double value = cv::norm(src1, src2, cv::NORM_INF);
|
||||
double oclValue = cv::ocl::norm(oclSrc1, oclSrc2, cv::NORM_INF);
|
||||
|
||||
ASSERT_EQ(value, oclValue);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(oclNormTest, normFixture,
|
||||
::testing::Values(cv::Size(500, 500), cv::Size(1000, 1000)));
|
Loading…
Reference in New Issue
Block a user