Merge pull request #1519 from ilya-lavrenov:ocl_minMax

This commit is contained in:
Andrey Pavlenko
2013-10-01 20:16:47 +04:00
committed by OpenCV Buildbot
11 changed files with 874 additions and 1116 deletions

View File

@@ -584,7 +584,8 @@ namespace cv
CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0); CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
CV_EXPORTS void setIdentity(oclMat& src, double val); //! initializes a scaled identity matrix
CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
//////////////////////////////// Filter Engine //////////////////////////////// //////////////////////////////// Filter Engine ////////////////////////////////

View File

@@ -64,11 +64,10 @@ namespace cv
{ {
//////////////////////////////// OpenCL kernel strings ///////////////////// //////////////////////////////// OpenCL kernel strings /////////////////////
extern const char *arithm_absdiff_nonsaturate;
extern const char *arithm_nonzero; extern const char *arithm_nonzero;
extern const char *arithm_sum; extern const char *arithm_sum;
extern const char *arithm_sum_3;
extern const char *arithm_minMax; extern const char *arithm_minMax;
extern const char *arithm_minMax_mask;
extern const char *arithm_minMaxLoc; extern const char *arithm_minMaxLoc;
extern const char *arithm_minMaxLoc_mask; extern const char *arithm_minMaxLoc_mask;
extern const char *arithm_LUT; extern const char *arithm_LUT;
@@ -318,21 +317,28 @@ void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int
////////////////////////////////// sum ////////////////////////////////////// ////////////////////////////////// sum //////////////////////////////////////
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
//type = 0 sum,type = 1 absSum,type = 2 sqrSum enum { SUM = 0, ABS_SUM, SQR_SUM };
static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, int type = 0)
static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int groupnum, int type, int ddepth)
{ {
vector<pair<size_t , const void *> > args; int ochannels = src.oclchannels();
int all_cols = src.step / (vlen * src.elemSize1()); int all_cols = src.step / src.elemSize();
int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1()); int pre_cols = (src.offset % src.step) / src.elemSize();
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1; int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
int invalid_cols = pre_cols + sec_cols; int invalid_cols = pre_cols + sec_cols;
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
int offset = src.offset / (vlen * src.elemSize1()); int offset = src.offset / src.elemSize();
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
char build_options[512]; const char * const funcMap[] = { "FUNC_SUM", "FUNC_ABS_SUM", "FUNC_SQR_SUM" };
CV_Assert(type == 0 || type == 1 || type == 2); const char * const channelMap[] = { " ", " ", "2", "4", "4" };
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d -D FUNC_TYPE_%d", src.depth(), repeat_s, repeat_e, type); string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s -D %s",
typeMap[src.depth()], channelMap[ochannels],
typeMap[ddepth], channelMap[ochannels],
typeMap[ddepth], channelMap[ochannels],
funcMap[type]);
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset));
@@ -340,55 +346,63 @@ static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen ,
args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; size_t globalThreads[3] = { groupnum * 256, 1, 1 };
if (src.oclchannels() != 3) size_t localThreads[3] = { 256, 1, 1 };
openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", gt, lt, args, -1, -1, build_options);
else openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", globalThreads, localThreads,
openCLExecuteKernel(src.clCxt, &arithm_sum_3, "arithm_op_sum_3", gt, lt, args, -1, -1, build_options); args, -1, -1, buildOptions.c_str());
} }
template <typename T> template <typename T>
Scalar arithmetic_sum(const oclMat &src, int type = 0) Scalar arithmetic_sum(const oclMat &src, int type, int ddepth)
{ {
CV_Assert(src.step % src.elemSize() == 0);
size_t groupnum = src.clCxt->computeUnits(); size_t groupnum = src.clCxt->computeUnits();
CV_Assert(groupnum != 0); CV_Assert(groupnum != 0);
int vlen = src.oclchannels() == 3 ? 12 : 8, dbsize = groupnum * vlen;
int dbsize = groupnum * src.oclchannels();
Context *clCxt = src.clCxt; Context *clCxt = src.clCxt;
AutoBuffer<T> _buf(dbsize); AutoBuffer<T> _buf(dbsize);
T *p = (T*)_buf; T *p = (T*)_buf;
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T));
Scalar s = Scalar::all(0.0);
arithmetic_sum_buffer_run(src, dstBuffer, vlen, groupnum, type);
memset(p, 0, dbsize * sizeof(T)); memset(p, 0, dbsize * sizeof(T));
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T));
arithmetic_sum_buffer_run(src, dstBuffer, groupnum, type, ddepth);
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T)); openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T));
openCLFree(dstBuffer);
Scalar s = Scalar::all(0.0);
for (int i = 0; i < dbsize;) for (int i = 0; i < dbsize;)
{
for (int j = 0; j < src.oclchannels(); j++, i++) for (int j = 0; j < src.oclchannels(); j++, i++)
s.val[j] += p[i]; s.val[j] += p[i];
}
openCLFree(dstBuffer);
return s; return s;
} }
typedef Scalar (*sumFunc)(const oclMat &src, int type); typedef Scalar (*sumFunc)(const oclMat &src, int type, int ddepth);
Scalar cv::ocl::sum(const oclMat &src) Scalar cv::ocl::sum(const oclMat &src)
{ {
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
} }
static sumFunc functab[2] = static sumFunc functab[3] =
{ {
arithmetic_sum<int>,
arithmetic_sum<float>, arithmetic_sum<float>,
arithmetic_sum<double> arithmetic_sum<double>
}; };
sumFunc func; bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; int ddepth = std::max(src.depth(), CV_32S);
return func(src, 0); if (!hasDouble && ddepth == CV_64F)
ddepth = CV_32F;
sumFunc func = functab[ddepth - CV_32S];
return func(src, SUM, ddepth);
} }
Scalar cv::ocl::absSum(const oclMat &src) Scalar cv::ocl::absSum(const oclMat &src)
@@ -397,15 +411,20 @@ Scalar cv::ocl::absSum(const oclMat &src)
{ {
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
} }
static sumFunc functab[2] = static sumFunc functab[3] =
{ {
arithmetic_sum<int>,
arithmetic_sum<float>, arithmetic_sum<float>,
arithmetic_sum<double> arithmetic_sum<double>
}; };
sumFunc func; bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; int ddepth = std::max(src.depth(), CV_32S);
return func(src, 1); if (!hasDouble && ddepth == CV_64F)
ddepth = CV_32F;
sumFunc func = functab[ddepth - CV_32S];
return func(src, ABS_SUM, ddepth);
} }
Scalar cv::ocl::sqrSum(const oclMat &src) Scalar cv::ocl::sqrSum(const oclMat &src)
@@ -414,15 +433,18 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
{ {
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
} }
static sumFunc functab[2] = static sumFunc functab[3] =
{ {
arithmetic_sum<float>, arithmetic_sum<int>,
arithmetic_sum<double>,
arithmetic_sum<double> arithmetic_sum<double>
}; };
sumFunc func; bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F);
return func(src, 2);
sumFunc func = functab[ddepth - CV_32S];
return func(src, SQR_SUM, ddepth);
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
@@ -431,23 +453,15 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
{ {
CV_Assert(src.depth() <= CV_32S); double total = 1.0 / src.size().area();
cv::Size sz(1, 1);
int channels = src.oclchannels();
Mat m1(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)),
m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0));
oclMat dst1(m1), dst2(m2);
//arithmetic_sum_run(src, dst1,"arithm_op_sum"); mean = sum(src);
//arithmetic_sum_run(src, dst2,"arithm_op_squares_sum"); stddev = sqrSum(src);
m1 = (Mat)dst1; for (int i = 0; i < 4; ++i)
m2 = (Mat)dst2;
int i = 0, *p = (int *)m1.data, *q = (int *)m2.data;
for (; i < channels; i++)
{ {
mean.val[i] = (double)p[i] / (src.cols * src.rows); mean[i] *= total;
stddev.val[i] = std::sqrt(std::max((double) q[i] / (src.cols * src.rows) - mean.val[i] * mean.val[i] , 0.)); stddev[i] = std::sqrt(std::max(stddev[i] * total - mean.val[i] * mean.val[i] , 0.));
} }
} }
@@ -455,139 +469,120 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
//////////////////////////////////// minMax ///////////////////////////////// //////////////////////////////////// minMax /////////////////////////////////
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
static void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen , int groupnum, string kernelName) template <typename T, typename WT>
static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem &dst, int groupnum, string kernelName)
{ {
vector<pair<size_t , const void *> > args; int all_cols = src.step / src.elemSize();
int all_cols = src.step / (vlen * src.elemSize1()); int pre_cols = (src.offset % src.step) / src.elemSize();
int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1()); int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1;
int invalid_cols = pre_cols + sec_cols; int invalid_cols = pre_cols + sec_cols;
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
int offset = src.offset / (vlen * src.elemSize1()); int offset = src.offset / src.elemSize();
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
char build_options[50]; const char * const channelMap[] = { " ", " ", "2", "4", "4" };
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e);
ostringstream stream;
stream << "-D T=" << typeMap[src.depth()] << channelMap[src.channels()];
stream << " -D MAX_VAL=" << (WT)numeric_limits<T>::max();
stream << " -D MIN_VAL=" << (WT)numeric_limits<T>::min();
string buildOptions = stream.str();
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset));
args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum)); args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum));
args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
int minvalid_cols = 0, moffset = 0;
if (!mask.empty()) if (!mask.empty())
{ {
int mall_cols = mask.step / (vlen * mask.elemSize1()); int mall_cols = mask.step / mask.elemSize();
int mpre_cols = (mask.offset % mask.step) / (vlen * mask.elemSize1()); int mpre_cols = (mask.offset % mask.step) / mask.elemSize();
int msec_cols = mall_cols - (mask.offset % mask.step + mask.cols * mask.elemSize() - 1) / (vlen * mask.elemSize1()) - 1; int msec_cols = mall_cols - (mask.offset % mask.step + mask.cols * mask.elemSize() - 1) / mask.elemSize() - 1;
int minvalid_cols = mpre_cols + msec_cols; minvalid_cols = mpre_cols + msec_cols;
int moffset = mask.offset / (vlen * mask.elemSize1()); moffset = mask.offset / mask.elemSize();
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&minvalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&minvalid_cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset )); args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
kernelName += "_mask";
} }
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; size_t globalThreads[3] = {groupnum * 256, 1, 1};
openCLExecuteKernel(src.clCxt, &arithm_minMax, kernelName, gt, lt, args, -1, -1, build_options); size_t localThreads[3] = {256, 1, 1};
openCLExecuteKernel(src.clCxt, &arithm_minMax, kernelName, globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
} }
template <typename T, typename WT>
static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen, int groupnum, string kernelName) void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
{
vector<pair<size_t , const void *> > args;
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
char build_options[50];
if (src.oclchannels() == 1)
{
int cols = (src.cols - 1) / vlen + 1;
int invalid_cols = src.step / (vlen * src.elemSize1()) - cols;
int offset = src.offset / src.elemSize1();
int repeat_me = vlen - (mask.cols % vlen == 0 ? vlen : mask.cols % vlen);
int minvalid_cols = mask.step / (vlen * mask.elemSize1()) - cols;
int moffset = mask.offset / mask.elemSize1();
int elemnum = cols * src.rows;
sprintf(build_options, "-D DEPTH_%d -D REPEAT_E%d", src.depth(), repeat_me);
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset));
args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum));
args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( make_pair( sizeof(cl_int) , (void *)&minvalid_cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
openCLExecuteKernel(src.clCxt, &arithm_minMax_mask, kernelName, gt, lt, args, -1, -1, build_options);
}
}
template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
const oclMat &mask, oclMat &buf) const oclMat &mask, oclMat &buf)
{ {
size_t groupnum = src.clCxt->computeUnits(); size_t groupnum = src.clCxt->computeUnits();
CV_Assert(groupnum != 0); CV_Assert(groupnum != 0);
groupnum = groupnum * 2;
int vlen = 8;
int dbsize = groupnum * 2 * vlen * sizeof(T) ;
int dbsize = groupnum * 2 * src.elemSize();
ensureSizeIsEnough(1, dbsize, CV_8UC1, buf); ensureSizeIsEnough(1, dbsize, CV_8UC1, buf);
cl_mem buf_data = reinterpret_cast<cl_mem>(buf.data); cl_mem buf_data = reinterpret_cast<cl_mem>(buf.data);
arithmetic_minMax_run<T, WT>(src, mask, buf_data, groupnum, "arithm_op_minMax");
if (mask.empty())
{
arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax");
}
else
{
arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask");
}
Mat matbuf = Mat(buf); Mat matbuf = Mat(buf);
T *p = matbuf.ptr<T>(); T *p = matbuf.ptr<T>();
if (minVal != NULL) if (minVal != NULL)
{ {
*minVal = std::numeric_limits<double>::max(); *minVal = std::numeric_limits<double>::max();
for (int i = 0; i < vlen * (int)groupnum; i++) for (int i = 0, end = src.oclchannels() * (int)groupnum; i < end; i++)
{
*minVal = *minVal < p[i] ? *minVal : p[i]; *minVal = *minVal < p[i] ? *minVal : p[i];
} }
}
if (maxVal != NULL) if (maxVal != NULL)
{ {
*maxVal = -std::numeric_limits<double>::max(); *maxVal = -std::numeric_limits<double>::max();
for (int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++) for (int i = src.oclchannels() * (int)groupnum, end = i << 1; i < end; i++)
{
*maxVal = *maxVal > p[i] ? *maxVal : p[i]; *maxVal = *maxVal > p[i] ? *maxVal : p[i];
} }
}
} }
typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf);
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
{ {
oclMat buf; oclMat buf;
minMax_buf(src, minVal, maxVal, mask, buf); minMax_buf(src, minVal, maxVal, mask, buf);
} }
typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf);
void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf)
{ {
CV_Assert(src.oclchannels() == 1); CV_Assert(src.channels() == 1);
CV_Assert(src.size() == mask.size() || mask.empty());
CV_Assert(src.step % src.elemSize() == 0);
if (minVal == NULL && maxVal == NULL)
return;
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
} }
static minMaxFunc functab[8] = static minMaxFunc functab[8] =
{ {
arithmetic_minMax<uchar>, arithmetic_minMax<uchar, int>,
arithmetic_minMax<char>, arithmetic_minMax<char, int>,
arithmetic_minMax<ushort>, arithmetic_minMax<ushort, int>,
arithmetic_minMax<short>, arithmetic_minMax<short, int>,
arithmetic_minMax<int>, arithmetic_minMax<int, int>,
arithmetic_minMax<float>, arithmetic_minMax<float, float>,
arithmetic_minMax<double>, arithmetic_minMax<double, double>,
0 0
}; };
minMaxFunc func; minMaxFunc func;
func = functab[src.depth()]; func = functab[src.depth()];
func(src, minVal, maxVal, mask, buf); func(src, minVal, maxVal, mask, buf);
@@ -599,57 +594,102 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons
double cv::ocl::norm(const oclMat &src1, int normType) double cv::ocl::norm(const oclMat &src1, int normType)
{ {
return norm(src1, oclMat(src1.size(), src1.type(), Scalar::all(0)), normType); CV_Assert((normType & NORM_RELATIVE) == 0);
return norm(src1, oclMat(), normType);
}
static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & src2, oclMat & diff)
{
CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0));
Context *clCxt = src1.clCxt;
int ddepth = CV_64F;
diff.create(src1.size(), CV_MAKE_TYPE(ddepth, src1.channels()));
int oclChannels = src1.oclchannels(), sdepth = src1.depth();
int src1step1 = src1.step / src1.elemSize(), src1offset1 = src1.offset / src1.elemSize();
int src2step1 = src2.step / src2.elemSize(), src2offset1 = src2.offset / src2.elemSize();
int diffstep1 = diff.step / diff.elemSize(), diffoffset1 = diff.offset / diff.elemSize();
string kernelName = "arithm_absdiff_nonsaturate";
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { diff.cols, diff.rows, 1 };
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char * const channelMap[] = { "", "", "2", "4", "4" };
std::string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s",
typeMap[sdepth], channelMap[oclChannels],
typeMap[ddepth], channelMap[oclChannels],
typeMap[ddepth], channelMap[oclChannels]);
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 ));
if (!src2.empty())
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 ));
kernelName += "_binary";
}
args.push_back( make_pair( sizeof(cl_mem), (void *)&diff.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&diffstep1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&diffoffset1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
openCLExecuteKernel(clCxt, &arithm_absdiff_nonsaturate,
kernelName, globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
} }
double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
{ {
CV_Assert(!src1.empty());
CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size()));
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
bool isRelative = (normType & NORM_RELATIVE) != 0; bool isRelative = (normType & NORM_RELATIVE) != 0;
normType &= 7; normType &= NORM_TYPE_MASK;
CV_Assert(src1.depth() <= CV_32S && src1.type() == src2.type() && ( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2)); CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
int channels = src1.oclchannels(), i = 0, *p;
Scalar s;
int cn = src1.channels();
double r = 0; double r = 0;
oclMat gm1(src1.size(), src1.type()); oclMat diff;
int min_int = (normType == NORM_INF ? CL_INT_MIN : 0); arithm_absdiff_nonsaturate_run(src1, src2, diff);
Mat m(1, 1, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(min_int));
oclMat gm2(m), emptyMat; switch (normType)
switch(normType)
{ {
case NORM_INF: case NORM_INF:
// arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); diff = diff.reshape(1);
//arithmetic_minMax_run(gm1,emptyMat, gm2,"arithm_op_max"); minMax(diff, NULL, &r);
m = (gm2);
p = (int *)m.data;
r = -std::numeric_limits<double>::max();
for (i = 0; i < channels; i++)
{
r = std::max(r, (double)p[i]);
}
break; break;
case NORM_L1: case NORM_L1:
//arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); s = sum(diff);
//arithmetic_sum_run(gm1, gm2,"arithm_op_sum"); for (int i = 0; i < cn; ++i)
m = (gm2); r += s[i];
p = (int *)m.data;
for (i = 0; i < channels; i++)
{
r = r + (double)p[i];
}
break; break;
case NORM_L2: case NORM_L2:
//arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); s = sqrSum(diff);
//arithmetic_sum_run(gm1, gm2,"arithm_op_squares_sum"); for (int i = 0; i < cn; ++i)
m = (gm2); r += s[i];
p = (int *)m.data;
for (i = 0; i < channels; i++)
{
r = r + (double)p[i];
}
r = std::sqrt(r); r = std::sqrt(r);
break; break;
} }
if (isRelative) if (isRelative)
r = r / norm(src2, normType); r = r / norm(src2, normType);
return r; return r;
} }
@@ -923,47 +963,38 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
return; return;
} }
CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && src1.rows == src2.rows && src2.rows == dst.rows);
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
Context *clCxt = src1.clCxt; Context *clCxt = src1.clCxt;
int channels = dst.oclchannels(); int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels();
int depth = dst.depth(); int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1();
int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1();
size_t vector_length = 1; int dststep1 = dst.step / dst.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1();
int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 }; size_t localThreads[3] = { 64, 4, 1 };
size_t globalThreads[3] = { cols, dst.rows, 1 }; size_t globalThreads[3] = { cols1, dst.rows, 1 };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
} }
void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angleInDegrees) void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees)
{ {
CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F)); CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F));
CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0);
Angle.create(x.size(), x.type()); Angle.create(x.size(), x.type());
string kernelName = angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians"; arithmetic_phase_run(x, y, Angle, angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians", &arithm_phase);
if (angleInDegrees)
arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
else
arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
@@ -1228,21 +1259,22 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
///////////////////////////// countNonZero /////////////////////////////////// ///////////////////////////// countNonZero ///////////////////////////////////
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, string kernelName) static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int groupnum, string kernelName)
{ {
vector<pair<size_t , const void *> > args; int ochannels = src.oclchannels();
int all_cols = src.step / (vlen * src.elemSize1()); int all_cols = src.step / src.elemSize();
int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1()); int pre_cols = (src.offset % src.step) / src.elemSize();
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1; int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
int invalid_cols = pre_cols + sec_cols; int invalid_cols = pre_cols + sec_cols;
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
int offset = src.offset / (vlen * src.elemSize1()); int offset = src.offset / src.elemSize();
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels();
char build_options[50]; const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e); const char * const channelMap[] = { " ", " ", "2", "4", "4" };
string buildOptions = format("-D srcT=%s%s -D dstT=int%s", typeMap[src.depth()], channelMap[ochannels],
channelMap[ochannels]);
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset));
@@ -1250,33 +1282,44 @@ static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen
args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, gt, lt, args, -1, -1, build_options); size_t globalThreads[3] = { groupnum * 256, 1, 1 };
size_t localThreads[3] = { 256, 1, 1 };
openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
} }
int cv::ocl::countNonZero(const oclMat &src) int cv::ocl::countNonZero(const oclMat &src)
{ {
size_t groupnum = src.clCxt->computeUnits(); CV_Assert(src.step % src.elemSize() == 0);
CV_Assert(src.channels() == 1);
Context *clCxt = src.clCxt;
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "selected device doesn't support double"); CV_Error(CV_GpuNotSupported, "selected device doesn't support double");
} }
size_t groupnum = src.clCxt->computeUnits();
CV_Assert(groupnum != 0); CV_Assert(groupnum != 0);
int vlen = 8 , dbsize = groupnum * vlen; int dbsize = groupnum;
Context *clCxt = src.clCxt;
string kernelName = "arithm_op_nonzero"; string kernelName = "arithm_op_nonzero";
AutoBuffer<int> _buf(dbsize); AutoBuffer<int> _buf(dbsize);
int *p = (int*)_buf, nonzero = 0; int *p = (int*)_buf, nonzero = 0;
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int));
arithmetic_countNonZero_run(src, dstBuffer, vlen, groupnum, kernelName);
memset(p, 0, dbsize * sizeof(int)); memset(p, 0, dbsize * sizeof(int));
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int));
arithmetic_countNonZero_run(src, dstBuffer, groupnum, kernelName);
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(int)); openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(int));
for (int i = 0; i < dbsize; i++) for (int i = 0; i < dbsize; i++)
nonzero += p[i]; nonzero += p[i];
openCLSafeCall(clReleaseMemObject(dstBuffer)); openCLSafeCall(clReleaseMemObject(dstBuffer));
return nonzero; return nonzero;
} }
@@ -1523,7 +1566,7 @@ oclMatExpr::operator oclMat() const
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
#define TILE_DIM (32) #define TILE_DIM (32)
#define BLOCK_ROWS (256/TILE_DIM) #define BLOCK_ROWS (256 / TILE_DIM)
static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false) static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false)
{ {
@@ -1702,63 +1745,35 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
/////////////////////////////// setIdentity ////////////////////////////////// /////////////////////////////// setIdentity //////////////////////////////////
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
void cv::ocl::setIdentity(oclMat& src, double scalar) void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar)
{ {
CV_Assert(src.empty() == false && src.rows == src.cols);
CV_Assert(src.type() == CV_32SC1 || src.type() == CV_32FC1);
int src_step = src.step/src.elemSize();
Context *clCxt = Context::getContext(); Context *clCxt = Context::getContext();
size_t local_threads[] = {16, 16, 1}; if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
size_t global_threads[] = {src.cols, src.rows, 1};
string kernelName = "setIdentityKernel";
if (src.type() == CV_32FC1)
kernelName += "_F1";
else if (src.type() == CV_32SC1)
kernelName += "_I1";
else
{ {
kernelName += "_D1"; CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
if (!(clCxt->supportsFeature(Context::CL_DOUBLE))) return;
{
oclMat temp;
src.convertTo(temp, CV_32FC1);
temp.copyTo(src);
} }
} CV_Assert(src.step % src.elemSize() == 0);
int src_step1 = src.step / src.elemSize(), src_offset1 = src.offset / src.elemSize();
size_t local_threads[] = { 16, 16, 1 };
size_t global_threads[] = { src.cols, src.rows, 1 };
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char * const channelMap[] = { "", "", "2", "4", "4" };
string buildOptions = format("-D T=%s%s", typeMap[src.depth()], channelMap[src.oclchannels()]);
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&src_step1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step )); args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
int scalar_i = 0; oclMat sc(1, 1, src.type(), scalar);
float scalar_f = 0.0f; args.push_back( make_pair( sizeof(cl_mem), (void *)&sc.data ));
if (clCxt->supportsFeature(Context::CL_DOUBLE))
{
if (src.type() == CV_32SC1)
{
scalar_i = (int)scalar;
args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i));
}
else
args.push_back(make_pair(sizeof(cl_double), (void*)&scalar));
}
else
{
if (src.type() == CV_32SC1)
{
scalar_i = (int)scalar;
args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i));
}
else
{
scalar_f = (float)scalar;
args.push_back(make_pair(sizeof(cl_float), (void*)&scalar_f));
}
}
openCLExecuteKernel(clCxt, &arithm_setidentity, kernelName, global_threads, local_threads, args, -1, -1); openCLExecuteKernel(clCxt, &arithm_setidentity, "setIdentity", global_threads, local_threads,
args, -1, -1, buildOptions.c_str());
} }

View File

@@ -0,0 +1,93 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
__kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset,
__global srcT *src2, int src2_step, int src2_offset,
__global dstT *dst, int dst_step, int dst_offset,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
dstT t0 = convertToDstT(src1[src1_index]);
dstT t1 = convertToDstT(src2[src2_index]);
dstT t2 = t0 - t1;
dst[dst_index] = t2 >= 0 ? t2 : -t2;
}
}
__kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset,
__global dstT *dst, int dst_step, int dst_offset,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
dstT t0 = convertToDstT(src1[src1_index]);
dst[dst_index] = t0 >= 0 ? t0 : -t0;
}
}

View File

@@ -53,169 +53,117 @@
#endif #endif
#endif #endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar8
#define CONVERT_TYPE convert_uchar8
#define MIN_VAL 0
#define MAX_VAL 255
#endif
#if defined (DEPTH_1)
#define VEC_TYPE char8
#define CONVERT_TYPE convert_char8
#define MIN_VAL -128
#define MAX_VAL 127
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort8
#define CONVERT_TYPE convert_ushort8
#define MIN_VAL 0
#define MAX_VAL 65535
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short8
#define CONVERT_TYPE convert_short8
#define MIN_VAL -32768
#define MAX_VAL 32767
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int8
#define CONVERT_TYPE convert_int8
#define MIN_VAL INT_MIN
#define MAX_VAL INT_MAX
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float8
#define CONVERT_TYPE convert_float8
#define MIN_VAL (-FLT_MAX)
#define MAX_VAL FLT_MAX
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double8
#define CONVERT_TYPE convert_double8
#define MIN_VAL (-DBL_MAX)
#define MAX_VAL DBL_MAX
#endif
#if defined (REPEAT_S0)
#define repeat_s(a) a = a;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a) a.s0 = a.s1;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a) a.s0 = a.s2;a.s1 = a.s2;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a) a.s0 = a.s3;a.s1 = a.s3;a.s2 = a.s3;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a) a.s0 = a.s4;a.s1 = a.s4;a.s2 = a.s4;a.s3 = a.s4;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a) a.s0 = a.s5;a.s1 = a.s5;a.s2 = a.s5;a.s3 = a.s5;a.s4 = a.s5;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a) a.s0 = a.s6;a.s1 = a.s6;a.s2 = a.s6;a.s3 = a.s6;a.s4 = a.s6;a.s5 = a.s6;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a) a.s0 = a.s7;a.s1 = a.s7;a.s2 = a.s7;a.s3 = a.s7;a.s4 = a.s7;a.s5 = a.s7;a.s6 = a.s7;
#endif
#if defined (REPEAT_E0)
#define repeat_e(a) a = a;
#endif
#if defined (REPEAT_E1)
#define repeat_e(a) a.s7 = a.s6;
#endif
#if defined (REPEAT_E2)
#define repeat_e(a) a.s7 = a.s5;a.s6 = a.s5;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a) a.s7 = a.s4;a.s6 = a.s4;a.s5 = a.s4;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a) a.s7 = a.s3;a.s6 = a.s3;a.s5 = a.s3;a.s4 = a.s3;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a) a.s7 = a.s2;a.s6 = a.s2;a.s5 = a.s2;a.s4 = a.s2;a.s3 = a.s2;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a) a.s7 = a.s1;a.s6 = a.s1;a.s5 = a.s1;a.s4 = a.s1;a.s3 = a.s1;a.s2 = a.s1;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a) a.s7 = a.s0;a.s6 = a.s0;a.s5 = a.s0;a.s4 = a.s0;a.s3 = a.s0;a.s2 = a.s0;a.s1 = a.s0;
#endif
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
/**************************************Array minMax**************************************/ /**************************************Array minMax**************************************/
__kernel void arithm_op_minMax (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global VEC_TYPE *dst) __kernel void arithm_op_minMax(__global const T * src, __global T * dst,
int cols, int invalid_cols, int offset, int elemnum, int groupnum)
{ {
unsigned int lid = get_local_id(0); unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0); unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0); unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols; unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local VEC_TYPE localmem_max[128],localmem_min[128];
VEC_TYPE minval,maxval,temp; __local T localmem_max[128], localmem_min[128];
if(id < elemnum) T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
{
temp = src[idx]; for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
minval = temp;
maxval = temp;
}
else
{
minval = MAX_VAL;
maxval = MIN_VAL;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{ {
idx = offset + id + (id / cols) * invalid_cols; idx = offset + id + (id / cols) * invalid_cols;
temp = src[idx]; temp = src[idx];
if(id % cols == 0 ) minval = min(minval, temp);
{ maxval = max(maxval, temp);
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
minval = min(minval,temp);
maxval = max(maxval,temp);
} }
if(lid > 127) if(lid > 127)
{ {
localmem_min[lid - 128] = minval; localmem_min[lid - 128] = minval;
localmem_max[lid - 128] = maxval; localmem_max[lid - 128] = maxval;
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128) if(lid < 128)
{ {
localmem_min[lid] = min(minval,localmem_min[lid]); localmem_min[lid] = min(minval, localmem_min[lid]);
localmem_max[lid] = max(maxval,localmem_max[lid]); localmem_max[lid] = max(maxval, localmem_max[lid]);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
for (int lsize = 64; lsize > 0; lsize >>= 1)
{ {
if(lid < lsize) if (lid < lsize)
{ {
int lid2 = lsize + lid; int lid2 = lsize + lid;
localmem_min[lid] = min(localmem_min[lid] , localmem_min[lid2]); localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid] , localmem_max[lid2]); localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if( lid == 0)
if (lid == 0)
{
dst[gid] = localmem_min[0];
dst[gid + groupnum] = localmem_max[0];
}
}
__kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst,
int cols, int invalid_cols, int offset,
int elemnum, int groupnum,
const __global uchar * mask, int minvalid_cols, int moffset)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
unsigned int midx = moffset + id + (id / cols) * minvalid_cols;
__local T localmem_max[128], localmem_min[128];
T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
{
idx = offset + id + (id / cols) * invalid_cols;
midx = moffset + id + (id / cols) * minvalid_cols;
if (mask[midx])
{
temp = src[idx];
minval = min(minval, temp);
maxval = max(maxval, temp);
}
}
if(lid > 127)
{
localmem_min[lid - 128] = minval;
localmem_max[lid - 128] = maxval;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_min[lid] = min(minval, localmem_min[lid]);
localmem_max[lid] = max(maxval, localmem_max[lid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
for (int lsize = 64; lsize > 0; lsize >>= 1)
{
if (lid < lsize)
{
int lid2 = lsize + lid;
localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if (lid == 0)
{ {
dst[gid] = localmem_min[0]; dst[gid] = localmem_min[0];
dst[gid + groupnum] = localmem_max[0]; dst[gid + groupnum] = localmem_max[0];

View File

@@ -41,151 +41,53 @@
// or tort (including negligence or otherwise) arising in any way out of // or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
///
/**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar8
#endif #endif
#if defined (DEPTH_1)
#define VEC_TYPE char8
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort8
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short8
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int8
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float8
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double8
#endif
#if defined (REPEAT_S0)
#define repeat_s(a) a = a;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a) a.s0 = 0;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0;
#endif
#if defined (REPEAT_E0)
#define repeat_e(a) a = a;
#endif
#if defined (REPEAT_E1)
#define repeat_e(a) a.s7 = 0;
#endif
#if defined (REPEAT_E2)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0;
#endif
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
/**************************************Count NonZero**************************************/ /**************************************Count NonZero**************************************/
__kernel void arithm_op_nonzero (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global int8 *dst) __kernel void arithm_op_nonzero(int cols, int invalid_cols, int offset, int elemnum, int groupnum,
__global srcT *src, __global dstT *dst)
{ {
unsigned int lid = get_local_id(0); unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0); unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0); unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols; unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local int8 localmem_nonzero[128]; __local dstT localmem_nonzero[128];
int8 nonzero; dstT nonzero = (dstT)(0);
VEC_TYPE zero=0,one=1,temp; srcT zero = (srcT)(0), one = (srcT)(1);
if(id < elemnum)
{ for (int grain = groupnum << 8; id < elemnum; id += grain)
temp = src[idx];
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
nonzero = convert_int8(temp == zero ? zero:one);
}
else
{
nonzero = 0;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{ {
idx = offset + id + (id / cols) * invalid_cols; idx = offset + id + (id / cols) * invalid_cols;
temp = src[idx]; nonzero += src[idx] == zero ? zero : one;
if(id % cols == 0 )
{
repeat_s(temp);
} }
if(id % cols == cols - 1)
{ if (lid > 127)
repeat_e(temp);
}
nonzero = nonzero + convert_int8(temp == zero ? zero:one);
}
if(lid > 127)
{
localmem_nonzero[lid - 128] = nonzero; localmem_nonzero[lid - 128] = nonzero;
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{ if (lid < 128)
localmem_nonzero[lid] = nonzero + localmem_nonzero[lid]; localmem_nonzero[lid] = nonzero + localmem_nonzero[lid];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
for (int lsize = 64; lsize > 0; lsize >>= 1)
{ {
if(lid < lsize) if (lid < lsize)
{ {
int lid2 = lsize + lid; int lid2 = lsize + lid;
localmem_nonzero[lid] = localmem_nonzero[lid] + localmem_nonzero[lid2]; localmem_nonzero[lid] = localmem_nonzero[lid] + localmem_nonzero[lid2];
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if( lid == 0)
{ if (lid == 0)
dst[gid] = localmem_nonzero[0]; dst[gid] = localmem_nonzero[0];
}
} }

View File

@@ -45,110 +45,125 @@
// //
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif #endif
#define CV_PI 3.1415926535898
/**************************************phase inradians**************************************/
__kernel void arithm_phase_inradians_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
#define CV_PI 3.1415926535898
#define CV_2PI 2*3.1415926535898
/**************************************phase inradians**************************************/
__kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1,
__global float *src2, int src2_step1, int src2_offset1,
__global float *dst, int dst_step1, int dst_offset1,
int cols, int rows)
{
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
float data1 = *((__global float *)((__global char *)src1 + src1_index)); float data1 = src1[src1_index];
float data2 = *((__global float *)((__global char *)src2 + src2_index)); float data2 = src2[src2_index];
float tmp = atan2(data2,data1); float tmp = atan2(data2, data1);
*((__global float *)((__global char *)dst + dst_index)) = tmp; if (tmp < 0)
tmp += CV_2PI;
dst[dst_index] = tmp;
} }
} }
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
__kernel void arithm_phase_inradians_D6 (__global double *src1, int src1_step, int src1_offset, __kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1,
__global double *src2, int src2_step, int src2_offset, __global double *src2, int src2_step1, int src2_offset1,
__global double *dst, int dst_step, int dst_offset, __global double *dst, int dst_step1, int dst_offset1,
int rows, int cols, int dst_step1) int cols, int rows)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
double data1 = *((__global double *)((__global char *)src1 + src1_index)); double data1 = src1[src1_index];
double data2 = *((__global double *)((__global char *)src2 + src2_index)); double data2 = src2[src2_index];
double tmp = atan2(data2, data1);
*((__global double *)((__global char *)dst + dst_index)) = atan2(data2,data1); if (tmp < 0)
tmp += CV_2PI;
dst[dst_index] = tmp;
} }
} }
#endif #endif
/**************************************phase indegrees**************************************/ /**************************************phase indegrees**************************************/
__kernel void arithm_phase_indegrees_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
__kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1,
__global float *src2, int src2_step1, int src2_offset1,
__global float *dst, int dst_step1, int dst_offset1,
int cols, int rows)
{
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
float data1 = *((__global float *)((__global char *)src1 + src1_index)); float data1 = src1[src1_index];
float data2 = *((__global float *)((__global char *)src2 + src2_index)); float data2 = src2[src2_index];
float tmp = atan2(data2,data1); float tmp = atan2(data2, data1);
float tmp_data = 180*tmp/CV_PI; tmp = 180 * tmp / CV_PI;
*((__global float *)((__global char *)dst + dst_index)) = tmp_data; if (tmp < 0)
tmp += 360;
dst[dst_index] = tmp;
} }
} }
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step, int src1_offset, __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1,
__global double *src2, int src2_step, int src2_offset, __global double *src2, int src2_step1, int src2_offset1,
__global double *dst, int dst_step, int dst_offset, __global double *dst, int dst_step1, int dst_offset1,
int rows, int cols, int dst_step1) int cols, int rows)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
double data1 = *((__global double *)((__global char *)src1 + src1_index)); double data1 = src1[src1_index];
double data2 = *((__global double *)((__global char *)src2 + src2_index)); double data2 = src2[src2_index];
double tmp = atan2(data2,data1); double tmp = atan2(src2[src2_index], src1[src1_index]);
double tmp_data = 180*tmp/CV_PI;
*((__global double *)((__global char *)dst + dst_index)) = tmp_data; tmp = 180 * tmp / CV_PI;
if (tmp < 0)
tmp += 360;
dst[dst_index] = tmp;
} }
} }
#endif #endif

View File

@@ -42,6 +42,7 @@
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64 #ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
@@ -50,51 +51,19 @@
#endif #endif
#endif #endif
__kernel void setIdentity(__global T * src, int src_step, int src_offset,
#if defined (DOUBLE_SUPPORT) int cols, int rows, __global const T * scalar)
#define DATA_TYPE double
#else
#define DATA_TYPE float
#endif
__kernel void setIdentityKernel_F1(__global float* src, int src_row, int src_col, int src_step, DATA_TYPE scalar)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < src_col && y < src_row) if (x < cols && y < rows)
{ {
if(x == y) int src_index = mad24(y, src_step, src_offset + x);
src[y * src_step + x] = scalar;
if (x == y)
src[src_index] = *scalar;
else else
src[y * src_step + x] = 0 * scalar; src[src_index] = 0;
}
}
__kernel void setIdentityKernel_D1(__global DATA_TYPE* src, int src_row, int src_col, int src_step, DATA_TYPE scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src_col && y < src_row)
{
if(x == y)
src[y * src_step + x] = scalar;
else
src[y * src_step + x] = 0 * scalar;
}
}
__kernel void setIdentityKernel_I1(__global int* src, int src_row, int src_col, int src_step, int scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src_col && y < src_row)
{
if(x == y)
src[y * src_step + x] = scalar;
else
src[y * src_step + x] = 0 * scalar;
} }
} }

View File

@@ -43,163 +43,62 @@
// //
//M*/ //M*/
/**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#define RES_TYPE double8 #elif defined (cl_amd_fp64)
#define CONVERT_RES_TYPE convert_double8 #pragma OPENCL EXTENSION cl_amd_fp64:enable
#else #endif
#define RES_TYPE float8
#define CONVERT_RES_TYPE convert_float8
#endif #endif
#if defined (DEPTH_0) #if defined (FUNC_SUM)
#define VEC_TYPE uchar8 #define FUNC(a, b) b += a;
#endif #endif
#if defined (DEPTH_1) #if defined (FUNC_ABS_SUM)
#define VEC_TYPE char8 #define FUNC(a, b) b += a >= 0 ? a : -a;
#endif #endif
#if defined (DEPTH_2) #if defined (FUNC_SQR_SUM)
#define VEC_TYPE ushort8 #define FUNC(a, b) b += a * a;
#endif #endif
#if defined (DEPTH_3)
#define VEC_TYPE short8
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int8
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float8
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double8
#endif
#if defined (FUNC_TYPE_0)
#define FUNC(a,b) b += a;
#endif
#if defined (FUNC_TYPE_1)
#define FUNC(a,b) b = b + (a >= 0 ? a : -a);
#endif
#if defined (FUNC_TYPE_2)
#define FUNC(a,b) b = b + a * a;
#endif
#if defined (REPEAT_S0)
#define repeat_s(a) a = a;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a) a.s0 = 0;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0;
#endif
#if defined (REPEAT_E0)
#define repeat_e(a) a = a;
#endif
#if defined (REPEAT_E1)
#define repeat_e(a) a.s7 = 0;
#endif
#if defined (REPEAT_E2)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0;
#endif
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
/**************************************Array buffer SUM**************************************/ /**************************************Array buffer SUM**************************************/
__kernel void arithm_op_sum (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global RES_TYPE *dst) __kernel void arithm_op_sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global srcT *src, __global dstT *dst)
{ {
unsigned int lid = get_local_id(0); unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0); unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0); unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols; unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local RES_TYPE localmem_sum[128];
RES_TYPE sum = 0,temp; __local dstT localmem_sum[128];
if(id < elemnum) dstT sum = (dstT)(0), temp;
{
temp = CONVERT_RES_TYPE(src[idx]); for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
FUNC(temp,sum);
}
else
{
sum = 0;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{ {
idx = offset + id + (id / cols) * invalid_cols; idx = offset + id + (id / cols) * invalid_cols;
temp = CONVERT_RES_TYPE(src[idx]); temp = convertToDstT(src[idx]);
if(id % cols == 0 ) FUNC(temp, sum);
{
repeat_s(temp);
} }
if(id % cols == cols - 1)
{ if (lid > 127)
repeat_e(temp);
}
FUNC(temp,sum);
}
if(lid > 127)
{
localmem_sum[lid - 128] = sum; localmem_sum[lid - 128] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{ if (lid < 128)
localmem_sum[lid] = sum + localmem_sum[lid]; localmem_sum[lid] = sum + localmem_sum[lid];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
for (int lsize = 64; lsize > 0; lsize >>= 1)
{ {
if(lid < lsize) if (lid < lsize)
{ {
int lid2 = lsize + lid; int lid2 = lsize + lid;
localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2]; localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2];
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if( lid == 0)
{ if (lid == 0)
dst[gid] = localmem_sum[0]; dst[gid] = localmem_sum[0];
}
} }

View File

@@ -1,247 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Shengen Yan,yanshengen@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#define RES_TYPE double4
#define CONVERT_RES_TYPE convert_double4
#else
#define RES_TYPE float4
#define CONVERT_RES_TYPE convert_float4
#endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar4
#endif
#if defined (DEPTH_1)
#define VEC_TYPE char4
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort4
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short4
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int4
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float4
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double4
#endif
#if defined (FUNC_TYPE_0)
#define FUNC(a,b) b += a;
#endif
#if defined (FUNC_TYPE_1)
#define FUNC(a,b) b = b + (a >= 0 ? a : -a);
#endif
#if defined (FUNC_TYPE_2)
#define FUNC(a,b) b = b + a * a;
#endif
#if defined (REPEAT_S0)
#define repeat_s(a,b,c) a=a; b =b; c=c;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a,b,c) a.s0=0; b=b; c=c;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a,b,c) a.s0=0; a.s1=0; b=b; c=c;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a,b,c) a.s0=0; a.s1=0; a.s2=0; b=b; c=c;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a,b,c) a=0;b=b; c=c;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a,b,c) a=0; b.s0=0;c=c;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; c=c;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; b.s2=0; c=c;
#endif
#if defined (REPEAT_S8)
#define repeat_s(a,b,c) a=0; b=0; c=c;
#endif
#if defined (REPEAT_S9)
#define repeat_s(a,b,c) a=0; b=0; c.s0=0;
#endif
#if defined (REPEAT_S10)
#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0;
#endif
#if defined (REPEAT_S11)
#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0; c.s2=0;
#endif
#if defined (REPEAT_E0)
#define repeat_e(a,b,c) a=a; b =b; c=c;
#endif
#if defined (REPEAT_E1)
#define repeat_e(a,b,c) a=a; b=b; c.s3=0;
#endif
#if defined (REPEAT_E2)
#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0; c.s1=0;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a,b,c) a=a; b=b; c=0;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a,b,c) a=a; b.s3=0; c=0;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; c=0;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; b.s1=0; c=0;
#endif
#if defined (REPEAT_E8)
#define repeat_e(a,b,c) a=a; b=0; c=0;
#endif
#if defined (REPEAT_E9)
#define repeat_e(a,b,c) a.s3=0; b=0; c=0;
#endif
#if defined (REPEAT_E10)
#define repeat_e(a,b,c) a.s3=0; a.s2=0; b=0; c=0;
#endif
#if defined (REPEAT_E11)
#define repeat_e(a,b,c) a.s3=0; a.s2=0; a.s1=0; b=0; c=0;
#endif
__kernel void arithm_op_sum_3 (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global RES_TYPE *dst)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
idx = idx * 3;
__local RES_TYPE localmem_sum1[128];
__local RES_TYPE localmem_sum2[128];
__local RES_TYPE localmem_sum3[128];
RES_TYPE sum1 = 0,sum2 = 0,sum3 = 0,temp1,temp2,temp3;
if(id < elemnum)
{
temp1 = CONVERT_RES_TYPE(src[idx]);
temp2 = CONVERT_RES_TYPE(src[idx+1]);
temp3 = CONVERT_RES_TYPE(src[idx+2]);
if(id % cols == 0 )
{
repeat_s(temp1,temp2,temp3);
}
if(id % cols == cols - 1)
{
repeat_e(temp1,temp2,temp3);
}
FUNC(temp1,sum1);
FUNC(temp2,sum2);
FUNC(temp3,sum3);
}
else
{
sum1 = 0;
sum2 = 0;
sum3 = 0;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{
idx = offset + id + (id / cols) * invalid_cols;
idx = idx * 3;
temp1 = CONVERT_RES_TYPE(src[idx]);
temp2 = CONVERT_RES_TYPE(src[idx+1]);
temp3 = CONVERT_RES_TYPE(src[idx+2]);
if(id % cols == 0 )
{
repeat_s(temp1,temp2,temp3);
}
if(id % cols == cols - 1)
{
repeat_e(temp1,temp2,temp3);
}
FUNC(temp1,sum1);
FUNC(temp2,sum2);
FUNC(temp3,sum3);
}
if(lid > 127)
{
localmem_sum1[lid - 128] = sum1;
localmem_sum2[lid - 128] = sum2;
localmem_sum3[lid - 128] = sum3;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_sum1[lid] = sum1 + localmem_sum1[lid];
localmem_sum2[lid] = sum2 + localmem_sum2[lid];
localmem_sum3[lid] = sum3 + localmem_sum3[lid];
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
{
if(lid < lsize)
{
int lid2 = lsize + lid;
localmem_sum1[lid] = localmem_sum1[lid] + localmem_sum1[lid2];
localmem_sum2[lid] = localmem_sum2[lid] + localmem_sum2[lid2];
localmem_sum3[lid] = localmem_sum3[lid] + localmem_sum3[lid2];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if( lid == 0)
{
dst[gid*3] = localmem_sum1[0];
dst[gid*3+1] = localmem_sum2[0];
dst[gid*3+2] = localmem_sum3[0];
}
}

View File

@@ -220,8 +220,8 @@ PARAM_TEST_CASE(ArithmTestBase, int, int, bool)
cv::RNG &rng = TS::ptr()->get_rng(); cv::RNG &rng = TS::ptr()->get_rng();
src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 2, 11, false);
src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -15440, 14450, false); src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -1540, 1740, false);
dst1 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); dst1 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false);
dst2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); dst2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false);
mask = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), CV_8UC1, 0, 2, false); mask = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), CV_8UC1, 0, 2, false);
@@ -464,7 +464,6 @@ TEST_P(Mul, Scalar)
} }
} }
TEST_P(Mul, Mat_Scalar) TEST_P(Mul, Mat_Scalar)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
@@ -507,7 +506,6 @@ TEST_P(Div, Scalar)
} }
} }
TEST_P(Div, Mat_Scalar) TEST_P(Div, Mat_Scalar)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
@@ -753,7 +751,7 @@ TEST_P(MinMax, MAT)
} }
} }
TEST_P(MinMax, DISABLED_MASK) TEST_P(MinMax, MASK)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
@@ -1022,7 +1020,7 @@ TEST_P(MinMaxLoc, MASK)
typedef ArithmTestBase Sum; typedef ArithmTestBase Sum;
TEST_P(Sum, DISABLED_MAT) TEST_P(Sum, MAT)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
@@ -1031,7 +1029,121 @@ TEST_P(Sum, DISABLED_MAT)
Scalar cpures = cv::sum(src1_roi); Scalar cpures = cv::sum(src1_roi);
Scalar gpures = cv::ocl::sum(gsrc1); Scalar gpures = cv::ocl::sum(gsrc1);
//check results // check results
EXPECT_NEAR(cpures[0], gpures[0], 0.1);
EXPECT_NEAR(cpures[1], gpures[1], 0.1);
EXPECT_NEAR(cpures[2], gpures[2], 0.1);
EXPECT_NEAR(cpures[3], gpures[3], 0.1);
}
}
typedef ArithmTestBase SqrSum;
template <typename T, typename WT>
static Scalar sqrSum(const Mat & src)
{
Scalar sum = Scalar::all(0);
int cn = src.channels();
WT data[4] = { 0, 0, 0, 0 };
int cols = src.cols * cn;
for (int y = 0; y < src.rows; ++y)
{
const T * const sdata = src.ptr<T>(y);
for (int x = 0; x < cols; )
for (int i = 0; i < cn; ++i, ++x)
{
WT t = static_cast<WT>(sdata[x]);
data[i] += t * t;
}
}
for (int i = 0; i < cn; ++i)
sum[i] = static_cast<double>(data[i]);
return sum;
}
typedef Scalar (*sumFunc)(const Mat &);
TEST_P(SqrSum, MAT)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
static sumFunc funcs[] = { sqrSum<uchar, int>,
sqrSum<char, int>,
sqrSum<ushort, int>,
sqrSum<short, int>,
sqrSum<int, int>,
sqrSum<float, double>,
sqrSum<double, double>,
0 };
sumFunc func = funcs[src1_roi.depth()];
CV_Assert(func != 0);
Scalar cpures = func(src1_roi);
Scalar gpures = cv::ocl::sqrSum(gsrc1);
// check results
EXPECT_NEAR(cpures[0], gpures[0], 1.0);
EXPECT_NEAR(cpures[1], gpures[1], 1.0);
EXPECT_NEAR(cpures[2], gpures[2], 1.0);
EXPECT_NEAR(cpures[3], gpures[3], 1.0);
}
}
typedef ArithmTestBase AbsSum;
template <typename T, typename WT>
static Scalar absSum(const Mat & src)
{
Scalar sum = Scalar::all(0);
int cn = src.channels();
WT data[4] = { 0, 0, 0, 0 };
int cols = src.cols * cn;
for (int y = 0; y < src.rows; ++y)
{
const T * const sdata = src.ptr<T>(y);
for (int x = 0; x < cols; )
for (int i = 0; i < cn; ++i, ++x)
{
WT t = static_cast<WT>(sdata[x]);
data[i] += t >= 0 ? t : -t;
}
}
for (int i = 0; i < cn; ++i)
sum[i] = static_cast<double>(data[i]);
return sum;
}
TEST_P(AbsSum, MAT)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
static sumFunc funcs[] = { absSum<uchar, int>,
absSum<char, int>,
absSum<ushort, int>,
absSum<short, int>,
absSum<int, int>,
absSum<float, double>,
absSum<double, double>,
0 };
sumFunc func = funcs[src1_roi.depth()];
CV_Assert(func != 0);
Scalar cpures = func(src1_roi);
Scalar gpures = cv::ocl::absSum(gsrc1);
// check results
EXPECT_NEAR(cpures[0], gpures[0], 0.1); EXPECT_NEAR(cpures[0], gpures[0], 0.1);
EXPECT_NEAR(cpures[1], gpures[1], 0.1); EXPECT_NEAR(cpures[1], gpures[1], 0.1);
EXPECT_NEAR(cpures[2], gpures[2], 0.1); EXPECT_NEAR(cpures[2], gpures[2], 0.1);
@@ -1059,17 +1171,27 @@ TEST_P(CountNonZero, MAT)
typedef ArithmTestBase Phase; typedef ArithmTestBase Phase;
TEST_P(Phase, DISABLED_Mat) TEST_P(Phase, angleInDegrees)
{ {
for (int angelInDegrees = 0; angelInDegrees < 2; angelInDegrees++)
{
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); random_roi();
cv::phase(src1_roi, src2_roi, dst1_roi, angelInDegrees ? true : false); cv::phase(src1_roi, src2_roi, dst1_roi, true);
cv::ocl::phase(gsrc1, gsrc2, gdst1, angelInDegrees ? true : false); cv::ocl::phase(gsrc1, gsrc2, gdst1, true);
Near(1e-2); Near(1e-2);
} }
}
TEST_P(Phase, angleInRadians)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
cv::phase(src1_roi, src2_roi, dst1_roi);
cv::ocl::phase(gsrc1, gsrc2, gdst1);
Near(1e-2);
} }
} }
@@ -1301,32 +1423,136 @@ TEST_P(AddWeighted, Mat)
} }
} }
//////////////////////////////// setIdentity /////////////////////////////////////////////////
typedef ArithmTestBase SetIdentity;
TEST_P(SetIdentity, Mat)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
cv::setIdentity(dst1_roi, val);
cv::ocl::setIdentity(gdst1, val);
Near(0);
}
}
//////////////////////////////// meanStdDev /////////////////////////////////////////////////
typedef ArithmTestBase MeanStdDev;
TEST_P(MeanStdDev, Mat)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
Scalar cpu_mean, cpu_stddev;
Scalar gpu_mean, gpu_stddev;
cv::meanStdDev(src1_roi, cpu_mean, cpu_stddev);
cv::ocl::meanStdDev(gsrc1, gpu_mean, gpu_stddev);
for (int i = 0; i < 4; ++i)
{
EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 0.1);
EXPECT_NEAR(cpu_stddev[i], gpu_stddev[i], 0.1);
}
}
}
//////////////////////////////// Norm /////////////////////////////////////////////////
typedef ArithmTestBase Norm;
TEST_P(Norm, NORM_INF)
{
for (int relative = 0; relative < 2; ++relative)
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
int type = NORM_INF;
if (relative == 1)
type |= NORM_RELATIVE;
const double cpuRes = cv::norm(src1_roi, src2_roi, type);
const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type);
EXPECT_NEAR(cpuRes, gpuRes, 0.1);
}
}
TEST_P(Norm, NORM_L1)
{
for (int relative = 0; relative < 2; ++relative)
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
int type = NORM_L1;
if (relative == 1)
type |= NORM_RELATIVE;
const double cpuRes = cv::norm(src1_roi, src2_roi, type);
const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type);
EXPECT_NEAR(cpuRes, gpuRes, 0.1);
}
}
TEST_P(Norm, NORM_L2)
{
for (int relative = 0; relative < 2; ++relative)
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
int type = NORM_L2;
if (relative == 1)
type |= NORM_RELATIVE;
const double cpuRes = cv::norm(src1_roi, src2_roi, type);
const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type);
EXPECT_NEAR(cpuRes, gpuRes, 0.1);
}
}
//////////////////////////////////////// Instantiation ///////////////////////////////////////// //////////////////////////////////////// Instantiation /////////////////////////////////////////
INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, SqrSum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, AbsSum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, SetIdentity, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
#endif // HAVE_OPENCL #endif // HAVE_OPENCL

View File

@@ -1,63 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
typedef ::testing::TestWithParam<cv::Size> normFixture;
TEST_P(normFixture, DISABLED_accuracy)
{
const cv::Size srcSize = GetParam();
cv::Mat src1(srcSize, CV_8UC1), src2(srcSize, CV_8UC1);
cv::randu(src1, 0, 2);
cv::randu(src2, 0, 2);
cv::ocl::oclMat oclSrc1(src1), oclSrc2(src2);
double value = cv::norm(src1, src2, cv::NORM_INF);
double oclValue = cv::ocl::norm(oclSrc1, oclSrc2, cv::NORM_INF);
ASSERT_EQ(value, oclValue);
}
INSTANTIATE_TEST_CASE_P(oclNormTest, normFixture,
::testing::Values(cv::Size(500, 500), cv::Size(1000, 1000)));