From 5b598f8a0e9048dd800d479e8abd979e012effdd Mon Sep 17 00:00:00 2001 From: yao Date: Thu, 30 May 2013 16:20:31 +0800 Subject: [PATCH 01/27] a few fixes of ocl::perf test cases --- modules/ocl/perf/perf_arithm.cpp | 5 - modules/ocl/perf/perf_filters.cpp | 8 +- modules/ocl/perf/perf_imgproc.cpp | 197 ++---------------------------- 3 files changed, 10 insertions(+), 200 deletions(-) diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp index 4f690e091..3ef0634e7 100644 --- a/modules/ocl/perf/perf_arithm.cpp +++ b/modules/ocl/perf/perf_arithm.cpp @@ -62,7 +62,6 @@ PERFTEST(lut) gen(src, size, size, all_type[j], 0, 256); gen(lut, 1, 256, CV_8UC1, 0, 1); - dst = src; LUT(src, lut, dst); @@ -233,8 +232,6 @@ PERFTEST(Mul) gen(src1, size, size, all_type[j], 0, 256); gen(src2, size, size, all_type[j], 0, 256); - dst = src1; - dst.setTo(0); multiply(src1, src2, dst); @@ -281,8 +278,6 @@ PERFTEST(Div) gen(src1, size, size, all_type[j], 0, 256); gen(src2, size, size, all_type[j], 0, 256); - dst = src1; - dst.setTo(0); divide(src1, src2, dst); diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index c8c840d0e..5cf92711e 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -291,9 +291,7 @@ PERFTEST(GaussianBlur) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; - gen(src, size, size, all_type[j], 0, 256); - dst = src; - dst.setTo(0); + gen(src, size, size, all_type[j], 5, 16); GaussianBlur(src, dst, Size(9, 9), 0); @@ -346,8 +344,8 @@ PERFTEST(filter2D) Mat kernel; gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0); - Mat dst, ocl_dst; - dst.setTo(0); + Mat dst, ocl_dst; + cv::filter2D(src, dst, -1, kernel); CPU_ON; diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index 18c7429e8..0aef8b27e 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -674,8 +674,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size coor.y = static_cast(y0); return coor; } -void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit); -void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit) + +static void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit) { if( src_roi.empty() ) CV_Error( CV_StsBadArg, "The input image is empty" ); @@ -683,6 +683,8 @@ void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::T if( src_roi.depth() != CV_8U || src_roi.channels() != 4 ) CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); + dst_roi.create(src_roi.size(), src_roi.type()); + CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) ); CV_Assert( !(dst_roi.step & 0x3) ); @@ -725,9 +727,6 @@ PERFTEST(meanShiftFiltering) SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4"; gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - //gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - dst = src; - dst.setTo(0); cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1); @@ -756,201 +755,21 @@ PERFTEST(meanShiftFiltering) TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); } } -///////////// meanShiftProc//////////////////////// -#if 0 -COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab) -{ - - int isr2 = sr * sr; - int c0, c1, c2, c3; - int iter; - uchar *ptr = NULL; - uchar *pstart = NULL; - int revx = 0, revy = 0; - c0 = sptr[0]; - c1 = sptr[1]; - c2 = sptr[2]; - c3 = sptr[3]; - - // iterate meanshift procedure - for (iter = 0; iter < maxIter; iter++) - { - int count = 0; - int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; - - //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) - int minx = x0 - sp; - int miny = y0 - sp; - int maxx = x0 + sp; - int maxy = y0 + sp; - - //deal with the image boundary - if (minx < 0) - { - minx = 0; - } - - if (miny < 0) - { - miny = 0; - } - - if (maxx >= size.width) - { - maxx = size.width - 1; - } - - if (maxy >= size.height) - { - maxy = size.height - 1; - } - - if (iter == 0) - { - pstart = sptr; - } - else - { - pstart = pstart + revy * sstep + (revx << 2); //point to the new position - } - - ptr = pstart; - ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row - - for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) - { - int rowCount = 0; - int x = minx; -#if CV_ENABLE_UNROLLED - - for (; x + 4 <= maxx; x += 4, ptr += 16) - { - int t0, t1, t2; - t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x; - rowCount++; - } - - t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 1; - rowCount++; - } - - t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 2; - rowCount++; - } - - t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 3; - rowCount++; - } - } - -#endif - - for (; x <= maxx; x++, ptr += 4) - { - int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x; - rowCount++; - } - } - - if (rowCount == 0) - { - continue; - } - - count += rowCount; - sy += y * rowCount; - } - - if (count == 0) - { - break; - } - - int x1 = sx / count; - int y1 = sy / count; - s0 = s0 / count; - s1 = s1 / count; - s2 = s2 / count; - - bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + - tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); - - //revise the pointer corresponding to the new (y0,x0) - revx = x1 - x0; - revy = y1 - y0; - - x0 = x1; - y0 = y1; - c0 = s0; - c1 = s1; - c2 = s2; - - if (stopFlag) - { - break; - } - } //for iter - - dptr[0] = (uchar)c0; - dptr[1] = (uchar)c1; - dptr[2] = (uchar)c2; - dptr[3] = (uchar)c3; - - COOR coor; - coor.x = static_cast(x0); - coor.y = static_cast(y0); - return coor; -} -#endif void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit) { - if (src_roi.empty()) { CV_Error(CV_StsBadArg, "The input image is empty"); } - if (src_roi.depth() != CV_8U || src_roi.channels() != 4) { CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported"); } + dst_roi.create(src_roi.size(), src_roi.type()); + dstCoor_roi.create(src_roi.size(), CV_16SC2); + CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) && (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows)); CV_Assert(!(dstCoor_roi.step & 0x3)); @@ -1008,8 +827,6 @@ PERFTEST(meanShiftProc) SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 "; gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - gen(dst[0], size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - gen(dst[1], size, size, CV_16SC2, Scalar::all(0), Scalar::all(256)); meanShiftProc_(src, dst[0], dst[1], 5, 6, crit); From fdc133d8c9e112d4a20ffc9f2efdfbb7754f048b Mon Sep 17 00:00:00 2001 From: peng xiao Date: Thu, 30 May 2013 09:37:05 +0800 Subject: [PATCH 02/27] Fix ocl::pyrup kernel build on Mac. --- modules/ocl/src/opencl/pyr_up.cl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/modules/ocl/src/opencl/pyr_up.cl b/modules/ocl/src/opencl/pyr_up.cl index ef41c9408..4afa7b710 100644 --- a/modules/ocl/src/opencl/pyr_up.cl +++ b/modules/ocl/src/opencl/pyr_up.cl @@ -389,8 +389,8 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, float4 sum = (float4)(0,0,0,0); - const int evenFlag = (int)((tidx & 1) == 0); - const int oddFlag = (int)((tidx & 1) != 0); + const float4 evenFlag = (float4)((tidx & 1) == 0); + const float4 oddFlag = (float4)((tidx & 1) != 0); const bool eveny = ((tidy & 1) == 0); float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); @@ -455,6 +455,7 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum); } } + /////////////////////////////////////////////////////////////////////// ////////////////////////// CV_16UC4 ////////////////////////////////// /////////////////////////////////////////////////////////////////////// @@ -492,8 +493,8 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, float4 sum = (float4)(0,0,0,0); - const int evenFlag = (int)((get_local_id(0) & 1) == 0); - const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const float4 evenFlag = (float4)((get_local_id(0) & 1) == 0); + const float4 oddFlag = (float4)((get_local_id(0) & 1) != 0); const bool eveny = ((get_local_id(1) & 1) == 0); const int tidx = get_local_id(0); @@ -604,8 +605,8 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst, float4 sum = (float4)(0,0,0,0); - const int evenFlag = (int)((tidx & 1) == 0); - const int oddFlag = (int)((tidx & 1) != 0); + const float4 evenFlag = (float4)((tidx & 1) == 0); + const float4 oddFlag = (float4)((tidx & 1) != 0); const bool eveny = ((tidy & 1) == 0); float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); @@ -669,4 +670,4 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst, { dst[x + y * dstStep] = 4.0f * sum; } -} \ No newline at end of file +} From b1c248fcc9eaa24baabf55fde7fcff096d62dcb2 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Fri, 31 May 2013 10:53:52 +0800 Subject: [PATCH 03/27] Fix ocl::filter2D. In current implementation, this function only works when anchor point is in the kernel center and kernel size supported is either 3x3 or 5x5. --- modules/ocl/include/opencv2/ocl/ocl.hpp | 2 ++ modules/ocl/src/filtering.cpp | 10 +++++++--- modules/ocl/src/opencl/filtering_laplacian.cl | 12 ++++++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 785248cfc..01b0f72d2 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -689,6 +689,8 @@ namespace cv } //! applies non-separable 2D linear filter to the image + // Note, at the moment this function only works when anchor point is in the kernel center + // and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 56a70ae53..f35a26e33 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -645,7 +645,11 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols)); args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows)); - openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth); + const int buffer_size = 100; + char opt_buffer [buffer_size] = ""; + sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y); + + openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer); } Ptr cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, Point anchor, int borderType) @@ -656,7 +660,7 @@ Ptr cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const oclMat gpu_krnl; int nDivisor; - normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true); + normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false); normalizeAnchor(anchor, ksize); return Ptr(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)], @@ -1172,7 +1176,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy)); args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data)); - openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH); + openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option); } Ptr cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype) diff --git a/modules/ocl/src/opencl/filtering_laplacian.cl b/modules/ocl/src/opencl/filtering_laplacian.cl index 96a2f51ef..8535eb1a5 100644 --- a/modules/ocl/src/opencl/filtering_laplacian.cl +++ b/modules/ocl/src/opencl/filtering_laplacian.cl @@ -82,9 +82,9 @@ ////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////Macro for define elements number per thread///////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// -#define ANCHOR 3 -#define ANX 1 -#define ANY 1 +//#define ANCHOR 3 +//#define ANX 1 +//#define ANY 1 #define ROWS_PER_GROUP 4 #define ROWS_PER_GROUP_BITS 2 @@ -185,7 +185,7 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x for(int i = 0; i < ANCHOR; i++) { -#pragma unroll 3 +#pragma unroll for(int j = 0; j < ANCHOR; j++) { if(dst_rows_index < dst_rows_end) @@ -295,7 +295,7 @@ __kernel void filter2D_C1_D5(__global float *src, int src_step, int src_offset_x for(int i = 0; i < ANCHOR; i++) { -#pragma unroll 3 +#pragma unroll for(int j = 0; j < ANCHOR; j++) { if(dst_rows_index < dst_rows_end) @@ -410,7 +410,7 @@ __kernel void filter2D_C4_D0(__global uchar4 *src, int src_step, int src_offset_ for(int i = 0; i < ANCHOR; i++) { -#pragma unroll 3 +#pragma unroll for(int j = 0; j < ANCHOR; j++) { if(dst_rows_index < dst_rows_end) From abefcc60612617cdc48b278e347bdf0df4b0139b Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 31 May 2013 15:16:03 +0800 Subject: [PATCH 04/27] Adjust perf_filters, as this function only supports 3x3 kernel --- modules/ocl/perf/perf_filters.cpp | 47 +++++++++++++++---------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index 5cf92711e..a05301b34 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -337,39 +337,38 @@ PERFTEST(filter2D) { gen(src, size, size, all_type[j], 0, 256); - for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1) - { - SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ; + const int ksize = 3; - Mat kernel; - gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0); + SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ; - Mat dst, ocl_dst; + Mat kernel; + gen(kernel, ksize, ksize, CV_32SC1, -3.0, 3.0); - cv::filter2D(src, dst, -1, kernel); + Mat dst, ocl_dst; - CPU_ON; - cv::filter2D(src, dst, -1, kernel); - CPU_OFF; + cv::filter2D(src, dst, -1, kernel); - ocl::oclMat d_src(src), d_dst; + CPU_ON; + cv::filter2D(src, dst, -1, kernel); + CPU_OFF; - WARMUP_ON; - ocl::filter2D(d_src, d_dst, -1, kernel); - WARMUP_OFF; + ocl::oclMat d_src(src), d_dst; - GPU_ON; - ocl::filter2D(d_src, d_dst, -1, kernel); - GPU_OFF; + WARMUP_ON; + ocl::filter2D(d_src, d_dst, -1, kernel); + WARMUP_OFF; - GPU_FULL_ON; - d_src.upload(src); - ocl::filter2D(d_src, d_dst, -1, kernel); - d_dst.download(ocl_dst); - GPU_FULL_OFF; + GPU_ON; + ocl::filter2D(d_src, d_dst, -1, kernel); + GPU_OFF; - TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); - } + GPU_FULL_ON; + d_src.upload(src); + ocl::filter2D(d_src, d_dst, -1, kernel); + d_dst.download(ocl_dst); + GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } From 15a213d3fca17d785441044d7126300d4f4ed4ef Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 31 May 2013 15:35:54 +0800 Subject: [PATCH 05/27] fix a crash on Linux --- modules/ocl/src/pyrlk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index a3e65dde3..8e9420480 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -142,7 +142,7 @@ static void lkSparse_run(oclMat &I, oclMat &J, int wave_size = queryDeviceInfo(kernel); openCLSafeCall(clReleaseKernel(kernel)); - static char opt[16] = {0}; + static char opt[32] = {0}; sprintf(opt, " -D WAVE_SIZE=%d", wave_size); openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, From d85f27b53791c144c70a7746ab3002d8d281e30e Mon Sep 17 00:00:00 2001 From: peng xiao Date: Fri, 31 May 2013 16:06:56 +0800 Subject: [PATCH 06/27] Update ocl::surf_matcher sample. The new sample adjust some parameters thus it should always be able to calculate valid homography matrix when input is box.png and box_in_scene.png. Pure cpp surf and bfmatcher implementation is also added to show the user its accuracy and performance. --- samples/ocl/surf_matcher.cpp | 413 ++++++++++++++++++++++++----------- 1 file changed, 285 insertions(+), 128 deletions(-) diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp index ea6ee97cb..8734e3f81 100644 --- a/samples/ocl/surf_matcher.cpp +++ b/samples/ocl/surf_matcher.cpp @@ -46,156 +46,101 @@ #include #include #include "opencv2/core/core.hpp" -#include "opencv2/features2d/features2d.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/ocl/ocl.hpp" -#include "opencv2/nonfree/nonfree.hpp" #include "opencv2/nonfree/ocl.hpp" #include "opencv2/calib3d/calib3d.hpp" +#include "opencv2/nonfree/nonfree.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; -//#define USE_CPU_DESCRIPTOR // use cpu descriptor extractor until ocl descriptor extractor is fixed -//#define USE_CPU_BFMATCHER +const int LOOP_NUM = 10; +const int GOOD_PTS_MAX = 50; +const float GOOD_PORTION = 0.15f; + +namespace +{ void help(); void help() { - cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << endl; - cout << "\nUsage:\n\tsurf_matcher --left --right " << endl; + std::cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << std::endl; + std::cout << "\nUsage:\n\tsurf_matcher --left --right [-c]" << std::endl; + std::cout << "\nExample:\n\tsurf_matcher --left box.png --right box_in_scene.png" << std::endl; } +int64 work_begin = 0; +int64 work_end = 0; -//////////////////////////////////////////////////// -// This program demonstrates the usage of SURF_OCL. -// use cpu findHomography interface to calculate the transformation matrix -int main(int argc, char* argv[]) +void workBegin() +{ + work_begin = getTickCount(); +} +void workEnd() { - if (argc != 5 && argc != 1) + work_end = getTickCount() - work_begin; +} +double getTime(){ + return work_end /((double)cvGetTickFrequency() * 1000.); +} + +template +struct SURFDetector +{ + KPDetector surf; + SURFDetector(double hessian = 800.0) + :surf(hessian) { - help(); - return -1; } - vector info; - if(!cv::ocl::getDevice(info)) + template + void operator()(const T& in, const T& mask, vector& pts, T& descriptors, bool useProvided = false) { - cout << "Error: Did not find a valid OpenCL device!" << endl; - return -1; + surf(in, mask, pts, descriptors, useProvided); } - Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey; - oclMat img1, img2; - if(argc != 5) +}; + +template +struct SURFMatcher +{ + KPMatcher matcher; + template + void match(const T& in1, const T& in2, vector& matches) { - cpu_img1 = imread("o.png"); - cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY); - img1 = cpu_img1_grey; - CV_Assert(!img1.empty()); - - cpu_img2 = imread("r2.png"); - cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY); - img2 = cpu_img2_grey; - } - else - { - for (int i = 1; i < argc; ++i) - { - if (string(argv[i]) == "--left") - { - cpu_img1 = imread(argv[++i]); - cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY); - img1 = cpu_img1_grey; - CV_Assert(!img1.empty()); - } - else if (string(argv[i]) == "--right") - { - cpu_img2 = imread(argv[++i]); - cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY); - img2 = cpu_img2_grey; - } - else if (string(argv[i]) == "--help") - { - help(); - return -1; - } - } + matcher.match(in1, in2, matches); } +}; - SURF_OCL surf; - //surf.hessianThreshold = 400.f; - //surf.extended = false; - - // detecting keypoints & computing descriptors - oclMat keypoints1GPU, keypoints2GPU; - oclMat descriptors1GPU, descriptors2GPU; - - // downloading results - vector keypoints1, keypoints2; - vector matches; - - -#ifndef USE_CPU_DESCRIPTOR - surf(img1, oclMat(), keypoints1GPU, descriptors1GPU); - surf(img2, oclMat(), keypoints2GPU, descriptors2GPU); - - surf.downloadKeypoints(keypoints1GPU, keypoints1); - surf.downloadKeypoints(keypoints2GPU, keypoints2); - - -#ifdef USE_CPU_BFMATCHER - //BFMatcher - BFMatcher matcher(cv::NORM_L2); - matcher.match(Mat(descriptors1GPU), Mat(descriptors2GPU), matches); -#else - BruteForceMatcher_OCL_base matcher(BruteForceMatcher_OCL_base::L2Dist); - matcher.match(descriptors1GPU, descriptors2GPU, matches); -#endif - -#else - surf(img1, oclMat(), keypoints1GPU); - surf(img2, oclMat(), keypoints2GPU); - surf.downloadKeypoints(keypoints1GPU, keypoints1); - surf.downloadKeypoints(keypoints2GPU, keypoints2); - - // use SURF_OCL to detect keypoints and use SURF to extract descriptors - SURF surf_cpu; - Mat descriptors1, descriptors2; - surf_cpu(cpu_img1, Mat(), keypoints1, descriptors1, true); - surf_cpu(cpu_img2, Mat(), keypoints2, descriptors2, true); - matcher.match(descriptors1, descriptors2, matches); -#endif - cout << "OCL: FOUND " << keypoints1GPU.cols << " keypoints on first image" << endl; - cout << "OCL: FOUND " << keypoints2GPU.cols << " keypoints on second image" << endl; - - double max_dist = 0; double min_dist = 100; - //-- Quick calculation of max and min distances between keypoints - for( size_t i = 0; i < keypoints1.size(); i++ ) - { - double dist = matches[i].distance; - if( dist < min_dist ) min_dist = dist; - if( dist > max_dist ) max_dist = dist; - } - - printf("-- Max dist : %f \n", max_dist ); - printf("-- Min dist : %f \n", min_dist ); - - //-- Draw only "good" matches (i.e. whose distance is less than 2.5*min_dist ) +Mat drawGoodMatches( + const Mat& cpu_img1, + const Mat& cpu_img2, + const vector& keypoints1, + const vector& keypoints2, + vector& matches, + vector& scene_corners_ + ) +{ + //-- Sort matches and preserve top 10% matches + std::sort(matches.begin(), matches.end()); std::vector< DMatch > good_matches; + double minDist = matches.front().distance, + maxDist = matches.back().distance; - for( size_t i = 0; i < keypoints1.size(); i++ ) + const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION)); + for( int i = 0; i < ptsPairs; i++ ) { - if( matches[i].distance < 3*min_dist ) - { - good_matches.push_back( matches[i]); - } + good_matches.push_back( matches[i] ); } + std::cout << "\nMax distance: " << maxDist << std::endl; + std::cout << "Min distance: " << minDist << std::endl; + + std::cout << "Calculating homography using " << ptsPairs << " point pairs." << std::endl; // drawing the results Mat img_matches; drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2, good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), - vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); + vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); //-- Localize the object std::vector obj; @@ -207,26 +152,238 @@ int main(int argc, char* argv[]) obj.push_back( keypoints1[ good_matches[i].queryIdx ].pt ); scene.push_back( keypoints2[ good_matches[i].trainIdx ].pt ); } - Mat H = findHomography( obj, scene, CV_RANSAC ); - //-- Get the corners from the image_1 ( the object to be "detected" ) std::vector obj_corners(4); obj_corners[0] = cvPoint(0,0); obj_corners[1] = cvPoint( cpu_img1.cols, 0 ); obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = cvPoint( 0, cpu_img1.rows ); std::vector scene_corners(4); - + + Mat H = findHomography( obj, scene, CV_RANSAC ); perspectiveTransform( obj_corners, scene_corners, H); + scene_corners_ = scene_corners; + //-- Draw lines between the corners (the mapped object in the scene - image_2 ) - line( img_matches, scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); - line( img_matches, scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); - line( img_matches, scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); - line( img_matches, scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); + line( img_matches, + scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + return img_matches; +} + +} +//////////////////////////////////////////////////// +// This program demonstrates the usage of SURF_OCL. +// use cpu findHomography interface to calculate the transformation matrix +int main(int argc, char* argv[]) +{ + vector info; + if(cv::ocl::getDevice(info) == 0) + { + std::cout << "Error: Did not find a valid OpenCL device!" << std::endl; + return -1; + } + ocl::setDevice(info[0]); + + Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey; + oclMat img1, img2; + bool useCPU = false; + bool useGPU = false; + bool useALL = false; + + for (int i = 1; i < argc; ++i) + { + if (string(argv[i]) == "--left") + { + cpu_img1 = imread(argv[++i]); + CV_Assert(!cpu_img1.empty()); + cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY); + img1 = cpu_img1_grey; + } + else if (string(argv[i]) == "--right") + { + cpu_img2 = imread(argv[++i]); + CV_Assert(!cpu_img2.empty()); + cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY); + img2 = cpu_img2_grey; + } + else if (string(argv[i]) == "-c") + { + useCPU = true; + useGPU = false; + useALL = false; + }else if(string(argv[i]) == "-g") + { + useGPU = true; + useCPU = false; + useALL = false; + }else if(string(argv[i]) == "-a") + { + useALL = true; + useCPU = false; + useGPU = false; + } + else if (string(argv[i]) == "--help") + { + help(); + return -1; + } + } + if(!useCPU) + { + std::cout + << "Device name:" + << info[0].DeviceName[0] + << std::endl; + } + double surf_time = 0.; + + //declare input/output + vector keypoints1, keypoints2; + vector matches; + + vector gpu_keypoints1; + vector gpu_keypoints2; + vector gpu_matches; + + Mat descriptors1CPU, descriptors2CPU; + + oclMat keypoints1GPU, keypoints2GPU; + oclMat descriptors1GPU, descriptors2GPU; + + //instantiate detectors/matchers + SURFDetector cpp_surf; + SURFDetector ocl_surf; + + SURFMatcher cpp_matcher; + SURFMatcher ocl_matcher; + + //-- start of timing section + if (useCPU) + { + for (int i = 0; i <= LOOP_NUM; i++) + { + if(i == 1) workBegin(); + cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU); + cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU); + cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches); + } + workEnd(); + std::cout << "CPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl; + std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl; + + surf_time = getTime(); + std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n"; + } + else if(useGPU) + { + for (int i = 0; i <= LOOP_NUM; i++) + { + if(i == 1) workBegin(); + ocl_surf(img1, oclMat(), keypoints1, descriptors1GPU); + ocl_surf(img2, oclMat(), keypoints2, descriptors2GPU); + ocl_matcher.match(descriptors1GPU, descriptors2GPU, matches); + } + workEnd(); + std::cout << "OCL: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl; + std::cout << "OCL: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl; + + surf_time = getTime(); + std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n"; + }else + { + //cpu runs + for (int i = 0; i <= LOOP_NUM; i++) + { + if(i == 1) workBegin(); + cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU); + cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU); + cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches); + } + workEnd(); + std::cout << "\nCPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl; + std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl; + + surf_time = getTime(); + std::cout << "(CPP)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl; + + //gpu runs + for (int i = 0; i <= LOOP_NUM; i++) + { + if(i == 1) workBegin(); + ocl_surf(img1, oclMat(), gpu_keypoints1, descriptors1GPU); + ocl_surf(img2, oclMat(), gpu_keypoints2, descriptors2GPU); + ocl_matcher.match(descriptors1GPU, descriptors2GPU, gpu_matches); + } + workEnd(); + std::cout << "\nOCL: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl; + std::cout << "OCL: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl; + + surf_time = getTime(); + std::cout << "(OCL)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n"; + + } + + //-------------------------------------------------------------------------- + std::vector cpu_corner; + Mat img_matches = drawGoodMatches(cpu_img1, cpu_img2, keypoints1, keypoints2, matches, cpu_corner); + + std::vector gpu_corner; + Mat ocl_img_matches; + if(useALL || (!useCPU&&!useGPU)) + { + ocl_img_matches = drawGoodMatches(cpu_img1, cpu_img2, gpu_keypoints1, gpu_keypoints2, gpu_matches, gpu_corner); + + //check accuracy + std::cout<<"\nCheck accuracy:\n"; + + if(cpu_corner.size()!=gpu_corner.size()) + std::cout<<"Failed\n"; + else + { + bool result = false; + for(int i = 0; i < cpu_corner.size(); i++) + { + if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10) + ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10)) + { + std::cout<<"Failed\n"; + result = false; + break; + } + result = true; + } + if(result) + std::cout<<"Passed\n"; + } + } //-- Show detected matches - namedWindow("ocl surf matches", 0); - imshow("ocl surf matches", img_matches); - waitKey(0); + if (useCPU) + { + namedWindow("cpu surf matches", 0); + imshow("cpu surf matches", img_matches); + } + else if(useGPU) + { + namedWindow("ocl surf matches", 0); + imshow("ocl surf matches", img_matches); + }else + { + namedWindow("cpu surf matches", 0); + imshow("cpu surf matches", img_matches); + namedWindow("ocl surf matches", 0); + imshow("ocl surf matches", ocl_img_matches); + } + waitKey(0); return 0; } From 97b86aa259ec1b33d1c5b28bd04d6247e7671a93 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Fri, 31 May 2013 16:48:40 +0800 Subject: [PATCH 07/27] Initialize OpenCL context at the end of getDevice call. Added for better compatibility with the current samples/test cases. User now will be able to initialize OpenCL context explicitly with ocl::getDevice api. This may be obsoleted in future releases. --- modules/ocl/src/initialization.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index fd462ad91..a9cd08b9f 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -333,6 +333,10 @@ namespace cv oclinfo.push_back(ocltmpinfo); } } + if(devcienums > 0) + { + setDevice(oclinfo[0]); + } return devcienums; } From cdb16f112074697445cc3c960142bc05dbfa916b Mon Sep 17 00:00:00 2001 From: peng xiao Date: Fri, 31 May 2013 17:29:55 +0800 Subject: [PATCH 08/27] Fix build error --- samples/ocl/surf_matcher.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp index 8734e3f81..038a8dc5c 100644 --- a/samples/ocl/surf_matcher.cpp +++ b/samples/ocl/surf_matcher.cpp @@ -350,7 +350,7 @@ int main(int argc, char* argv[]) else { bool result = false; - for(int i = 0; i < cpu_corner.size(); i++) + for(size_t i = 0; i < cpu_corner.size(); i++) { if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10) ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10)) From 8a4090fe3f6daa00f5421e2b9fefd2f14790c126 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Fri, 31 May 2013 17:27:42 +0400 Subject: [PATCH 09/27] Make AutoLock noncopyable (it would break on copying, anyway). --- modules/core/include/opencv2/core/core.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/core/include/opencv2/core/core.hpp b/modules/core/include/opencv2/core/core.hpp index 1c8e0e2ca..2b7791958 100644 --- a/modules/core/include/opencv2/core/core.hpp +++ b/modules/core/include/opencv2/core/core.hpp @@ -4813,6 +4813,9 @@ public: ~AutoLock() { mutex->unlock(); } protected: Mutex* mutex; +private: + AutoLock(const AutoLock&); + AutoLock& operator = (const AutoLock&); }; } From 6f006e50dc2cff6ba8b316640d769db1ac08e2ca Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Fri, 31 May 2013 18:58:30 +0400 Subject: [PATCH 10/27] setting 'char' to be signed by default since some tests fail when it's wrong (e.g. native compilation on ARM Linux) --- cmake/OpenCVCompilerOptions.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index aeed112ae..bded2d41f 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -47,6 +47,9 @@ macro(add_extra_compiler_option option) endif() endmacro() +# some OpenCV tests fail when 'char' is 'unsigned' by default +add_extra_compiler_option(-fsigned-char) + if(MINGW) # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40838 # here we are trying to workaround the problem From 081c47e3df9819e025520ea7b77cc627cd4ccdbe Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Fri, 31 May 2013 19:55:51 +0400 Subject: [PATCH 11/27] making the comment less ambigous --- cmake/OpenCVCompilerOptions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index bded2d41f..7a91b188a 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -47,7 +47,7 @@ macro(add_extra_compiler_option option) endif() endmacro() -# some OpenCV tests fail when 'char' is 'unsigned' by default +# OpenCV fails some tests when 'char' is 'unsigned' by default add_extra_compiler_option(-fsigned-char) if(MINGW) From ff28bf831f156eceba443479fc08aafbe32b1ee6 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 3 Jun 2013 14:01:04 +0400 Subject: [PATCH 12/27] disabled samples with driver api --- samples/gpu/cascadeclassifier_nvidia_api.cpp | 15 ++++++++++++--- samples/gpu/driver_api_multi.cpp | 6 +++++- samples/gpu/driver_api_stereo_multi.cpp | 6 +++++- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/samples/gpu/cascadeclassifier_nvidia_api.cpp b/samples/gpu/cascadeclassifier_nvidia_api.cpp index 99c95ab97..98195b35c 100644 --- a/samples/gpu/cascadeclassifier_nvidia_api.cpp +++ b/samples/gpu/cascadeclassifier_nvidia_api.cpp @@ -17,12 +17,21 @@ using namespace std; using namespace cv; -#if !defined(HAVE_CUDA) +#if !defined(HAVE_CUDA) || defined(__arm__) + int main( int, const char** ) { - cout << "Please compile the library with CUDA support" << endl; - return -1; +#if !defined(HAVE_CUDA) + std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true)." << std::endl; +#endif + +#if defined(__arm__) + std::cout << "Unsupported for ARM CUDA library." << std::endl; +#endif + + return 0; } + #else diff --git a/samples/gpu/driver_api_multi.cpp b/samples/gpu/driver_api_multi.cpp index 2d743f0e9..c829830e7 100644 --- a/samples/gpu/driver_api_multi.cpp +++ b/samples/gpu/driver_api_multi.cpp @@ -11,7 +11,7 @@ #include "opencv2/core/core.hpp" #include "opencv2/gpu/gpu.hpp" -#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) +#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__) int main() { @@ -23,6 +23,10 @@ int main() std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n"; #endif +#if defined(__arm__) + std::cout << "Unsupported for ARM CUDA library." << std::endl; +#endif + return 0; } diff --git a/samples/gpu/driver_api_stereo_multi.cpp b/samples/gpu/driver_api_stereo_multi.cpp index 10c397477..d4d0af451 100644 --- a/samples/gpu/driver_api_stereo_multi.cpp +++ b/samples/gpu/driver_api_stereo_multi.cpp @@ -13,7 +13,7 @@ #include "opencv2/highgui/highgui.hpp" #include "opencv2/gpu/gpu.hpp" -#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) +#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__) int main() { @@ -25,6 +25,10 @@ int main() std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n"; #endif +#if defined(__arm__) + std::cout << "Unsupported for ARM CUDA library." << std::endl; +#endif + return 0; } From dc937c10f994029c7c83b2f2ffa8106ca1354257 Mon Sep 17 00:00:00 2001 From: yao Date: Tue, 4 Jun 2013 11:31:54 +0800 Subject: [PATCH 13/27] change a test image of pyrlk --- modules/ocl/perf/perf_opticalflow.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ocl/perf/perf_opticalflow.cpp b/modules/ocl/perf/perf_opticalflow.cpp index 988797020..97283b206 100644 --- a/modules/ocl/perf/perf_opticalflow.cpp +++ b/modules/ocl/perf/perf_opticalflow.cpp @@ -48,8 +48,8 @@ ///////////// PyrLKOpticalFlow //////////////////////// PERFTEST(PyrLKOpticalFlow) { - std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"}; - std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"}; + std::string images1[] = {"rubberwhale1.png", "basketball1.png"}; + std::string images2[] = {"rubberwhale2.png", "basketball2.png"}; for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++) { From a7a94de74ae46f99b8ab7606dab1faef93c6e7fa Mon Sep 17 00:00:00 2001 From: peng xiao Date: Tue, 4 Jun 2013 15:55:33 +0800 Subject: [PATCH 14/27] Fix a bug of gfft. When user provided corners buffer is big enough to be copied to from tmpCorners_, we allow the buffer to be reused other than allocate a new cl_mem object. --- modules/ocl/src/gfft.cpp | 3 ++- modules/ocl/test/test_optflow.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/ocl/src/gfft.cpp b/modules/ocl/src/gfft.cpp index af7580bd4..7fd5e3a17 100644 --- a/modules/ocl/src/gfft.cpp +++ b/modules/ocl/src/gfft.cpp @@ -257,7 +257,8 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, if (minDistance < 1) { - corners = tmpCorners_(Rect(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1)); + Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1); + tmpCorners_(roi_range).copyTo(corners); } else { diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp index 0121be8f9..72689f3f7 100644 --- a/modules/ocl/test/test_optflow.cpp +++ b/modules/ocl/test/test_optflow.cpp @@ -121,7 +121,7 @@ TEST_P(GoodFeaturesToTrack, EmptyCorners) cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance); - cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0)); + cv::ocl::oclMat src(100, 128, CV_8UC1, cv::Scalar::all(0)); cv::ocl::oclMat corners(1, maxCorners, CV_32FC2); detector(src, corners); From f049aa7674f3f3ef985f14e65de3debfeb163b3a Mon Sep 17 00:00:00 2001 From: yao Date: Tue, 4 Jun 2013 15:59:21 +0800 Subject: [PATCH 15/27] use GoodFeaturesToTrackDetector_OCL --- samples/ocl/pyrlk_optical_flow.cpp | 54 ++++++++++++++---------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp index 1b2b1d379..cc8d886f7 100644 --- a/samples/ocl/pyrlk_optical_flow.cpp +++ b/samples/ocl/pyrlk_optical_flow.cpp @@ -29,6 +29,7 @@ static double getTime(){ static void download(const oclMat& d_mat, vector& vec) { + vec.clear(); vec.resize(d_mat.cols); Mat mat(1, d_mat.cols, CV_32FC2, (void*)&vec[0]); d_mat.download(mat); @@ -36,6 +37,7 @@ static void download(const oclMat& d_mat, vector& vec) static void download(const oclMat& d_mat, vector& vec) { + vec.clear(); vec.resize(d_mat.cols); Mat mat(1, d_mat.cols, CV_8UC1, (void*)&vec[0]); d_mat.download(mat); @@ -119,14 +121,15 @@ int main(int argc, const char* argv[]) bool useCPU = cmd.get("s"); bool useCamera = cmd.get("c"); int inputName = cmd.get("c"); - oclMat d_nextPts, d_status; + oclMat d_nextPts, d_status; + GoodFeaturesToTrackDetector_OCL d_features(points); Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE); Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE); PyrLKOpticalFlow d_pyrLK; - vector pts; - vector nextPts; - vector status; + vector pts(points); + vector nextPts(points); + vector status(points); vector err; if (frame0.empty() || frame1.empty()) @@ -199,29 +202,24 @@ int main(int argc, const char* argv[]) ptr1 = frame0Gray; } - pts.clear(); - - cv::goodFeaturesToTrack(ptr0, pts, points, 0.01, 0.0); - - if (pts.size() == 0) - { - continue; - } - if (useCPU) { - cv::calcOpticalFlowPyrLK(ptr0, ptr1, pts, nextPts, status, err); + pts.clear(); + goodFeaturesToTrack(ptr0, pts, points, 0.01, 0.0); + if(pts.size() == 0) + continue; + calcOpticalFlowPyrLK(ptr0, ptr1, pts, nextPts, status, err); } else { - oclMat d_prevPts(1, points, CV_32FC2, (void*)&pts[0]); - - d_pyrLK.sparse(oclMat(ptr0), oclMat(ptr1), d_prevPts, d_nextPts, d_status); - - download(d_prevPts, pts); + oclMat d_img(ptr0), d_prevPts; + d_features(d_img, d_prevPts); + if(!d_prevPts.rows || !d_prevPts.cols) + continue; + d_pyrLK.sparse(d_img, oclMat(ptr1), d_prevPts, d_nextPts, d_status); + d_features.downloadPoints(d_prevPts,pts); download(d_nextPts, nextPts); download(d_status, status); - } if (i%2 == 1) frame1.copyTo(frameCopy); @@ -246,21 +244,19 @@ nocamera: for(int i = 0; i <= LOOP_NUM;i ++) { cout << "loop" << i << endl; - if (i > 0) workBegin(); - - cv::goodFeaturesToTrack(frame0, pts, points, 0.01, minDist); + if (i > 0) workBegin(); if (useCPU) { - cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); + goodFeaturesToTrack(frame0, pts, points, 0.01, minDist); + calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); } else { - oclMat d_prevPts(1, points, CV_32FC2, (void*)&pts[0]); - - d_pyrLK.sparse(oclMat(frame0), oclMat(frame1), d_prevPts, d_nextPts, d_status); - - download(d_prevPts, pts); + oclMat d_img(frame0), d_prevPts; + d_features(d_img, d_prevPts); + d_pyrLK.sparse(d_img, oclMat(frame1), d_prevPts, d_nextPts, d_status); + d_features.downloadPoints(d_prevPts, pts); download(d_nextPts, nextPts); download(d_status, status); } From 3aea7e8f8dc4cf572d919d81b14d1f69c9bd2f0d Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 4 Jun 2013 12:51:36 +0400 Subject: [PATCH 16/27] fixed gpu module build on arm platform links with CUDA driver library only if we use video encoding/decoding --- modules/gpu/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index 87c172da5..a471da008 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -42,10 +42,10 @@ if(HAVE_CUDA) ocv_cuda_compile(cuda_objs ${lib_cuda} ${ncv_cuda}) - set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_npp_LIBRARY}) + set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) if(WITH_NVCUVID) - set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvid_LIBRARY}) + set(cuda_link_libs ${cuda_link_libs} ${CUDA_CUDA_LIBRARY} ${CUDA_nvcuvid_LIBRARY}) endif() if(WIN32) From 918381875adb0b4cb4fd9a5d061b47ba99b3db7e Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 4 Jun 2013 13:57:35 +0400 Subject: [PATCH 17/27] rewrite gpu/device/vec_math.hpp file old version isn't compiled with CUDA 5.5 new version doesn't depend on functional.hpp --- .../include/opencv2/gpu/device/vec_math.hpp | 1106 +++++++++++++---- modules/gpu/src/cuda/ccomponetns.cu | 4 +- modules/gpu/src/cuda/hough.cu | 5 +- 3 files changed, 854 insertions(+), 261 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/device/vec_math.hpp b/modules/gpu/include/opencv2/gpu/device/vec_math.hpp index 1c46dc0c3..a6cb43a2f 100644 --- a/modules/gpu/include/opencv2/gpu/device/vec_math.hpp +++ b/modules/gpu/include/opencv2/gpu/device/vec_math.hpp @@ -43,288 +43,880 @@ #ifndef __OPENCV_GPU_VECMATH_HPP__ #define __OPENCV_GPU_VECMATH_HPP__ -#include "saturate_cast.hpp" #include "vec_traits.hpp" -#include "functional.hpp" +#include "saturate_cast.hpp" namespace cv { namespace gpu { namespace device { - namespace vec_math_detail - { - template struct SatCastHelper; - template struct SatCastHelper<1, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x)); - } - }; - template struct SatCastHelper<2, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y)); - } - }; - template struct SatCastHelper<3, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z)); - } - }; - template struct SatCastHelper<4, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z), saturate_cast(v.w)); - } - }; - template static __device__ __forceinline__ VecD saturate_cast_caller(const VecS& v) +// saturate_cast + +namespace vec_math_detail +{ + template struct SatCastHelper; + template struct SatCastHelper<1, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) { - return SatCastHelper::cn, VecD>::cast(v); + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x)); } - } - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - -#define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \ - __device__ __forceinline__ TypeVec::result_type, 1>::vec_type op(const type ## 1 & a) \ - { \ - func f; \ - return VecTraits::result_type, 1>::vec_type>::make(f(a.x)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 2>::vec_type op(const type ## 2 & a) \ - { \ - func f; \ - return VecTraits::result_type, 2>::vec_type>::make(f(a.x), f(a.y)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 3>::vec_type op(const type ## 3 & a) \ - { \ - func f; \ - return VecTraits::result_type, 3>::vec_type>::make(f(a.x), f(a.y), f(a.z)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 4>::vec_type op(const type ## 4 & a) \ - { \ - func f; \ - return VecTraits::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \ - } - - namespace vec_math_detail + }; + template struct SatCastHelper<2, VecD> { - template struct BinOpTraits + template static __device__ __forceinline__ VecD cast(const VecS& v) { - typedef int argument_type; - }; - template struct BinOpTraits + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y)); + } + }; + template struct SatCastHelper<3, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) { - typedef T argument_type; - }; - template struct BinOpTraits + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z)); + } + }; + template struct SatCastHelper<4, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) { - typedef double argument_type; - }; - template struct BinOpTraits - { - typedef double argument_type; - }; - template <> struct BinOpTraits - { - typedef double argument_type; - }; - template struct BinOpTraits - { - typedef float argument_type; - }; - template struct BinOpTraits - { - typedef float argument_type; - }; - template <> struct BinOpTraits - { - typedef float argument_type; - }; - template <> struct BinOpTraits - { - typedef double argument_type; - }; - template <> struct BinOpTraits - { - typedef double argument_type; - }; + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z), saturate_cast(v.w)); + } + }; + + template static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v) + { + return SatCastHelper::cn, VecD>::cast(v); + } +} + +template static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper(v);} + +// unary operators + +#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \ + { \ + return VecTraits::make(op (a.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y), op (a.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y), op (a.z), op (a.w)); \ } -#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \ - __device__ __forceinline__ TypeVec::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \ +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP + +// unary functions + +#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \ { \ - func f; \ - return VecTraits::result_type, 1>::vec_type>::make(f(a.x, b.x)); \ + return VecTraits::make(func (a.x)); \ } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \ { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \ + return VecTraits::make(func (a.x), func (a.y)); \ } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \ { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \ + return VecTraits::make(func (a.x), func (a.y), func (a.z)); \ } \ - __device__ __forceinline__ TypeVec::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \ { \ - func f; \ - return VecTraits::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \ - { \ - func f; \ - return VecTraits::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \ - { \ - func f; \ - return VecTraits::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \ + return VecTraits::make(func (a.x), func (a.y), func (a.z), func (a.w)); \ } -#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator +, plus) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator -, minus) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator *, multiplies) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator /, divides) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator -, negate) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ==, equal_to) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator !=, not_equal_to) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator > , greater) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator < , less) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator >=, greater_equal) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator <=, less_equal) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &&, logical_and) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ||, logical_or) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! , logical_not) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, abs, abs_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp10, exp10_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log, log_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log2, log2_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log10, log10_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sin, sin_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cos, cos_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tan, tan_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asin, asin_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acos, acos_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atan, atan_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sinh, sinh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cosh, cosh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tanh, tanh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asinh, asinh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acosh, acosh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atanh, atanh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot, hypot_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, atan2, atan2_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, pow, pow_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot_sqr, hypot_sqr_func) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double) -#define OPENCV_GPU_IMPLEMENT_VEC_INT_OP(type) \ - OPENCV_GPU_IMPLEMENT_VEC_OP(type) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &, bit_and) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator |, bit_or) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint) - OPENCV_GPU_IMPLEMENT_VEC_OP(float) - OPENCV_GPU_IMPLEMENT_VEC_OP(double) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double) + +#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC + +// binary operators (vec & vec) + +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \ + { \ + return VecTraits::make(a.x op b.x); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \ + } + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP + +// binary operators (vec & scalar) + +#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s); \ + } \ + __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \ + { \ + return VecTraits::make(s op b.x); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s, a.z op s); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y, s op b.z); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s, a.z op s, a.w op s); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y, s op b.z, s op b.w); \ + } + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP + +// binary function (vec & vec) + +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \ + { \ + return VecTraits::make(func (a.x, b.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \ + } + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double) + +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC + +// binary function (vec & scalar) + +#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \ + } + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double) + +#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC - #undef OPENCV_GPU_IMPLEMENT_VEC_UNOP - #undef OPENCV_GPU_IMPLEMENT_VEC_BINOP - #undef OPENCV_GPU_IMPLEMENT_VEC_OP - #undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP }}} // namespace cv { namespace gpu { namespace device #endif // __OPENCV_GPU_VECMATH_HPP__ diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu index 7f3d4ae33..c4d79bd80 100644 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace device template __device__ __forceinline__ bool operator() (const I& a, const I& b) const { - I d = a - b; + I d = saturate_cast(a - b); return lo.x <= d.x && d.x <= hi.x && lo.y <= d.y && d.y <= hi.y && lo.z <= d.z && d.z <= hi.z; @@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace device template __device__ __forceinline__ bool operator() (const I& a, const I& b) const { - I d = a - b; + I d = saturate_cast(a - b); return lo.x <= d.x && d.x <= hi.x && lo.y <= d.y && d.y <= hi.y && lo.z <= d.z && d.z <= hi.z && diff --git a/modules/gpu/src/cuda/hough.cu b/modules/gpu/src/cuda/hough.cu index faec89b95..59eba2608 100644 --- a/modules/gpu/src/cuda/hough.cu +++ b/modules/gpu/src/cuda/hough.cu @@ -48,6 +48,7 @@ #include "opencv2/gpu/device/common.hpp" #include "opencv2/gpu/device/emulation.hpp" #include "opencv2/gpu/device/vec_math.hpp" +#include "opencv2/gpu/device/functional.hpp" #include "opencv2/gpu/device/limits.hpp" #include "opencv2/gpu/device/dynamic_smem.hpp" @@ -811,7 +812,7 @@ namespace cv { namespace gpu { namespace device const int ind = ::atomicAdd(r_sizes + n, 1); if (ind < maxSize) - r_table(n, ind) = p - templCenter; + r_table(n, ind) = saturate_cast(p - templCenter); } void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, @@ -855,7 +856,7 @@ namespace cv { namespace gpu { namespace device for (int j = 0; j < r_row_size; ++j) { - short2 c = p - r_row[j]; + int2 c = p - r_row[j]; c.x = __float2int_rn(c.x * idp); c.y = __float2int_rn(c.y * idp); From 516e5b2563100329882948f68aceb15b5fe8fe60 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 4 Jun 2013 13:58:45 +0400 Subject: [PATCH 18/27] fixed BroxOpticalFlow regression test the output of BroxOpticalFlow differs a bit in CUDA 5.5 --- modules/gpu/test/test_optflow.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/gpu/test/test_optflow.cpp b/modules/gpu/test/test_optflow.cpp index 9e30b9208..53b93a096 100644 --- a/modules/gpu/test/test_optflow.cpp +++ b/modules/gpu/test/test_optflow.cpp @@ -102,8 +102,8 @@ GPU_TEST_P(BroxOpticalFlow, Regression) for (int i = 0; i < v_gold.rows; ++i) f.read(v_gold.ptr(i), v_gold.cols * sizeof(float)); - EXPECT_MAT_NEAR(u_gold, u, 0); - EXPECT_MAT_NEAR(v_gold, v, 0); + EXPECT_MAT_SIMILAR(u_gold, u, 1e-3); + EXPECT_MAT_SIMILAR(v_gold, v, 1e-3); #else std::ofstream f(fname.c_str(), std::ios_base::binary); From 4a770535c475be597f7487833927f0d5ab084988 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 4 Jun 2013 14:59:47 +0400 Subject: [PATCH 19/27] fixed BoxFilter sanity test (different rounding results) --- modules/gpu/perf/perf_filters.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gpu/perf/perf_filters.cpp b/modules/gpu/perf/perf_filters.cpp index 40d88aad4..adfc294f6 100644 --- a/modules/gpu/perf/perf_filters.cpp +++ b/modules/gpu/perf/perf_filters.cpp @@ -72,7 +72,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize)); - GPU_SANITY_CHECK(dst); + GPU_SANITY_CHECK(dst, 1); } else { From 89f3c40d791a6dd1951fd23ba7c1d9a8df8f1454 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 4 Jun 2013 15:01:06 +0400 Subject: [PATCH 20/27] fixed BroxOpticalFlow sanity test (increase epsilon value) + interpolateFrames and createOpticalFlowNeedleMap --- modules/gpu/perf/perf_video.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index 1ab01a75b..672d657b2 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -103,7 +103,7 @@ PERF_TEST_P(ImagePair, Video_InterpolateFrames, TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf); - GPU_SANITY_CHECK(newFrame); + GPU_SANITY_CHECK(newFrame, 1e-4); } else { @@ -142,7 +142,7 @@ PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap, TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors); - GPU_SANITY_CHECK(vertex); + GPU_SANITY_CHECK(vertex, 1e-6); GPU_SANITY_CHECK(colors); } else @@ -219,8 +219,8 @@ PERF_TEST_P(ImagePair, Video_BroxOpticalFlow, TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v); - GPU_SANITY_CHECK(u); - GPU_SANITY_CHECK(v); + GPU_SANITY_CHECK(u, 1e-1); + GPU_SANITY_CHECK(v, 1e-1); } else { From 93a44d423618c7c6c04135dd99b47209cc88ead6 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 5 Jun 2013 13:51:11 +0400 Subject: [PATCH 21/27] Fix typo in .gitattributes. --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index af704cdf0..cd4359ba3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,7 +33,7 @@ CMakeLists.txt text whitespace=tabwidth=2 *.png binary -*.jepg binary +*.jpeg binary *.jpg binary *.exr binary *.ico binary From 31a5f7ef3c2187b82a91290a430651adcc0b19d1 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 5 Jun 2013 14:08:55 +0400 Subject: [PATCH 22/27] fixed bug #3069 (infinite loop in GPU LBP Cascade detectMultiScale) --- modules/gpu/src/cascadeclassifier.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp index 814a96bc0..7b95b6909 100644 --- a/modules/gpu/src/cascadeclassifier.cpp +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -406,7 +406,7 @@ public: GpuMat dclassified(1, 1, CV_32S); cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) ); - PyrLavel level(0, 1.0f, image.size(), NxM, minObjectSize); + PyrLavel level(0, scaleFactor, image.size(), NxM, minObjectSize); while (level.isFeasible(maxObjectSize)) { From a2adafd5089065d44f82933c45cc4e7591141ed9 Mon Sep 17 00:00:00 2001 From: caorong Date: Thu, 16 May 2013 11:26:37 +0800 Subject: [PATCH 23/27] fix a bug(DetectorType never change) changed line281 -> line220 Presentation: because line 220 give the globle var mDetectorType,and in line 230 it will be compared with mDeteorType !!! it will never be unequal ~ fix: change mDetectorType(previous globle var) to a new local val tmpDetectorType --- .../src/org/opencv/samples/facedetect/FdActivity.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java b/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java index 23727739e..b06b2cc1c 100644 --- a/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java +++ b/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java @@ -215,9 +215,9 @@ public class FdActivity extends Activity implements CvCameraViewListener2 { else if (item == mItemFace20) setMinFaceSize(0.2f); else if (item == mItemType) { - mDetectorType = (mDetectorType + 1) % mDetectorName.length; - item.setTitle(mDetectorName[mDetectorType]); - setDetectorType(mDetectorType); + int tmpDetectorType = (mDetectorType + 1) % mDetectorName.length; + item.setTitle(mDetectorName[tmpDetectorType]); + setDetectorType(tmpDetectorType); } return true; } From 985bfea556ff0511eb16d99ef83a6ef78c80e38d Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 5 Jun 2013 15:54:27 +0400 Subject: [PATCH 24/27] Don't resolve symlinks when looking for modules. We don't really need it, it makes the code longer, and it can lead to inconsistent paths when OpenCV is itself inside a symlink. --- cmake/OpenCVModule.cmake | 4 ++-- cmake/OpenCVUtils.cmake | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 8312845fe..e8619ad7d 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -303,7 +303,7 @@ macro(ocv_glob_modules) # collect modules set(OPENCV_INITIAL_PASS ON) foreach(__path ${ARGN}) - ocv_get_real_path(__path "${__path}") + get_filename_component(__path "${__path}" ABSOLUTE) list(FIND __directories_observed "${__path}" __pathIdx) if(__pathIdx GREATER -1) @@ -315,7 +315,7 @@ macro(ocv_glob_modules) if(__ocvmodules) list(SORT __ocvmodules) foreach(mod ${__ocvmodules}) - ocv_get_real_path(__modpath "${__path}/${mod}") + get_filename_component(__modpath "${__path}/${mod}" ABSOLUTE) if(EXISTS "${__modpath}/CMakeLists.txt") list(FIND __directories_observed "${__modpath}" __pathIdx) diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index db24c9970..5547a113c 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -411,16 +411,6 @@ macro(ocv_regex_escape var regex) endmacro() -# get absolute path with symlinks resolved -macro(ocv_get_real_path VAR PATHSTR) - if(CMAKE_VERSION VERSION_LESS 2.8) - get_filename_component(${VAR} "${PATHSTR}" ABSOLUTE) - else() - get_filename_component(${VAR} "${PATHSTR}" REALPATH) - endif() -endmacro() - - # convert list of paths to full paths macro(ocv_convert_to_full_paths VAR) if(${VAR}) From a954d3630f8a567317a105300c8ccd735f2550a4 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 5 Jun 2013 18:09:47 +0400 Subject: [PATCH 25/27] Add support for adding custom OpenCV modules. --- CMakeLists.txt | 4 ++++ modules/CMakeLists.txt | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5f1edaa53..2c735e005 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -286,6 +286,10 @@ set(OPENCV_CONFIG_FILE_INCLUDE_DIR "${CMAKE_BINARY_DIR}/" CACHE PATH "Where to c add_definitions(-DHAVE_CVCONFIG_H) ocv_include_directories(${OPENCV_CONFIG_FILE_INCLUDE_DIR}) +# ---------------------------------------------------------------------------- +# Path for additional modules +# ---------------------------------------------------------------------------- +set(OPENCV_EXTRA_MODULES_PATH "" CACHE PATH "Where to look for additional OpenCV modules") # ---------------------------------------------------------------------------- # Autodetect if we are in a GIT repository diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt index 4a6ed6d11..3e1ad708e 100644 --- a/modules/CMakeLists.txt +++ b/modules/CMakeLists.txt @@ -2,4 +2,4 @@ if(NOT OPENCV_MODULES_PATH) set(OPENCV_MODULES_PATH "${CMAKE_CURRENT_SOURCE_DIR}") endif() -ocv_glob_modules(${OPENCV_MODULES_PATH}) +ocv_glob_modules(${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH}) From 429f84e59e45a66f56540e67f5ab3a9c8dc34249 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Thu, 6 Jun 2013 11:44:35 +0800 Subject: [PATCH 26/27] Fix a bug of cornerHarris and cornerMinEigenVal. The bug is a buffer overrun when border type is reflect101. It is found that gfft crashed with input of size 100x100 on Intel CPU. --- modules/ocl/src/opencl/imgproc_calcHarris.cl | 15 ++++++++------- .../ocl/src/opencl/imgproc_calcMinEigenVal.cl | 16 +++++++++------- modules/ocl/test/test_optflow.cpp | 2 +- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/modules/ocl/src/opencl/imgproc_calcHarris.cl b/modules/ocl/src/opencl/imgproc_calcHarris.cl index 15742d6c5..1911a7201 100644 --- a/modules/ocl/src/opencl/imgproc_calcHarris.cl +++ b/modules/ocl/src/opencl/imgproc_calcHarris.cl @@ -130,28 +130,29 @@ __kernel void calcHarris(__global const float *Dx,__global const float *Dy, __gl data[2][i] = dy_data[i] * dy_data[i]; } #else - for(int i=0; i < ksY+1; i++) - { + int clamped_col = min(dst_cols, col); + for(int i=0; i < ksY+1; i++) + { int dx_selected_row; int dx_selected_col; dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows); dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row); - dx_selected_col = ADDR_L(dx_startX+col, 0, dx_whole_cols); - dx_selected_col = ADDR_R(dx_startX+col, dx_whole_cols, dx_selected_col); + dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols); + dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col); dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col]; int dy_selected_row; int dy_selected_col; dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows); dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row); - dy_selected_col = ADDR_L(dy_startX+col, 0, dy_whole_cols); - dy_selected_col = ADDR_R(dy_startX+col, dy_whole_cols, dy_selected_col); + dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols); + dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col); dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col]; data[0][i] = dx_data[i] * dx_data[i]; data[1][i] = dx_data[i] * dy_data[i]; data[2][i] = dy_data[i] * dy_data[i]; - } + } #endif float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0; for(int i=1; i < ksY; i++) diff --git a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl index 662fbb07b..462ec7792 100644 --- a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl +++ b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl @@ -130,28 +130,30 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy, data[2][i] = dy_data[i] * dy_data[i]; } #else - for(int i=0; i < ksY+1; i++) - { + int clamped_col = min(dst_cols, col); + + for(int i=0; i < ksY+1; i++) + { int dx_selected_row; int dx_selected_col; dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows); dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row); - dx_selected_col = ADDR_L(dx_startX+col, 0, dx_whole_cols); - dx_selected_col = ADDR_R(dx_startX+col, dx_whole_cols, dx_selected_col); + dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols); + dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col); dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col]; int dy_selected_row; int dy_selected_col; dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows); dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row); - dy_selected_col = ADDR_L(dy_startX+col, 0, dy_whole_cols); - dy_selected_col = ADDR_R(dy_startX+col, dy_whole_cols, dy_selected_col); + dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols); + dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col); dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col]; data[0][i] = dx_data[i] * dx_data[i]; data[1][i] = dx_data[i] * dy_data[i]; data[2][i] = dy_data[i] * dy_data[i]; - } + } #endif float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0; for(int i=1; i < ksY; i++) diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp index 72689f3f7..0121be8f9 100644 --- a/modules/ocl/test/test_optflow.cpp +++ b/modules/ocl/test/test_optflow.cpp @@ -121,7 +121,7 @@ TEST_P(GoodFeaturesToTrack, EmptyCorners) cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance); - cv::ocl::oclMat src(100, 128, CV_8UC1, cv::Scalar::all(0)); + cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0)); cv::ocl::oclMat corners(1, maxCorners, CV_32FC2); detector(src, corners); From 8714cbac91e6e8f90395d9af3fd57f3c6b35235b Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 6 Jun 2013 14:09:33 +0400 Subject: [PATCH 27/27] Fix a missing header path when building with Qt 4. Also, removing explicit include path configuration, since QT_USE_FILE takes care of that. --- cmake/OpenCVFindLibsGUI.cmake | 2 +- modules/highgui/CMakeLists.txt | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake index 2ea864c16..59ce1cd05 100644 --- a/cmake/OpenCVFindLibsGUI.cmake +++ b/cmake/OpenCVFindLibsGUI.cmake @@ -33,7 +33,7 @@ if(WITH_QT) endif() if(NOT HAVE_QT) - find_package(Qt4) + find_package(Qt4 REQUIRED QtCore QtGui QtTest) if(QT4_FOUND) set(HAVE_QT TRUE) add_definitions(-DHAVE_QT) # We need to define the macro this way, using cvconfig.h does not work diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index 4c60867af..fad2562c8 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -95,14 +95,10 @@ elseif(HAVE_QT) endif() include(${QT_USE_FILE}) - if(QT_INCLUDE_DIR) - ocv_include_directories(${QT_INCLUDE_DIR}) - endif() - QT4_ADD_RESOURCES(_RCC_OUTFILES src/window_QT.qrc) QT4_WRAP_CPP(_MOC_OUTFILES src/window_QT.h) - list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES} ${QT_QTTEST_LIBRARY}) + list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES}) list(APPEND highgui_srcs src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES}) ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag) if(${_have_flag})