From 5539e85a1179c51da0b709fa48a516ca67370847 Mon Sep 17 00:00:00 2001 From: yao <bitwangyaoyao@gmail.com> Date: Wed, 27 Mar 2013 12:04:48 +0800 Subject: [PATCH] use perf test replace performance sample --- modules/ocl/perf/interpolation.hpp | 120 - modules/ocl/perf/main.cpp | 200 +- modules/ocl/perf/perf_arithm.cpp | 4977 +++-------------- modules/ocl/perf/perf_blend.cpp | 134 +- modules/ocl/perf/perf_brute_force_matcher.cpp | 150 + modules/ocl/perf/perf_canny.cpp | 122 +- modules/ocl/perf/perf_color.cpp | 91 + modules/ocl/perf/perf_columnsum.cpp | 112 +- modules/ocl/perf/perf_fft.cpp | 105 +- modules/ocl/perf/perf_filters.cpp | 1349 +---- modules/ocl/perf/perf_gemm.cpp | 105 +- modules/ocl/perf/perf_haar.cpp | 198 +- modules/ocl/perf/perf_hog.cpp | 150 +- modules/ocl/perf/perf_imgproc.cpp | 2683 +++------ modules/ocl/perf/perf_match_template.cpp | 278 +- modules/ocl/perf/perf_matrix_operation.cpp | 781 +-- modules/ocl/perf/perf_norm.cpp | 84 + modules/ocl/perf/perf_pyrdown.cpp | 126 +- modules/ocl/perf/perf_pyrlk.cpp | 143 + modules/ocl/perf/perf_pyrup.cpp | 109 +- modules/ocl/perf/perf_split_merge.cpp | 519 +- modules/ocl/perf/precomp.cpp | 330 +- modules/ocl/perf/precomp.hpp | 386 +- modules/ocl/perf/utility.cpp | 265 - modules/ocl/perf/utility.hpp | 182 - samples/ocl/performance.cpp | 4397 --------------- 26 files changed, 3791 insertions(+), 14305 deletions(-) delete mode 100644 modules/ocl/perf/interpolation.hpp create mode 100644 modules/ocl/perf/perf_brute_force_matcher.cpp create mode 100644 modules/ocl/perf/perf_color.cpp create mode 100644 modules/ocl/perf/perf_norm.cpp create mode 100644 modules/ocl/perf/perf_pyrlk.cpp delete mode 100644 modules/ocl/perf/utility.cpp delete mode 100644 modules/ocl/perf/utility.hpp delete mode 100644 samples/ocl/performance.cpp diff --git a/modules/ocl/perf/interpolation.hpp b/modules/ocl/perf/interpolation.hpp deleted file mode 100644 index fb89e701d..000000000 --- a/modules/ocl/perf/interpolation.hpp +++ /dev/null @@ 
-1,120 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_TEST_INTERPOLATION_HPP__ -#define __OPENCV_TEST_INTERPOLATION_HPP__ - -template <typename T> T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) -{ - if (border_type == cv::BORDER_CONSTANT) - return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at<T>(y, x * src.channels() + c) : cv::saturate_cast<T>(borderVal.val[c]); - - return src.at<T>(cv::borderInterpolate(y, src.rows, border_type), cv::borderInterpolate(x, src.cols, border_type) * src.channels() + c); -} - -template <typename T> struct NearestInterpolator -{ - static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) - { - return readVal<T>(src, cvFloor(y), cvFloor(x), c, border_type, borderVal); - } -}; - -template <typename T> struct LinearInterpolator -{ - static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) - { - x -= 0.5f; - y -= 0.5f; - - int x1 = cvFloor(x); - int y1 = cvFloor(y); - int x2 = x1 + 1; - int y2 = y1 + 1; - - float res = 0; - - res += readVal<T>(src, y1, x1, c, border_type, borderVal) * ((x2 - x) * (y2 - y)); - res += readVal<T>(src, y1, x2, c, border_type, borderVal) * ((x - x1) * (y2 - y)); - res += readVal<T>(src, y2, x1, c, border_type, borderVal) * ((x2 - x) * (y - y1)); - res += readVal<T>(src, y2, x2, c, border_type, borderVal) * ((x - 
x1) * (y - y1)); - - return cv::saturate_cast<T>(res); - } -}; - -template <typename T> struct CubicInterpolator -{ - static float getValue(float p[4], float x) - { - return p[1] + 0.5 * x * (p[2] - p[0] + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0]))); - } - - static float getValue(float p[4][4], float x, float y) - { - float arr[4]; - - arr[0] = getValue(p[0], x); - arr[1] = getValue(p[1], x); - arr[2] = getValue(p[2], x); - arr[3] = getValue(p[3], x); - - return getValue(arr, y); - } - - static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) - { - int ix = cvRound(x); - int iy = cvRound(y); - - float vals[4][4] = - { - {readVal<T>(src, iy - 2, ix - 2, c, border_type, borderVal), readVal<T>(src, iy - 2, ix - 1, c, border_type, borderVal), readVal<T>(src, iy - 2, ix, c, border_type, borderVal), readVal<T>(src, iy - 2, ix + 1, c, border_type, borderVal)}, - {readVal<T>(src, iy - 1, ix - 2, c, border_type, borderVal), readVal<T>(src, iy - 1, ix - 1, c, border_type, borderVal), readVal<T>(src, iy - 1, ix, c, border_type, borderVal), readVal<T>(src, iy - 1, ix + 1, c, border_type, borderVal)}, - {readVal<T>(src, iy , ix - 2, c, border_type, borderVal), readVal<T>(src, iy , ix - 1, c, border_type, borderVal), readVal<T>(src, iy , ix, c, border_type, borderVal), readVal<T>(src, iy , ix + 1, c, border_type, borderVal)}, - {readVal<T>(src, iy + 1, ix - 2, c, border_type, borderVal), readVal<T>(src, iy + 1, ix - 1, c, border_type, borderVal), readVal<T>(src, iy + 1, ix, c, border_type, borderVal), readVal<T>(src, iy + 1, ix + 1, c, border_type, borderVal)}, - }; - - return cv::saturate_cast<T>(getValue(vals, (x - ix + 2.0) / 4.0, (y - iy + 2.0) / 4.0)); - } -}; - -#endif // __OPENCV_TEST_INTERPOLATION_HPP__ diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index e517a371d..2da17755e 100644 --- a/modules/ocl/perf/main.cpp +++ 
b/modules/ocl/perf/main.cpp @@ -7,12 +7,13 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. -// + // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -21,12 +22,12 @@ // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. +// and/or other oclMaterials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct, @@ -41,129 +42,118 @@ #include "precomp.hpp" -#ifdef HAVE_OPENCL - -using namespace std; -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; - -void print_info() +int main(int argc, const char *argv[]) { - printf("\n"); -#if defined _WIN32 -# if defined _WIN64 - puts("OS: Windows 64"); -# else - puts("OS: Windows 32"); -# endif -#elif defined linux -# if defined _LP64 - puts("OS: Linux 64"); -# else - puts("OS: Linux 32"); -# endif -#elif defined __APPLE__ -# if defined _LP64 - puts("OS: Apple 64"); -# else - puts("OS: Apple 32"); -# endif -#endif + vector<ocl::Info> oclinfo; + int num_devices = getDevice(oclinfo); + + if (num_devices < 1) + { + cerr << "no device found\n"; + return -1; + } + + int devidx = 0; + + for (size_t i = 0; i < oclinfo.size(); i++) + { + for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++) + { + printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str()); + } + } + + redirectError(cvErrorCallback); -} -std::string workdir; -int main(int argc, char **argv) -{ - TS::ptr()->init("ocl"); - InitGoogleTest(&argc, argv); const char *keys = - - "{ h | help | false | print help message }" - - "{ w | workdir | ../../../samples/c/| set working directory }" - - "{ t | type | gpu | set device type:cpu or gpu}" - - "{ p | platform | 0 | set platform id }" - - "{ d | device | 0 | set device id }"; - - + "{ h | help | false | print help message }" + "{ f | filter | | filter for test }" + "{ w | workdir | | set working directory }" + "{ l | list | false | show all tests }" + "{ d | device | 0 | device id }" + "{ i | iters | 10 | iteration count }" + "{ m | warmup | 1 | gpu warm up iteration count}" + "{ t | xtop | 1.1 | xfactor top boundary}" + "{ b | xbottom | 0.9 | xfactor bottom boundary}" + "{ v | verify | false | only run gpu once to verify if problems occur}"; CommandLineParser cmd(argc, argv, keys); if 
(cmd.get<bool>("help")) - { - - cout << "Avaible options besides goole test option:" << endl; - + cout << "Avaible options:" << endl; cmd.printParams(); + return 0; } - workdir = cmd.get<string>("workdir"); - - string type = cmd.get<string>("type"); - - unsigned int pid = cmd.get<unsigned int>("platform"); - int device = cmd.get<int>("device"); - - print_info(); - // int flag = CVCL_DEVICE_TYPE_GPU; - - // if(type == "cpu") - - // { - - // flag = CVCL_DEVICE_TYPE_CPU; - - // } - std::vector<cv::ocl::Info> oclinfo; - int devnums = getDevice(oclinfo); - if(devnums <= device || device < 0) - + if (device < 0 || device >= num_devices) { - - std::cout << "device invalid\n"; - + cerr << "Invalid device ID" << endl; return -1; - } - if(pid >= oclinfo.size()) - + if (cmd.get<bool>("verify")) { - - std::cout << "platform invalid\n"; - - return -1; - + TestSystem::instance().setNumIters(1); + TestSystem::instance().setGPUWarmupIters(0); + TestSystem::instance().setCPUIters(0); } - if(pid != 0 || device != 0) + devidx = 0; + for (size_t i = 0; i < oclinfo.size(); i++) { - - setDevice(oclinfo[pid], device); - + for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++, devidx++) + { + if (device == devidx) + { + ocl::setDevice(oclinfo[i], (int)j); + TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]); + printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str()); + goto END_DEV; + } + } } - cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl; - setBinpath(CLBINPATH); - return RUN_ALL_TESTS(); -} +END_DEV: -#else // DON'T HAVE_OPENCL + string filter = cmd.get<string>("filter"); + string workdir = cmd.get<string>("workdir"); + bool list = cmd.get<bool>("list"); + int iters = cmd.get<int>("iters"); + int wu_iters = cmd.get<int>("warmup"); + double x_top = cmd.get<double>("xtop"); + double x_bottom = cmd.get<double>("xbottom"); + + TestSystem::instance().setTopThreshold(x_top); + 
TestSystem::instance().setBottomThreshold(x_bottom); + + if (!filter.empty()) + { + TestSystem::instance().setTestFilter(filter); + } + + if (!workdir.empty()) + { + if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\') + { + workdir += '/'; + } + + TestSystem::instance().setWorkingDir(workdir); + } + + if (list) + { + TestSystem::instance().setListMode(true); + } + + TestSystem::instance().setNumIters(iters); + TestSystem::instance().setGPUWarmupIters(wu_iters); + + TestSystem::instance().run(); -int main() -{ - printf("OpenCV was built without OpenCL support\n"); return 0; -} - - -#endif // HAVE_OPENCL +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp index b7f82b685..e6e957641 100644 --- a/modules/ocl/perf/perf_arithm.cpp +++ b/modules/ocl/perf/perf_arithm.cpp @@ -1,4 +1,4 @@ -/////////////////////////////////////////////////////////////////////////////////////// +/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // @@ -10,17 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. 
// // @Authors -// Niko Li, newlife20080214@gmail.com -// Jia Haipeng, jiahaipeng95@gmail.com -// Shengen Yan, yanshengen@gmail.com -// Jiang Liyuan,jlyuan001.good@163.com -// Rock Li, Rock.Li@amd.com -// Zailong Wu, bullet@yeah.net +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -35,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -48,4371 +43,1165 @@ // //M*/ - #include "precomp.hpp" -#include <iomanip> - -#ifdef HAVE_OPENCL -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; -PARAM_TEST_CASE(ArithmTestBase, MatType, bool) +///////////// Lut //////////////////////// +TEST(lut) { - int type; - cv::Scalar val; + Mat src, lut, dst; + ocl::oclMat d_src, d_lut, d_dst; - //src mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mask; - cv::Mat dst; - cv::Mat dst1; //bak, for two outputs + int all_type[] = {CV_8UC1, CV_8UC3}; + std::string type_name[] = {"CV_8UC1", "CV_8UC3"}; - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int maskx; - int masky; - - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - cv::Mat dst1_roi; //bak - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - 
cv::ocl::oclMat gdst_whole; - cv::ocl::oclMat gdst1_whole; //bak - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdst1; //bak - cv::ocl::oclMat gmask; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j]; - cv::RNG &rng = TS::ptr()->get_rng(); + gen(src, size, size, all_type[j], 0, 256); + gen(lut, 1, 256, CV_8UC1, 0, 1); + gen(dst, size, size, all_type[j], 0, 256); - cv::Size size(MWIDTH, MHEIGHT); + LUT(src, lut, dst); - mat1 = randomMat(rng, size, type, 5, 16, false); - //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); - mat2 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dst1 = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + CPU_ON; + LUT(src, lut, dst); + CPU_OFF; - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + d_src.upload(src); + d_lut.upload(lut); + + WARMUP_ON; + ocl::LUT(d_src, d_lut, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::LUT(d_src, d_lut, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_lut.upload(lut); + ocl::LUT(d_src, d_lut, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; + + } - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src2x = 1; - src1y = 1; - src2y = 1; - dstx = 1; - dsty = 1; - maskx = 1; - masky = 1; - } - else - { - roicols = mat1.cols; - roirows = 
mat1.rows; - src1x = 0; - src2x = 0; - src1y = 0; - src2y = 0; - dstx = 0; - dsty = 0; - maskx = 0; - masky = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - //mat2_roi = mat2(Rect(src2x,src2y,256,1)); - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gdst1_whole = dst1; - //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat1 = mat1_roi; - //gmat2 = mat2_roi; - //gmask = mask_roi; - } - -}; -////////////////////////////////lut///////////////////////////////////////////////// - -struct Lut : ArithmTestBase {}; - -TEST_P(Lut, Mat) -{ - - cv::Mat mat2(3, 512, CV_8UC1); - cv::RNG &rng = TS::ptr()->get_rng(); - rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256)); - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - mat2 = randomMat(rng, cv::Size(512, 3), type, 5, 16, false); - mat2_roi = mat2(Rect(src2x, src2y, 256, 1)); - - - t0 = (double)cvGetTickCount();//cpu start - cv::LUT(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::LUT(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - 
continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - // s=GetParam(); - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - // src2x = rng.uniform( 0,mat2.cols - 256); - // src2y = rng.uniform (0,mat2.rows - 1); - - // cv::Mat mat2_roi = mat2(Rect(src2x,src2y,256,1)); - mat2 = randomMat(rng, cv::Size(512, 3), type, 5, 16, false); - mat2_roi = mat2(Rect(src2x, src2y, 256, 1)); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - // gdst1_whole = dst1; - // gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - // gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::LUT(gmat1, gmat2, gdst); - }; -#endif - } - - -////////////////////////////////exp///////////////////////////////////////////////// - -struct Exp : ArithmTestBase {}; - -TEST_P(Exp, Mat) +///////////// Exp //////////////////////// +TEST(Exp) { + Mat src, dst; + ocl::oclMat d_src, d_dst; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); + 
SUBTEST << size << 'x' << size << "; CV_32FC1"; - t0 = (double)cvGetTickCount();//cpu start - cv::exp(mat1_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, CV_32FC1, 0, 256); + gen(dst, size, size, CV_32FC1, 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 + exp(src, dst); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; + CPU_ON; + exp(src, dst); + CPU_OFF; + d_src.upload(src); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::exp(gmat1, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download(cpu_dst); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - //EXPECT_MAT_NEAR(dst, cpu_dst, 0,""); - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; + WARMUP_ON; + ocl::exp(d_src, d_dst); + WARMUP_OFF; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + GPU_ON; + ocl::exp(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::exp(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::exp(gmat1, gdst); - }; -#endif - } - -////////////////////////////////log///////////////////////////////////////////////// - -struct Log : ArithmTestBase {}; - -TEST_P(Log, Mat) +///////////// LOG //////////////////////// +TEST(Log) { + Mat src, dst; + ocl::oclMat d_src, d_dst; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); + SUBTEST << size << 'x' << size << "; 32F"; - t0 = (double)cvGetTickCount();//cpu start - cv::log(mat1_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, CV_32F, 1, 10); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + log(src, dst); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::log(gmat1, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + log(src, dst); + CPU_OFF; + d_src.upload(src); - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << 
"with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + WARMUP_ON; + ocl::log(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::log(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::log(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::log(gmat1, gdst); - }; -#endif - } - - - -////////////////////////////////add///////////////////////////////////////////////// - -struct Add : ArithmTestBase {}; - -TEST_P(Add, Mat) +///////////// Add //////////////////////// +TEST(Add) { + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::add(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 1); + 
gen(src2, size, size, all_type[j], 0, 1); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + add(src1, src2, dst); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::add(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; + CPU_ON; + add(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + WARMUP_ON; + ocl::add(d_src1, d_src2, d_dst); + WARMUP_OFF; + GPU_ON; + ocl::add(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::add(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::add(gmat1, gmat2, gdst); - }; -#endif } -TEST_P(Add, Mat_Mask) +///////////// Mul //////////////////////// +TEST(Mul) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::add(mat1_roi, mat2_roi, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::add(gmat1, gmat2, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + multiply(src1, src2, dst); + 
CPU_ON; + multiply(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::multiply(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::multiply(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::multiply(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::add(gmat1, gmat2, gdst, gmask); - }; -#endif -} -TEST_P(Add, Scalar) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::add(mat1_roi, val, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::add(gmat1, val, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - 
gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::add(gmat1, val, gdst); - }; -#endif } -TEST_P(Add, Scalar_Mask) +///////////// Div //////////////////////// +TEST(Div) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::add(mat1_roi, val, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, 
size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::add(gmat1, val, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + divide(src1, src2, dst); + + CPU_ON; + divide(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::divide(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::divide(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::divide(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::add(gmat1, val, gdst, gmask); - }; -#endif } - -////////////////////////////////sub///////////////////////////////////////////////// -struct Sub : ArithmTestBase {}; - -TEST_P(Sub, Mat) +///////////// Absdiff //////////////////////// +TEST(Absdiff) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::subtract(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::subtract(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 
+ totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + absdiff(src1, src2, dst); + CPU_ON; + absdiff(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::absdiff(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::absdiff(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::absdiff(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::subtract(gmat1, gmat2, gdst); - }; -#endif } -TEST_P(Sub, Mat_Mask) +///////////// CartToPolar //////////////////////// +TEST(CartToPolar) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst, dst1; + ocl::oclMat d_src1, d_src2, d_dst, d_dst1; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); 
+ SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::subtract(mat1_roi, mat2_roi, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); + gen(dst1, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::subtract(gmat1, gmat2, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + cartToPolar(src1, src2, dst, dst1, 1); + CPU_ON; + cartToPolar(src1, src2, dst, dst1, 1); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); + WARMUP_OFF; + + GPU_ON; + ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); + d_dst.download(dst); + d_dst1.download(dst1); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::subtract(gmat1, gmat2, gdst, gmask); - }; -#endif -} -TEST_P(Sub, Scalar) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::subtract(mat1_roi, val, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::subtract(gmat1, val, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::subtract(gmat1, val, gdst); - }; -#endif } -TEST_P(Sub, Scalar_Mask) +///////////// PolarToCart //////////////////////// +TEST(PolarToCart) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst, dst1; + ocl::oclMat d_src1, d_src2, d_dst, d_dst1; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::subtract(mat1_roi, val, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); + gen(dst1, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::subtract(gmat1, val, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + 
polarToCart(src1, src2, dst, dst1, 1); + CPU_ON; + polarToCart(src1, src2, dst, dst1, 1); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); + WARMUP_OFF; + + GPU_ON; + ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); + d_dst.download(dst); + d_dst1.download(dst1); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::subtract(gmat1, val, gdst, gmask); - }; -#endif } - -////////////////////////////////Mul///////////////////////////////////////////////// -struct Mul : ArithmTestBase {}; - -TEST_P(Mul, Mat) +///////////// Magnitude //////////////////////// +TEST(magnitude) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat x, y, mag; + ocl::oclMat d_x, d_y, d_mag; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; 
- for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::multiply(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(x, size, size, all_type[j], 0, 1); + gen(y, size, size, all_type[j], 0, 1); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + magnitude(x, y, mag); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::multiply(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + magnitude(x, y, mag); + CPU_OFF; + d_x.upload(x); + d_y.upload(y); + WARMUP_ON; + ocl::magnitude(d_x, d_y, d_mag); + WARMUP_OFF; + + GPU_ON; + ocl::magnitude(d_x, d_y, d_mag); + ; + GPU_OFF; + + GPU_FULL_ON; + d_x.upload(x); + d_y.upload(y); + ocl::magnitude(d_x, d_y, d_mag); + d_mag.download(mag); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::multiply(gmat1, gmat2, gdst); - }; -#endif } -TEST_P(Mul, Mat_Scalar) +///////////// Transpose //////////////////////// +TEST(Transpose) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - cv::RNG &rng = TS::ptr()->get_rng(); - double s = rng.uniform(-10.0, 10.0); - t0 = (double)cvGetTickCount();//cpu start - cv::multiply(mat1_roi, mat2_roi, dst_roi, s); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + gen(src, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::multiply(gmat1, gmat2, gdst, s); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + transpose(src, dst); + 
CPU_ON; + transpose(src, dst); + CPU_OFF; + d_src.upload(src); + + WARMUP_ON; + ocl::transpose(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::transpose(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::transpose(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - cv::RNG &rng = TS::ptr()->get_rng(); - double s = rng.uniform(-10.0, 10.0); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::multiply(gmat1, gmat2, gdst, s); - }; -#endif } - -struct Div : ArithmTestBase {}; - -TEST_P(Div, Mat) +///////////// Flip //////////////////////// +TEST(Flip) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; FLIP_BOTH"; - t0 = 
(double)cvGetTickCount();//cpu start - cv::divide(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + flip(src, dst, 0); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::divide(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + flip(src, dst, 0); + CPU_OFF; + d_src.upload(src); + WARMUP_ON; + ocl::flip(d_src, d_dst, 0); + WARMUP_OFF; + + GPU_ON; + ocl::flip(d_src, d_dst, 0); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::flip(d_src, d_dst, 0); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::divide(gmat1, gmat2, gdst); - }; -#endif } -TEST_P(Div, Mat_Scalar) +///////////// minMax //////////////////////// +TEST(minMax) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src; + ocl::oclMat d_src; + + double min_val, max_val; + Point min_loc, max_loc; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - cv::RNG &rng = TS::ptr()->get_rng(); - double s = rng.uniform(-10.0, 10.0); - t0 = (double)cvGetTickCount();//cpu start - cv::divide(mat1_roi, mat2_roi, dst_roi, s); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + gen(src, size, size, all_type[j], 0, 256); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::divide(gmat1, gmat2, gdst, s); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + minMaxLoc(src, &min_val, 
&max_val, &min_loc, &max_loc); + CPU_OFF; + d_src.upload(src); + + WARMUP_ON; + ocl::minMax(d_src, &min_val, &max_val); + WARMUP_OFF; + + GPU_ON; + ocl::minMax(d_src, &min_val, &max_val); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::minMax(d_src, &min_val, &max_val); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - cv::RNG &rng = TS::ptr()->get_rng(); - double s = rng.uniform(-10.0, 10.0); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::divide(gmat1, gmat2, gdst, s); - }; -#endif } - -struct Absdiff : ArithmTestBase {}; - -TEST_P(Absdiff, Mat) +///////////// minMaxLoc //////////////////////// +TEST(minMaxLoc) { + Mat src; + ocl::oclMat d_src; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + double min_val, max_val; + Point min_loc, max_loc; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 
'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::absdiff(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 1); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + CPU_ON; + minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc); + CPU_OFF; + d_src.upload(src); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::absdiff(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + WARMUP_ON; + ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); + WARMUP_OFF; + GPU_ON; + ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::absdiff(gmat1, gmat2, gdst); - }; -#endif } -TEST_P(Absdiff, Mat_Scalar) +///////////// Sum //////////////////////// +TEST(Sum) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src; + Scalar cpures, gpures; + ocl::oclMat d_src; + + int all_type[] = {CV_8UC1, CV_32SC1}; + std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::absdiff(mat1_roi, val, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + cpures = sum(src); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::absdiff(gmat1, val, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + cpures = sum(src); + CPU_OFF; + d_src.upload(src); + WARMUP_ON; + gpures = ocl::sum(d_src); + WARMUP_OFF; + + GPU_ON; + gpures = 
ocl::sum(d_src); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + gpures = ocl::sum(d_src); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::absdiff(gmat1, val, gdst); - }; -#endif } - - -struct CartToPolar : ArithmTestBase {}; - -TEST_P(CartToPolar, angleInDegree) +///////////// countNonZero //////////////////////// +TEST(countNonZero) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src; + ocl::oclMat d_src; + + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = 
gdst_whole(Rect(dstx, dsty, roicols, roirows)); + countNonZero(src); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - cv::Mat cpu_dst1; - gdst1_whole.download(cpu_dst1); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + countNonZero(src); + CPU_OFF; + d_src.upload(src); + WARMUP_ON; + ocl::countNonZero(d_src); + WARMUP_OFF; + + GPU_ON; + ocl::countNonZero(d_src); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::countNonZero(d_src); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1); - }; -#endif } -TEST_P(CartToPolar, angleInRadians) +///////////// Phase //////////////////////// +TEST(Phase) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - cv::Mat cpu_dst1; - 
gdst1_whole.download(cpu_dst1); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + phase(src1, src2, dst, 1); + + CPU_ON; + phase(src1, src2, dst, 1); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::phase(d_src1, d_src2, d_dst, 1); + WARMUP_OFF; + + GPU_ON; + ocl::phase(d_src1, d_src2, d_dst, 1); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::phase(d_src1, d_src2, d_dst, 1); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0); - }; -#endif } - -struct PolarToCart : ArithmTestBase {}; - -TEST_P(PolarToCart, angleInDegree) +///////////// bitwise_and//////////////////////// +TEST(bitwise_and) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_8UC1, CV_32SC1}; + std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - 
gdst_whole.download (cpu_dst);//download - cv::Mat cpu_dst1; - gdst1_whole.download(cpu_dst1); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + bitwise_and(src1, src2, dst); + CPU_ON; + bitwise_and(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::bitwise_and(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::bitwise_and(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::bitwise_and(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1); - }; -#endif } -TEST_P(PolarToCart, angleInRadians) +///////////// bitwise_or//////////////////////// +TEST(bitwise_or) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_8UC1, CV_32SC1}; + std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - cv::Mat cpu_dst1; - 
gdst1_whole.download(cpu_dst1); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + bitwise_or(src1, src2, dst); + CPU_ON; + bitwise_or(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::bitwise_or(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::bitwise_or(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::bitwise_or(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0); - }; -#endif } - - -struct Magnitude : ArithmTestBase {}; - -TEST_P(Magnitude, Mat) +///////////// bitwise_xor//////////////////////// +TEST(bitwise_xor) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_8UC1, CV_32SC1}; + std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::magnitude(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::magnitude(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + 
totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + bitwise_xor(src1, src2, dst); + CPU_ON; + bitwise_xor(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::bitwise_xor(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::bitwise_xor(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::bitwise_xor(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::magnitude(gmat1, gmat2, gdst); - }; -#endif } -struct Transpose : ArithmTestBase {}; - -TEST_P(Transpose, Mat) +///////////// bitwise_not//////////////////////// +TEST(bitwise_not) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, dst; + ocl::oclMat d_src1, d_dst; + + int all_type[] = {CV_8UC1, CV_32SC1}; + std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 
1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::transpose(mat1_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::transpose(gmat1, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + bitwise_not(src1, dst); + CPU_ON; + bitwise_not(src1, dst); + CPU_OFF; + d_src1.upload(src1); + + WARMUP_ON; + ocl::bitwise_not(d_src1, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::bitwise_not(d_src1, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + ocl::bitwise_not(d_src1, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::transpose(gmat1, gdst); - }; -#endif } - -struct Flip : ArithmTestBase {}; - -TEST_P(Flip, X) +///////////// compare//////////////////////// +TEST(compare) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int CMP_EQ = 0; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::flip(mat1_roi, dst_roi, 0); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::flip(gmat1, gdst, 0); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + compare(src1, src2, dst, CMP_EQ); + CPU_ON; + compare(src1, 
src2, dst, CMP_EQ); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); + WARMUP_OFF; + + GPU_ON; + ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::flip(gmat1, gdst, 0); - }; -#endif } -TEST_P(Flip, Y) +///////////// pow //////////////////////// +TEST(pow) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::flip(mat1_roi, dst_roi, 1); - t0 = (double)cvGetTickCount() 
- t0;//cpu end + gen(src, size, size, all_type[j], 0, 100); + gen(dst, size, size, all_type[j], 0, 100); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + pow(src, -2.0, dst); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::flip(gmat1, gdst, 1); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + pow(src, -2.0, dst); + CPU_OFF; + d_src.upload(src); + d_dst.upload(dst); + WARMUP_ON; + ocl::pow(d_src, -2.0, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::pow(d_src, -2.0, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::pow(d_src, -2.0, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::flip(gmat1, gdst, 1); - }; -#endif } -TEST_P(Flip, BOTH) +///////////// MagnitudeSqr//////////////////////// +TEST(MagnitudeSqr) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[t]; - t0 = (double)cvGetTickCount();//cpu start - cv::flip(mat1_roi, dst_roi, -1); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[t], 0, 256); + gen(src2, size, size, all_type[t], 0, 256); + gen(dst, size, size, all_type[t], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::flip(gmat1, gdst, -1); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + for (int i = 0; i < src1.rows; ++i) - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - 
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::flip(gmat1, gdst, -1); - }; -#endif -} - - - -struct MinMax : ArithmTestBase {}; - -TEST_P(MinMax, MAT) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - double minVal, maxVal; - cv::Point minLoc, maxLoc; - t0 = (double)cvGetTickCount();//cpu start - if (mat1.depth() != CV_8S) - { - cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc); - } - else - { - minVal = std::numeric_limits<double>::max(); - maxVal = -std::numeric_limits<double>::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at<signed char>(i, j); - if (val < minVal) minVal = val; - if (val > maxVal) maxVal = val; - } - } - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - double minVal_, maxVal_; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::minMax(gmat1, &minVal_, &maxVal_); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - 
totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - double minVal_, maxVal_; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::minMax(gmat1, &minVal_, &maxVal_); - }; -#endif -} - -TEST_P(MinMax, MASK) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - double minVal, maxVal; - cv::Point minLoc, maxLoc; - t0 = (double)cvGetTickCount();//cpu start - if (mat1.depth() != CV_8S) - { - cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi); - } - else - { - minVal = std::numeric_limits<double>::max(); - maxVal = -std::numeric_limits<double>::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at<signed char>(i, j); - unsigned char m = mask_roi.at<unsigned char>(i, j); - if (val < minVal && m) minVal = val; - if (val > maxVal && m) maxVal = val; - } - } - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, 
maxVal_; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask); - }; -#endif -} - - -struct MinMaxLoc : ArithmTestBase {}; - -TEST_P(MinMaxLoc, MAT) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - double minVal, maxVal; - cv::Point minLoc, maxLoc; - int depth = mat1.depth(); - t0 = (double)cvGetTickCount();//cpu start - if (depth != CV_8S) - { - cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc); - } - else - { - minVal = std::numeric_limits<double>::max(); - maxVal = -std::numeric_limits<double>::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = 
mat1_roi.at<signed char>(i, j); - if (val < minVal) - { - minVal = val; - minLoc.x = j; - minLoc.y = i; - } - if (val > maxVal) - { - maxVal = val; - maxLoc.x = j; - maxLoc.y = i; - } - } - } - - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat()); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat()); - }; -#endif - -} - - -TEST_P(MinMaxLoc, MASK) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - double minVal, maxVal; - cv::Point minLoc, maxLoc; - int depth = mat1.depth(); - t0 = (double)cvGetTickCount();//cpu start - if (depth != CV_8S) - { - cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi); - } - else - { - minVal = std::numeric_limits<double>::max(); - maxVal = -std::numeric_limits<double>::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at<signed char>(i, j); - unsigned char m = mask_roi.at<unsigned char>(i , j); - if (val < minVal && m) - { - minVal = val; - minLoc.x = j; - minLoc.y = i; - } - if (val > maxVal && m) - { - maxVal = val; - maxLoc.x = j; - maxLoc.y = i; - } - } - } - - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; 
- - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask); - }; -#endif -} - - -struct Sum : ArithmTestBase {}; - -TEST_P(Sum, MAT) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::sum(mat1_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::sum(gmat1); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - Scalar gpures = cv::ocl::sum(gmat1); - }; -#endif -} - -//TEST_P(Sum, MASK) -//{ -// for(int j=0; j<LOOP_TIMES; j++) -// { -// -// } -//} - -struct CountNonZero : ArithmTestBase {}; - -TEST_P(CountNonZero, MAT) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::countNonZero(mat1_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::countNonZero(gmat1); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::countNonZero(gmat1); - }; -#endif - -} - - - -////////////////////////////////phase///////////////////////////////////////////////// -struct Phase : ArithmTestBase {}; - -TEST_P(Phase, Mat) -{ - if(mat1.depth() != CV_32F && mat1.depth() != CV_64F) - { - cout << "\tUnsupported type\t\n"; - } - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::phase(mat1_roi, mat2_roi, dst_roi, 0); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::phase(gmat1, gmat2, gdst, 0); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::phase(gmat1, gmat2, gdst, 0); - }; -#endif - -} - - -////////////////////////////////bitwise_and///////////////////////////////////////////////// -struct Bitwise_and : ArithmTestBase {}; - -TEST_P(Bitwise_and, Mat) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_and(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_and(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / 
((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_and(gmat1, gmat2, gdst); - }; -#endif - -} - -TEST_P(Bitwise_and, Mat_Mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_and(mat1_roi, mat2_roi, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask); - }; -#endif -} - -TEST_P(Bitwise_and, Scalar) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_and(mat1_roi, val, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_and(gmat1, val, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_and(gmat1, val, gdst); - }; -#endif -} - -TEST_P(Bitwise_and, Scalar_Mask) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_and(mat1_roi, val, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_and(gmat1, val, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_and(gmat1, val, gdst, gmask); - }; -#endif -} - - - -////////////////////////////////bitwise_or///////////////////////////////////////////////// - -struct Bitwise_or : ArithmTestBase {}; - -TEST_P(Bitwise_or, Mat) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_or(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_or(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / 
((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_or(gmat1, gmat2, gdst); - }; -#endif -} - -TEST_P(Bitwise_or, Mat_Mask) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_or(mat1_roi, mat2_roi, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask); - }; -#endif -} -TEST_P(Bitwise_or, Scalar) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_or(mat1_roi, val, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_or(gmat1, val, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_or(gmat1, val, gdst); - }; -#endif -} - -TEST_P(Bitwise_or, Scalar_Mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_or(mat1_roi, val, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_or(gmat1, val, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_or(gmat1, val, gdst, gmask); - }; -#endif -} - - -////////////////////////////////bitwise_xor///////////////////////////////////////////////// - -struct Bitwise_xor : ArithmTestBase {}; - -TEST_P(Bitwise_xor, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_xor(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / 
((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_xor(gmat1, gmat2, gdst); - }; -#endif -} - -TEST_P(Bitwise_xor, Mat_Mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask); - }; -#endif -} - -TEST_P(Bitwise_xor, Scalar) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_xor(mat1_roi, val, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_xor(gmat1, val, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_xor(gmat1, val, gdst); - }; -#endif -} - -TEST_P(Bitwise_xor, Scalar_Mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_xor(mat1_roi, val, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_xor(gmat1, val, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_xor(gmat1, val, gdst, gmask); - }; -#endif -} - - -////////////////////////////////bitwise_not///////////////////////////////////////////////// - -struct Bitwise_not : ArithmTestBase {}; - -TEST_P(Bitwise_not, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::bitwise_not(mat1_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bitwise_not(gmat1, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* 
LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bitwise_not(gmat1, gdst); - }; -#endif -} - -////////////////////////////////compare///////////////////////////////////////////////// -PARAM_TEST_CASE ( CompareTestBase, MatType, bool) -{ - int type; - cv::Scalar val; - - //src mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mask; - cv::Mat dst; - cv::Mat dst1; //bak, for two outputs - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int maskx; - int masky; - - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - cv::Mat dst1_roi; //bak - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - cv::ocl::oclMat gdst1_whole; //bak - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdst1; //bak - cv::ocl::oclMat gmask; - - virtual void SetUp() - { - //type = GET_PARAM(0); - type = CV_8UC1; - - cv::RNG &rng = TS::ptr()->get_rng(); - - cv::Size size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); - mat2 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dst1 = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - - cv::threshold(mask, mask, 0.5, 255., 
CV_8UC1); - - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src2x = 1; - src1y = 1; - src2y = 1; - dstx = 1; - dsty = 1; - maskx = 1; - masky = 1; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src2x = 0; - src1y = 0; - src2y = 0; - dstx = 0; - dsty = 0; - maskx = 0; - masky = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - //mat2_roi = mat2(Rect(src2x,src2y,256,1)); - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gdst1_whole = dst1; - //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat1 = mat1_roi; - //gmat2 = mat2_roi; - //gmask = mask_roi; - } - -}; -struct Compare : CompareTestBase {}; - -TEST_P(Compare, Mat) -{ - if(mat1.type() == CV_8SC1) - { - cout << "\tUnsupported type\t\n"; - } - - int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE}; - const char *cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"}; - int cmp_num = sizeof(cmp_codes) / sizeof(int); - for (int i = 0; i < cmp_num; ++i) - { - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 
0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::compare(mat1_roi, mat2_roi, dst_roi, cmp_codes[i]); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::compare(gmat1, gmat2, gdst, cmp_codes[i]); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - cout << cmp_str[i] << endl; - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::compare(gmat1, gmat2, gdst, cmp_codes[i]); - }; -#endif - } - -} - -struct Pow : ArithmTestBase {}; - -TEST_P(Pow, Mat) -{ - if(mat1.depth() != CV_32F && mat1.depth() != CV_64F) - { - cout << "\tUnsupported type\t\n"; - } - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - double p = 4.5; - t0 = (double)cvGetTickCount();//cpu start - cv::pow(mat1_roi, p, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::pow(gmat1, p, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - double p = 4.5; - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::pow(gmat1, p, gdst); - }; -#endif -} - - -struct MagnitudeSqr : ArithmTestBase {}; - -TEST_P(MagnitudeSqr, Mat) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - for(int i = 0; i < mat1.rows; ++i) - for(int j = 0; j < mat1.cols; ++j) + for (int j = 0; j < src1.cols; ++j) { - float val1 = mat1.at<float>(i, j); - float val2 = mat2.at<float>(i, j); + float val1 = src1.at<float>(i, j); + float val2 = src2.at<float>(i, j); ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; } - t0 = (double)cvGetTickCount() - t0;//cpu end - t1 = (double)cvGetTickCount();//gpu start1 - cv::ocl::oclMat clmat1(mat1), clmat2(mat2), cldst; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::magnitudeSqr(clmat1, clmat2, cldst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - cldst.download(cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + for (int i = 0; i < src1.rows; ++i) + for (int j = 0; j < src1.cols; ++j) + { + float val1 = src1.at<float>(i, j); + float val2 = src2.at<float>(i, 
j); + + ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; + + } + + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - cv::ocl::oclMat clmat1(mat1), clmat2(mat2), cldst; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::magnitudeSqr(clmat1, clmat2, cldst); - }; -#endif - } - -struct AddWeighted : ArithmTestBase {}; - -TEST_P(AddWeighted, Mat) +///////////// AddWeighted//////////////////////// +TEST(AddWeighted) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + double alpha = 2.0, beta = 1.0, gama = 3.0; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) 
{ - Has_roi(k); - double alpha = 2.0, beta = 1.0, gama = 3.0; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::addWeighted(mat1_roi, alpha, mat2_roi, beta, gama, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + addWeighted(src1, alpha, src2, beta, gama, dst); - gmat1 = mat1_roi; - gmat2 = mat2_roi; + CPU_ON; + addWeighted(src1, alpha, src2, beta, gama, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::addWeighted(gmat1, alpha, gmat2, beta, gama, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download(cpu_dst); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + WARMUP_ON; + ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); + WARMUP_OFF; + GPU_ON; + ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - double alpha = 2.0, beta = 1.0, gama = 3.0; - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::addWeighted(gmat1, alpha, gmat2, beta, gama, gdst); - // double alpha=2.0,beta=1.0,gama=3.0; - // cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; - // if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - // cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); - }; -#endif - -} -/* -struct AddWeighted : ArithmTestBase {}; - -TEST_P(AddWeighted, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { - double alpha=2.0,beta=1.0,gama=3.0; - - t0 = (double)cvGetTickCount();//cpu start - cv::addWeighted(mat1,alpha,mat2,beta,gama,dst); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; - - t2=(double)cvGetTickCount();//kernel - cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - cldst.download(cpu_dst); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - totalgputick_kernel=t2+totalgputick_kernel; - } - cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; - -#else - //for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - // { - double alpha=2.0,beta=1.0,gama=3.0; - cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; - //if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); - // }; -#endif - -} - -*/ -//********test**************** - -INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine( - Values(CV_8UC1, CV_8UC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine( - Values(CV_32FC1, CV_32FC1), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine( - Values(CV_32FC1, CV_32FC1), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); - -INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - - -INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine( - Values(CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine( - Values(CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine( - Values(CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1), - Values(false))); // Values(false) is the reserved parameter - 
-INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine( - Values(CV_8UC1, CV_32FC1), - Values(false))); - -INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine( - Values(CV_8UC1, CV_32FC1), - Values(false))); - -INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine( - Values(CV_8U, CV_32S, CV_32F), - Values(false))); - -INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine( - Values(CV_8U, CV_32S, CV_32F), - Values(false))); - - -INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - - -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine( - Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, MagnitudeSqr, Combine( - Values(CV_32FC1, CV_32FC1), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1), - Values(false))); // Values(false) is the 
reserved parameter - - - - -#endif // HAVE_OPENCL +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_blend.cpp b/modules/ocl/perf/perf_blend.cpp index f78f7d6b2..00034700b 100644 --- a/modules/ocl/perf/perf_blend.cpp +++ b/modules/ocl/perf/perf_blend.cpp @@ -44,79 +44,77 @@ //M*/ #include "precomp.hpp" -#include <iomanip> - -#ifdef HAVE_OPENCL -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; - -PARAM_TEST_CASE(Blend, MatType, int) +///////////// blend //////////////////////// +template <typename T> +void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold) { - int type; - int channels; - std::vector<cv::ocl::Info> oclinfo; + result_gold.create(img1.size(), img1.type()); - virtual void SetUp() + int cn = img1.channels(); + + for (int y = 0; y < img1.rows; ++y) { + const float *weights1_row = weights1.ptr<float>(y); + const float *weights2_row = weights2.ptr<float>(y); + const T *img1_row = img1.ptr<T>(y); + const T *img2_row = img2.ptr<T>(y); + T *result_gold_row = result_gold.ptr<T>(y); - type = GET_PARAM(0); - channels = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - //cv::ocl::setBinpath(CLBINPATH); - } -}; - -TEST_P(Blend, Performance) -{ - cv::Size size(MWIDTH, MHEIGHT); - cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0); - cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 
255.0 : 1.0); - cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); - cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); - cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F); - cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels)); - - - double totalgputick_all = 0; - double totalgputick_kernel = 0; - double t1 = 0; - double t2 = 0; - - for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100 - { - t1 = (double)cvGetTickCount(); - cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host); - cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host); - cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1); - cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1); - - t2 = (double)cvGetTickCount(); - cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst); - t2 = (double)cvGetTickCount() - t2; - - cv::Mat m; - gdst.download(m); - t1 = (double)cvGetTickCount() - t1; - - if (j == 0) + for (int x = 0; x < img1.cols * cn; ++x) { - continue; + float w1 = weights1_row[x / cn]; + float w2 = weights2_row[x / cn]; + result_gold_row[x] = static_cast<T>((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f)); } - - totalgputick_all = t1 + totalgputick_all; - totalgputick_kernel = t2 + totalgputick_kernel; - }; - - cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - + } } +TEST(blend) +{ + Mat src1, src2, weights1, weights2, dst; + ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst; -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( - Values(CV_8U, CV_32F), Values(1, 4))); -#endif \ No newline at end of file + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] << " and CV_32FC1"; + + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(weights1, size, size, CV_32FC1, 0, 1); + gen(weights2, size, size, CV_32FC1, 0, 1); + + blendLinearGold<uchar>(src1, src2, weights1, weights2, dst); + + CPU_ON; + blendLinearGold<uchar>(src1, src2, weights1, weights2, dst); + CPU_OFF; + + d_src1.upload(src1); + d_src2.upload(src2); + d_weights1.upload(weights1); + d_weights2.upload(weights2); + + WARMUP_ON; + ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + d_weights1.upload(weights1); + d_weights2.upload(weights2); + ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; + } + } +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_brute_force_matcher.cpp b/modules/ocl/perf/perf_brute_force_matcher.cpp new file mode 100644 index 000000000..6562f91e4 --- /dev/null +++ b/modules/ocl/perf/perf_brute_force_matcher.cpp @@ -0,0 +1,150 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ +#include "precomp.hpp" + +//////////////////// BruteForceMatch ///////////////// +TEST(BruteForceMatcher) +{ + Mat trainIdx_cpu; + Mat distance_cpu; + Mat allDist_cpu; + Mat nMatches_cpu; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + // Init CPU matcher + int desc_len = 64; + + BFMatcher matcher(NORM_L2); + + Mat query; + gen(query, size, desc_len, CV_32F, 0, 1); + + Mat train; + gen(train, size, desc_len, CV_32F, 0, 1); + // Output + vector< vector<DMatch> > matches(2); + // Init GPU matcher + ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist); + + ocl::oclMat d_query(query); + ocl::oclMat d_train(train); + + ocl::oclMat d_trainIdx, d_distance, d_allDist, d_nMatches; + + SUBTEST << size << "; match"; + + matcher.match(query, train, matches[0]); + + CPU_ON; + matcher.match(query, train, matches[0]); + CPU_OFF; + + WARMUP_ON; + d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); + WARMUP_OFF; + + GPU_ON; + d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); + ; + GPU_OFF; + + GPU_FULL_ON; + d_query.upload(query); + d_train.upload(train); + d_matcher.match(d_query, d_train, matches[0]); + GPU_FULL_OFF; + + SUBTEST << size << "; knnMatch"; + + matcher.knnMatch(query, train, matches, 2); + + CPU_ON; + matcher.knnMatch(query, train, matches, 2); + CPU_OFF; + + WARMUP_ON; + d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); + WARMUP_OFF; + + GPU_ON; + d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); + ; + GPU_OFF; + + GPU_FULL_ON; + d_query.upload(query); + d_train.upload(train); + d_matcher.knnMatch(d_query, d_train, matches, 2); + GPU_FULL_OFF; + + SUBTEST << size << "; radiusMatch"; + + float max_distance = 2.0f; + + matcher.radiusMatch(query, train, matches, max_distance); + + CPU_ON; + matcher.radiusMatch(query, train, matches, max_distance); + CPU_OFF; + + d_trainIdx.release(); + + WARMUP_ON; + 
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); + WARMUP_OFF; + + GPU_ON; + d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); + ; + GPU_OFF; + + GPU_FULL_ON; + d_query.upload(query); + d_train.upload(train); + d_matcher.radiusMatch(d_query, d_train, matches, max_distance); + GPU_FULL_OFF; + } +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_canny.cpp b/modules/ocl/perf/perf_canny.cpp index eb895df5e..428e036d0 100644 --- a/modules/ocl/perf/perf_canny.cpp +++ b/modules/ocl/perf/perf_canny.cpp @@ -42,112 +42,42 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#include <iomanip> -#ifdef HAVE_OPENCL -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; -#ifndef MWC_TEST_UTILITY -#define MWC_TEST_UTILITY - -// Param class -#ifndef IMPLEMENT_PARAM_CLASS -#define IMPLEMENT_PARAM_CLASS(name, type) \ -class name \ - { \ - public: \ - name ( type arg = type ()) : val_(arg) {} \ - operator type () const {return val_;} \ - private: \ - type val_; \ - }; \ - inline void PrintTo( name param, std::ostream* os) \ - { \ - *os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \ - } - -IMPLEMENT_PARAM_CLASS(Channels, int) -#endif // IMPLEMENT_PARAM_CLASS -#endif // MWC_TEST_UTILITY - -//////////////////////////////////////////////////////// -// Canny1 -extern std::string workdir; -IMPLEMENT_PARAM_CLASS(AppertureSize, int); -IMPLEMENT_PARAM_CLASS(L2gradient, bool); - -PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient) +///////////// Canny //////////////////////// +TEST(Canny) { - int apperture_size; - bool useL2gradient; - //std::vector<cv::ocl::Info> oclinfo; + Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE); - virtual void SetUp() + if (img.empty()) { - apperture_size = GET_PARAM(0); - 
useL2gradient = GET_PARAM(1); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } -}; - -TEST_P(Canny1, Performance) -{ - cv::Mat img = readImage(workdir + "fruits.jpg", cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(img.empty()); - - double low_thresh = 100.0; - double high_thresh = 150.0; - - cv::Mat edges_gold; - cv::ocl::oclMat edges; - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - edges.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - + throw runtime_error("can't open aloeL.jpg"); } - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1"; + Mat edges(img.size(), CV_8UC1); -} + CPU_ON; + Canny(img, edges, 50.0, 100.0); + CPU_OFF; -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine( - testing::Values(AppertureSize(3), AppertureSize(5)), - testing::Values(L2gradient(false), L2gradient(true)))); + ocl::oclMat d_img(img); + ocl::oclMat d_edges; + ocl::CannyBuf d_buf; + WARMUP_ON; + ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); + WARMUP_OFF; + GPU_ON; + ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); + ; + GPU_OFF; -#endif //Have opencl \ No newline at end of file + GPU_FULL_ON; + d_img.upload(img); + ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); + d_edges.download(edges); + GPU_FULL_OFF; +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_color.cpp b/modules/ocl/perf/perf_color.cpp new file mode 100644 index 000000000..e32a1839d --- /dev/null +++ b/modules/ocl/perf/perf_color.cpp @@ -0,0 +1,91 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ +#include "precomp.hpp" + +///////////// cvtColor//////////////////////// +TEST(cvtColor) +{ + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_8UC4}; + std::string type_name[] = {"CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + gen(src, size, size, all_type[j], 0, 256); + SUBTEST << size << "x" << size << "; " << type_name[j] << " ; CV_RGBA2GRAY"; + + cvtColor(src, dst, CV_RGBA2GRAY, 4); + + CPU_ON; + cvtColor(src, dst, CV_RGBA2GRAY, 4); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); + WARMUP_OFF; + + GPU_ON; + ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); + d_dst.download(dst); + GPU_FULL_OFF; + } + + + } + + +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_columnsum.cpp b/modules/ocl/perf/perf_columnsum.cpp index 96ea26a50..d2e3b45e5 100644 --- a/modules/ocl/perf/perf_columnsum.cpp +++ b/modules/ocl/perf/perf_columnsum.cpp @@ -15,8 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Fangfang Bai fangfang@multicorewareinc.com -// +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -31,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct, @@ -43,78 +42,47 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#include <iomanip> -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; - -/////////////////////////////////////////////////////////////////////////////// -/// ColumnSum - -#ifdef HAVE_OPENCL - -//////////////////////////////////////////////////////////////////////// -// ColumnSum - -PARAM_TEST_CASE(ColumnSum) +///////////// columnSum//////////////////////// +TEST(columnSum) { - cv::Mat src; - //std::vector<cv::ocl::Info> oclinfo; + Mat src, dst; + ocl::oclMat d_src, d_dst; - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); + SUBTEST << size << 'x' << size << "; CV_32FC1"; + + gen(src, size, size, CV_32FC1, 0, 256); + + CPU_ON; + dst.create(src.size(), src.type()); + + for (int i = 1; i < src.rows; ++i) + { + for (int j = 0; j < src.cols; ++j) + { + dst.at<float>(i, j) = src.at<float>(i, j) += src.at<float>(i - 1, j); + } + } + + CPU_OFF; + + d_src.upload(src); + WARMUP_ON; + ocl::columnSum(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::columnSum(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::columnSum(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } -}; - -TEST_F(ColumnSum, Performance) -{ - cv::Size size(MWIDTH, MHEIGHT); - cv::Mat src = randomMat(size, CV_32FC1); - cv::ocl::oclMat d_dst; - - double totalgputick = 0; - double totalgputick_kernel = 0; - double t1 = 0; - double t2 = 0; - - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat d_src(src); - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::columnSum(d_src, d_dst); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat 
cpu_dst; - d_dst.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - - -} - - - -#endif \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_fft.cpp b/modules/ocl/perf/perf_fft.cpp index c9c19d0d4..50be2546e 100644 --- a/modules/ocl/perf/perf_fft.cpp +++ b/modules/ocl/perf/perf_fft.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Fangfangbai, fangfang@multicorewareinc.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -42,85 +42,48 @@ // the use of this software, even if advised of the possibility of such damage. 
// //M*/ - #include "precomp.hpp" -using namespace std; -#ifdef HAVE_CLAMDFFT -//////////////////////////////////////////////////////////////////////////// -// Dft -PARAM_TEST_CASE(Dft, cv::Size, bool) + +///////////// dft //////////////////////// +TEST(dft) { - cv::Size dft_size; - bool dft_rows; - vector<cv::ocl::Info> info; - virtual void SetUp() + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_32FC1, CV_32FC2}; + std::string type_name[] = {"CV_32FC1", "CV_32FC2"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - dft_size = GET_PARAM(0); - dft_rows = GET_PARAM(1); - cv::ocl::getDevice(info); - } -}; + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; complex-to-complex"; -TEST_P(Dft, C2C) -{ - cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0); - int flags = 0; - flags |= dft_rows ? cv::DFT_ROWS : 0; + gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(1)); - cv::ocl::oclMat d_b; + dft(src, dst); - double totalgputick = 0; - double totalgputick_kernel = 0; - double t1 = 0; - double t2 = 0; + CPU_ON; + dft(src, dst); + CPU_OFF; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { + d_src.upload(src); - t1 = (double)cvGetTickCount();//gpu start1 + WARMUP_ON; + ocl::dft(d_src, d_dst, Size(size, size)); + WARMUP_OFF; - cv::ocl::oclMat ga = cv::ocl::oclMat(a); //upload + GPU_ON; + ocl::dft(d_src, d_dst, Size(size, size)); + ; + GPU_OFF; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::dft(ga, d_b, a.size(), flags); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - d_b.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; + GPU_FULL_ON; + d_src.upload(src); + ocl::dft(d_src, d_dst, Size(size, size)); + d_dst.download(dst); + GPU_FULL_OFF; + } } - - cout << "average gpu runtime is " 
<< totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; -} - - - -TEST_P(Dft, R2CthenC2R) -{ - cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); - - int flags = 0; - //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet - - cv::ocl::oclMat d_b, d_c; - - cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); - cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT); - - EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, ""); -} - -//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine( -// testing::Values(cv::Size(1280, 1024), cv::Size(1920, 1080),cv::Size(1800, 1500)), -// testing::Values(false, true))); - -#endif // HAVE_CLAMDFFT +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index 100a1c59d..e9646c77e 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -10,15 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. 
// // @Authors -// Niko Li, newlife20080214@gmail.com -// Jia Haipeng, jiahaipeng95@gmail.com -// Zero Lin, Zero.Lin@amd.com -// Zhang Ying, zhangying913@gmail.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -33,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -45,1165 +42,331 @@ // the use of this software, even if advised of the possibility of such damage. 
// //M*/ - #include "precomp.hpp" -#ifdef HAVE_OPENCL - -using namespace cvtest; -using namespace testing; -using namespace std; -//using namespace cv::ocl; - -PARAM_TEST_CASE(FilterTestBase, MatType, bool) +///////////// Blur//////////////////////// +TEST(Blur) { - int type; - cv::Scalar val; + Mat src1, dst; + ocl::oclMat d_src1, d_dst; - //src mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mask; - cv::Mat dst; - cv::Mat dst1; //bak, for two outputs + Size ksize = Size(3, 3); + int bordertype = BORDER_CONSTANT; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int maskx; - int masky; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - cv::Mat dst1_roi; //bak - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - cv::ocl::oclMat gdst1_whole; //bak - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdst1; //bak - cv::ocl::oclMat gmask; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + gen(src1, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - mat1 = randomMat(rng, size, type, 5, 16, false); - mat2 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dst1 = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + blur(src1, dst, ksize, Point(-1, -1), bordertype); + + CPU_ON; + blur(src1, dst, ksize, Point(-1, -1), bordertype); 
+ CPU_OFF; + + d_src1.upload(src1); + + WARMUP_ON; + ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); + WARMUP_OFF; + + GPU_ON; + ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); + d_dst.download(dst); + GPU_FULL_OFF; + } - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); } - - void random_roi() - { - cv::RNG &rng = TS::ptr()->get_rng(); - - //randomize ROI - roicols = rng.uniform(1, mat1.cols); - roirows = rng.uniform(1, mat1.rows); - src1x = rng.uniform(0, mat1.cols - roicols); - src1y = rng.uniform(0, mat1.rows - roirows); - src2x = rng.uniform(0, mat2.cols - roicols); - src2y = rng.uniform(0, mat2.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); - maskx = rng.uniform(0, mask.cols - roicols); - masky = rng.uniform(0, mask.rows - roirows); - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); - - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - } - -}; - -///////////////////////////////////////////////////////////////////////////////////////////////// -// blur - -PARAM_TEST_CASE(Blur, MatType, cv::Size, int) +} +///////////// Laplacian//////////////////////// +TEST(Laplacian) { - int type; - cv::Size ksize; - int bordertype; + Mat src1, dst; + ocl::oclMat d_src1, d_dst; - //src mat - cv::Mat mat1; - cv::Mat dst; + int ksize = 3; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", 
"CV_8UC4"}; - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - ksize = GET_PARAM(1); - bordertype = GET_PARAM(2); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - - void Has_roi(int b) - { - if(b) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src1, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); + + + Laplacian(src1, dst, -1, ksize, 1); + + CPU_ON; + Laplacian(src1, dst, -1, ksize, 1); + CPU_OFF; + + d_src1.upload(src1); + + WARMUP_ON; + ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); + WARMUP_OFF; + + GPU_ON; + ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); + d_dst.download(dst); + GPU_FULL_OFF; } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); } - -}; - -TEST_P(Blur, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double 
totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::blur(mat1_roi, dst_roi, ksize, Point(-1, -1), bordertype); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::blur(gmat1, gdst, ksize, Point(-1, -1), bordertype); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::blur(gmat1, gdst, ksize, Point(-1, -1), bordertype); - }; -#endif - } -///////////////////////////////////////////////////////////////////////////////////////////////// -//Laplacian - -PARAM_TEST_CASE(LaplacianTestBase, MatType, int) +///////////// Erode //////////////////// +TEST(Erode) { - int type; - int ksize; + Mat src, dst, ker; + ocl::oclMat d_src, d_dst; - //src mat - cv::Mat mat; - cv::Mat dst; + int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat_roi; - cv::Mat dst_roi; - std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - ksize = GET_PARAM(1); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - roicols = mat.cols - 1; - roirows = mat.rows - 1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(256)); 
+ ker = getStructuringElement(MORPH_RECT, Size(3, 3)); + + erode(src, dst, ker); + + CPU_ON; + erode(src, dst, ker); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::erode(d_src, d_dst, ker); + WARMUP_OFF; + + GPU_ON; + ocl::erode(d_src, d_dst, ker); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::erode(d_src, d_dst, ker); + d_dst.download(dst); + GPU_FULL_OFF; } - else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - }; - - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); } - -}; - -struct Laplacian : LaplacianTestBase {}; - -TEST_P(Laplacian, Accuracy) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::Laplacian(mat_roi, dst_roi, -1, ksize, 1); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat = mat_roi; - - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1); - }; -#endif } - -///////////////////////////////////////////////////////////////////////////////////////////////// -// erode & dilate - -PARAM_TEST_CASE(ErodeDilateBase, MatType, bool) +///////////// Sobel //////////////////////// +TEST(Sobel) { - int type; - //int iterations; + Mat src, dst; + ocl::oclMat d_src, d_dst; - //erode or dilate kernel - cv::Mat kernel; + int dx = 1; + int dy = 1; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - // iterations = GET_PARAM(1); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - // rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3)); - kernel = randomMat(rng, Size(3, 3), CV_8UC1, 0, 3, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); 
- ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + Sobel(src, dst, -1, dx, dy); + + CPU_ON; + Sobel(src, dst, -1, dx, dy); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::Sobel(d_src, d_dst, -1, dx, dy); + WARMUP_OFF; + + GPU_ON; + ocl::Sobel(d_src, d_dst, -1, dx, dy); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::Sobel(d_src, d_dst, -1, dx, dy); + d_dst.download(dst); + GPU_FULL_OFF; } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); } - -}; - -// erode - -struct Erode : ErodeDilateBase {}; - -TEST_P(Erode, Mat) +} +///////////// Scharr //////////////////////// +TEST(Scharr) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int dx = 1; + int dy = 0; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::erode(mat1_roi, dst_roi, kernel); - t0 = (double)cvGetTickCount() - t0;//cpu end 
+ gen(src, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + Scharr(src, dst, -1, dx, dy); - gmat1 = mat1_roi; + CPU_ON; + Scharr(src, dst, -1, dx, dy); + CPU_OFF; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::erode(gmat1, gdst, kernel); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + d_src.upload(src); - if(j == 0) - continue; + WARMUP_ON; + ocl::Scharr(d_src, d_dst, -1, dx, dy); + WARMUP_OFF; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + GPU_ON; + ocl::Scharr(d_src, d_dst, -1, dx, dy); + ; + GPU_OFF; + GPU_FULL_ON; + d_src.upload(src); + ocl::Scharr(d_src, d_dst, -1, dx, dy); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::erode(gmat1, gdst, kernel); - }; -#endif - } -// dilate - -struct Dilate : ErodeDilateBase {}; - -TEST_P(Dilate, Mat) +///////////// GaussianBlur //////////////////////// +TEST(GaussianBlur) { + Mat src, dst; + int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - t0 = (double)cvGetTickCount();//cpu start - cv::dilate(mat1_roi, dst_roi, kernel); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + gen(src, size, size, all_type[j], 0, 256); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::dilate(gmat1, gdst, kernel); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + GaussianBlur(src, dst, Size(9, 9), 0); - if(j == 0) - continue; + CPU_ON; + GaussianBlur(src, dst, Size(9, 9), 0); + CPU_OFF; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + ocl::oclMat d_src(src); + 
ocl::oclMat d_dst(src.size(), src.type()); + ocl::oclMat d_buf; + WARMUP_ON; + ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + WARMUP_OFF; + + GPU_ON; + ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::dilate(gmat1, gdst, kernel); - }; -#endif - } -///////////////////////////////////////////////////////////////////////////////////////////////// -// Sobel - -PARAM_TEST_CASE(Sobel, MatType, int, int, int, int) +///////////// filter2D//////////////////////// +TEST(filter2D) { - int type; - int dx, dy, ksize, bordertype; + Mat src; - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - dx = GET_PARAM(1); - dy = GET_PARAM(2); - ksize = GET_PARAM(3); - bordertype = 
GET_PARAM(4); - dx = 2; - dy = 0; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; + gen(src, size, size, all_type[j], 0, 256); + + for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1) + { + SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ; + + Mat kernel; + gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0); + + Mat dst; + cv::filter2D(src, dst, -1, kernel); + + CPU_ON; + cv::filter2D(src, dst, -1, kernel); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_dst; + + WARMUP_ON; + ocl::filter2D(d_src, d_dst, -1, kernel); + WARMUP_OFF; + + GPU_ON; + ocl::filter2D(d_src, d_dst, -1, kernel); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::filter2D(d_src, d_dst, -1, kernel); + d_dst.download(dst); + GPU_FULL_OFF; + } + } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); } - -}; - -TEST_P(Sobel, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - 
Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::Sobel(mat1_roi, dst_roi, -1, dx, dy, ksize, /*scale*/0.00001,/*delta*/0, bordertype); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::Sobel(gmat1, gdst, -1, dx, dy, ksize,/*scale*/0.00001,/*delta*/0, bordertype); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::Sobel(gmat1, gdst, -1, dx, dy, ksize,/*scale*/0.00001,/*delta*/0, bordertype); - }; -#endif - -} - -///////////////////////////////////////////////////////////////////////////////////////////////// -// Scharr - -PARAM_TEST_CASE(Scharr, MatType, int, int, int) -{ - int type; - int dx, dy, bordertype; - - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - dx = GET_PARAM(1); - dy = GET_PARAM(2); - bordertype = GET_PARAM(3); - dx = 1; - dy = 0; - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - - } -}; - -TEST_P(Scharr, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double 
totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::Scharr(mat1_roi, dst_roi, -1, dx, dy, /*scale*/1,/*delta*/0, bordertype); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::Scharr(gmat1, gdst, -1, dx, dy,/*scale*/1,/*delta*/0, bordertype); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::Scharr(gmat1, gdst, -1, dx, dy,/*scale*/1,/*delta*/0, bordertype); - }; -#endif - -} - -///////////////////////////////////////////////////////////////////////////////////////////////// -// GaussianBlur - -PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int) -{ - int type; - cv::Size ksize; - int bordertype; - - double sigma1, sigma2; - - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - ksize = GET_PARAM(1); - bordertype = GET_PARAM(2); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - sigma1 = rng.uniform(0.1, 1.0); - sigma2 = rng.uniform(0.1, 1.0); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - - } - -}; - -TEST_P(GaussianBlur, Mat) -{ -#ifndef 
PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::GaussianBlur(mat1_roi, dst_roi, ksize, sigma1, sigma2, bordertype); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype); - }; -#endif - -} - -//************test********** - -INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(cv::Size(3, 3)/*, cv::Size(5, 5), cv::Size(7, 7)*/), - Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); - - -INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(1/*, 3*/))); - -//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); - -INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC1), Values(false))); - -//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); - -INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false))); - - -INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_32FC1), - Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT, - (MatType)cv::BORDER_REPLICATE))); - - -INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine( - Values(CV_8UC1, CV_32FC1), Values(0, 1), Values(0, 1), - Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE))); - -INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine( - Values(CV_8UC1, CV_32FC1), - Values(cv::Size(3, 3), cv::Size(5, 5)), - Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE))); - - -#endif // HAVE_OPENCL +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_gemm.cpp b/modules/ocl/perf/perf_gemm.cpp index c3dcab34f..930ecb046 
100644 --- a/modules/ocl/perf/perf_gemm.cpp +++ b/modules/ocl/perf/perf_gemm.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -41,73 +42,47 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - - #include "precomp.hpp" -using namespace std; -#ifdef HAVE_CLAMDBLAS -//////////////////////////////////////////////////////////////////////////// -// GEMM -PARAM_TEST_CASE(Gemm, int, cv::Size, int) + +///////////// gemm //////////////////////// +TEST(gemm) { - int type; - cv::Size mat_size; - int flags; - vector<cv::ocl::Info> info; - virtual void SetUp() + Mat src1, src2, src3, dst; + ocl::oclMat d_src1, d_src2, d_src3, d_dst; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - mat_size = GET_PARAM(1); - flags = GET_PARAM(2); + SUBTEST << size << 'x' << size; - cv::ocl::getDevice(info); + gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); + gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); + gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); + + gemm(src1, src2, 1.0, src3, 1.0, dst); + + CPU_ON; + gemm(src1, src2, 1.0, src3, 1.0, dst); + CPU_OFF; + + d_src1.upload(src1); + d_src2.upload(src2); + d_src3.upload(src3); + + WARMUP_ON; + ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + d_src3.upload(src3); + ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } -}; - -TEST_P(Gemm, Performance) -{ - cv::Mat a = randomMat(mat_size, type, 0.0, 10.0); - cv::Mat b = randomMat(mat_size, type, 0.0, 10.0); - cv::Mat c = randomMat(mat_size, type, 0.0, 10.0); - 
cv::ocl::oclMat ocl_dst; - - double totalgputick = 0; - double totalgputick_kernel = 0; - double t1 = 0; - double t2 = 0; - - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload - cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload - cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - ocl_dst.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; -} - - -INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine( - testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/), - testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)), - testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T)))); -#endif \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp index 525b8fb49..5a909ace4 100644 --- a/modules/ocl/perf/perf_haar.cpp +++ b/modules/ocl/perf/perf_haar.cpp @@ -10,12 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. 
// // @Authors -// Jia Haipeng, jiahaipeng95@gmail.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -42,133 +42,97 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - -#include "opencv2/objdetect/objdetect.hpp" #include "precomp.hpp" -#ifdef HAVE_OPENCL +///////////// Haar //////////////////////// +namespace cv +{ +namespace ocl +{ -using namespace cvtest; -using namespace testing; -using namespace std; -using namespace cv; -extern std::string workdir; struct getRect { - Rect operator ()(const CvAvgComp &e) const + Rect operator()(const CvAvgComp &e) const { return e.rect; } }; -PARAM_TEST_CASE(HaarTestBase, int, int) +class CascadeClassifier_GPU : public OclCascadeClassifier { - //std::vector<cv::ocl::Info> oclinfo; - cv::ocl::OclCascadeClassifier cascade, nestedCascade; - cv::CascadeClassifier cpucascade, cpunestedCascade; - // Mat img; - - double scale; - int index; - - virtual void SetUp() +public: + void detectMultiScale(oclMat &image, + CV_OUT std::vector<cv::Rect>& faces, + double scaleFactor = 1.1, + int minNeighbors = 3, int flags = 0, + Size minSize = Size(), + Size maxSize = Size()) { - scale = 1.0; - index = 0; - string cascadeName = 
"../../../data/haarcascades/haarcascade_frontalface_alt.xml"; - - if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName))) - { - cout << "ERROR: Could not load classifier cascade" << endl; - return; - } - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath("E:\\"); + (void)maxSize; + MemStorage storage(cvCreateMemStorage(0)); + //CvMat img=image; + CvSeq *objs = oclHaarDetectObjects(image, storage, scaleFactor, minNeighbors, flags, minSize); + vector<CvAvgComp> vecAvgComp; + Seq<CvAvgComp>(objs).copyTo(vecAvgComp); + faces.resize(vecAvgComp.size()); + std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect()); } + }; -////////////////////////////////faceDetect///////////////////////////////////////////////// - -struct Haar : HaarTestBase {}; - -TEST_F(Haar, FaceDetect) -{ - string imgName = workdir + "lena.jpg"; - Mat img = imread( imgName, 1 ); - - if(img.empty()) - { - std::cout << imgName << std::endl; - return ; - } - - //int i = 0; - double t = 0; - vector<Rect> faces, oclfaces; - - // const static Scalar colors[] = { CV_RGB(0, 0, 255), - // CV_RGB(0, 128, 255), - // CV_RGB(0, 255, 255), - // CV_RGB(0, 255, 0), - // CV_RGB(255, 128, 0), - // CV_RGB(255, 255, 0), - // CV_RGB(255, 0, 0), - // CV_RGB(255, 0, 255) - // } ; - - Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); - MemStorage storage(cvCreateMemStorage(0)); - cvtColor( img, gray, CV_BGR2GRAY ); - resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - equalizeHist( smallImg, smallImg ); - - t = (double)cvGetTickCount(); - for(int k = 0; k < LOOP_TIMES; k++) - { - cpucascade.detectMultiScale( smallImg, faces, 1.1, - 3, 0 - | CV_HAAR_SCALE_IMAGE - , Size(30, 30), Size(0, 0) ); - } - t = (double)cvGetTickCount() - t ; - printf( "cpudetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 
1000.) ); - - cv::ocl::oclMat image; - CvSeq *_objects=NULL; - t = (double)cvGetTickCount(); - for(int k = 0; k < LOOP_TIMES; k++) - { - image.upload(smallImg); - _objects = cascade.oclHaarDetectObjects( image, storage, 1.1, - 3, 0 - | CV_HAAR_SCALE_IMAGE - , Size(30, 30), Size(0, 0) ); - } - t = (double)cvGetTickCount() - t ; - printf( "ocldetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) ); - vector<CvAvgComp> vecAvgComp; - Seq<CvAvgComp>(_objects).copyTo(vecAvgComp); - oclfaces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); - - //for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) - //{ - // Mat smallImgROI; - // Point center; - // Scalar color = colors[i%8]; - // int radius; - // center.x = cvRound((r->x + r->width*0.5)*scale); - // center.y = cvRound((r->y + r->height*0.5)*scale); - // radius = cvRound((r->width + r->height)*0.25*scale); - // circle( img, center, radius, color, 3, 8, 0 ); - //} - //namedWindow("result"); - //imshow("result",img); - //waitKey(0); - //destroyAllWindows(); - } -#endif // HAVE_OPENCL +} +TEST(Haar) +{ + Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE); + + if (img.empty()) + { + throw runtime_error("can't open basketball1.png"); + } + + CascadeClassifier faceCascadeCPU; + + if (!faceCascadeCPU.load(abspath("haarcascade_frontalface_alt.xml"))) + { + throw runtime_error("can't load haarcascade_frontalface_alt.xml"); + } + + vector<Rect> faces; + + SUBTEST << img.cols << "x" << img.rows << "; scale image"; + CPU_ON; + faceCascadeCPU.detectMultiScale(img, faces, + 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); + CPU_OFF; + + ocl::CascadeClassifier_GPU faceCascade; + + if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml"))) + { + throw runtime_error("can't load haarcascade_frontalface_alt.xml"); + } + + ocl::oclMat d_img(img); + + faces.clear(); + + WARMUP_ON; + 
faceCascade.detectMultiScale(d_img, faces, + 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); + WARMUP_OFF; + + faces.clear(); + + GPU_ON; + faceCascade.detectMultiScale(d_img, faces, + 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); + ; + GPU_OFF; + + GPU_FULL_ON; + d_img.upload(img); + faceCascade.detectMultiScale(d_img, faces, + 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); + GPU_FULL_OFF; +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp index fd58808a8..b74077ff4 100644 --- a/modules/ocl/perf/perf_hog.cpp +++ b/modules/ocl/perf/perf_hog.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Peng Xiao, pengxiao@multicorewareinc.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -42,125 +42,47 @@ // the use of this software, even if advised of the possibility of such damage. 
// //M*/ - #include "precomp.hpp" -#include <iomanip> -#ifdef HAVE_OPENCL - -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; -extern std::string workdir; - -#ifndef MWC_TEST_UTILITY -#define MWC_TEST_UTILITY - -// Param class -#ifndef IMPLEMENT_PARAM_CLASS -#define IMPLEMENT_PARAM_CLASS(name, type) \ -class name \ - { \ - public: \ - name ( type arg = type ()) : val_(arg) {} \ - operator type () const {return val_;} \ - private: \ - type val_; \ - }; \ - inline void PrintTo( name param, std::ostream* os) \ - { \ - *os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \ - } - -#endif // IMPLEMENT_PARAM_CLASS -#endif // MWC_TEST_UTILITY - -IMPLEMENT_PARAM_CLASS(WinSizw48, bool); - -PARAM_TEST_CASE(HOG, WinSizw48, bool) +///////////// HOG//////////////////////// +TEST(HOG) { - bool is48; - vector<float> detector; - virtual void SetUp() + Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE); + + if (src.empty()) { - is48 = GET_PARAM(0); - if(is48) - { - detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96(); - } - else - { - detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128(); - } - } -}; - -TEST_P(HOG, Performance) -{ - cv::Mat img = readImage(workdir + "lena.jpg", cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(img.empty()); - - // define HOG related arguments - float scale = 1.05f; - //int nlevels = 13; - int gr_threshold = 8; - float hit_threshold = 1.4f; - //bool hit_threshold_auto = true; - - int win_width = is48 ? 
48 : 64; - int win_stride_width = 8; - int win_stride_height = 8; - - bool gamma_corr = true; - - Size win_size(win_width, win_width * 2); //(64, 128) or (48, 96) - Size win_stride(win_stride_width, win_stride_height); - - cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, - cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, - cv::ocl::HOGDescriptor::DEFAULT_NLEVELS); - - gpu_hog.setSVMDetector(detector); - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - t1 = (double)cvGetTickCount();//gpu start1 - - ocl::oclMat d_src(img);//upload - - t2 = (double)cvGetTickCount(); //kernel - - vector<Rect> found; - gpu_hog.detectMultiScale(d_src, found, hit_threshold, win_stride, - Size(0, 0), scale, gr_threshold); - - t2 = (double)cvGetTickCount() - t2;//kernel - - // no download time for HOG - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - + throw runtime_error("can't open road.png"); } - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; -} + cv::HOGDescriptor hog; + hog.setSVMDetector(hog.getDefaultPeopleDetector()); + std::vector<cv::Rect> found_locations; -INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, testing::Combine(testing::Values(WinSizw48(false), WinSizw48(true)), testing::Values(false))); + SUBTEST << 768 << 'x' << 576 << "; road.png"; -#endif //Have opencl \ No newline at end of file + hog.detectMultiScale(src, found_locations); + + CPU_ON; + hog.detectMultiScale(src, found_locations); + CPU_OFF; + + cv::ocl::HOGDescriptor ocl_hog; + ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector()); + ocl::oclMat d_src; + d_src.upload(src); + + WARMUP_ON; + ocl_hog.detectMultiScale(d_src, found_locations); + WARMUP_OFF; + + GPU_ON; + ocl_hog.detectMultiScale(d_src, found_locations); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl_hog.detectMultiScale(d_src, found_locations); + GPU_FULL_OFF; +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index bc54cb275..756f69556 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -10,18 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. 
// // @Authors -// Niko Li, newlife20080214@gmail.com -// Jia Haipeng, jiahaipeng95@gmail.com -// Shengen Yan, yanshengen@gmail.com -// Jiang Liyuan, lyuan001.good@163.com -// Rock Li, Rock.Li@amd.com -// Zailong Wu, bullet@yeah.net -// Xu Pang, pangxu010@163.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -36,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -48,949 +42,290 @@ // the use of this software, even if advised of the possibility of such damage. 
// //M*/ - #include "precomp.hpp" -#ifdef HAVE_OPENCL - -using namespace cvtest; -using namespace testing; -using namespace std; - - -MatType nulltype = -1; - -#define ONE_TYPE(type) testing::ValuesIn(typeVector(type)) -#define NULL_TYPE testing::ValuesIn(typeVector(nulltype)) - - -vector<MatType> typeVector(MatType type) +///////////// equalizeHist //////////////////////// +TEST(equalizeHist) { - vector<MatType> v; - v.push_back(type); - return v; -} + Mat src, dst; + int all_type[] = {CV_8UC1}; + std::string type_name[] = {"CV_8UC1"}; - -PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bool) -{ - int type1, type2, type3, type4, type5; - cv::Scalar val; - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int dst1x; - int dst1y; - int maskx; - int masky; - - //mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mask; - cv::Mat dst; - cv::Mat dst1; //bak, for two outputs - - //mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - cv::Mat dst1_roi; //bak - //std::vector<cv::ocl::Info> oclinfo; - //ocl mat - cv::ocl::oclMat clmat1; - cv::ocl::oclMat clmat2; - cv::ocl::oclMat clmask; - cv::ocl::oclMat cldst; - cv::ocl::oclMat cldst1; //bak - - //ocl mat with roi - cv::ocl::oclMat clmat1_roi; - cv::ocl::oclMat clmat2_roi; - cv::ocl::oclMat clmask_roi; - cv::ocl::oclMat cldst_roi; - cv::ocl::oclMat cldst1_roi; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type1 = GET_PARAM(0); - type2 = GET_PARAM(1); - type3 = GET_PARAM(2); - type4 = GET_PARAM(3); - type5 = GET_PARAM(4); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - double min = 1, max = 20; - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - if(type1 != nulltype) + for (size_t j = 
0; j < sizeof(all_type) / sizeof(int); j++) { - mat1 = randomMat(rng, size, type1, min, max, false); - clmat1 = mat1; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + equalizeHist(src, dst); + + CPU_ON; + equalizeHist(src, dst); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_dst; + ocl::oclMat d_hist; + ocl::oclMat d_buf; + + WARMUP_ON; + ocl::equalizeHist(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::equalizeHist(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::equalizeHist(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(type2 != nulltype) - { - mat2 = randomMat(rng, size, type2, min, max, false); - clmat2 = mat2; - } - if(type3 != nulltype) - { - dst = randomMat(rng, size, type3, min, max, false); - cldst = dst; - } - if(type4 != nulltype) - { - dst1 = randomMat(rng, size, type4, min, max, false); - cldst1 = dst1; - } - if(type5 != nulltype) - { - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - cv::threshold(mask, mask, 0.5, 255., type5); - clmask = mask; - } - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - } - - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols - 1; //start - roirows = mat1.rows - 1; - src1x = 1; - src2x = 1; - src1y = 1; - src2y = 1; - dstx = 1; - dsty = 1; - dst1x = 1; - dst1y = 1; - maskx = 1; - masky = 1; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src2x = 0; - src1y = 0; - src2y = 0; - dstx = 0; - dsty = 0; - dst1x = 0; - dst1y = 0; - maskx = 0; - masky = 0; - }; - - if(type1 != nulltype) - { - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); - } - if(type2 != nulltype) - { - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); 
- } - if(type3 != nulltype) - { - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); - } - if(type4 != nulltype) - { - dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); - //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); - } - if(type5 != nulltype) - { - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); - } - } - - void random_roi() - { - cv::RNG &rng = TS::ptr()->get_rng(); - - //randomize ROI - roicols = rng.uniform(1, mat1.cols); - roirows = rng.uniform(1, mat1.rows); - src1x = rng.uniform(0, mat1.cols - roicols); - src1y = rng.uniform(0, mat1.rows - roirows); - src2x = rng.uniform(0, mat2.cols - roicols); - src2y = rng.uniform(0, mat2.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); - dst1x = rng.uniform(0, dst1.cols - roicols); - dst1y = rng.uniform(0, dst1.rows - roirows); - maskx = rng.uniform(0, mask.cols - roicols); - masky = rng.uniform(0, mask.rows - roirows); - - if(type1 != nulltype) - { - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); - } - if(type2 != nulltype) - { - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); - } - if(type3 != nulltype) - { - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); - } - if(type4 != nulltype) - { - dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); - //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); - } - if(type5 != nulltype) - { - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); - } - } -}; -////////////////////////////////equalizeHist////////////////////////////////////////// - -struct equalizeHist : ImgprocTestBase {}; - -TEST_P(equalizeHist, MatType) -{ - if 
(mat1.type() != CV_8UC1 || mat1.type() != dst.type()) - { - cout << "Unsupported type" << endl; - EXPECT_DOUBLE_EQ(0.0, 0.0); - } - else - { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::equalizeHist(mat1_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - cldst_roi = cldst(Rect(dstx, dsty, roicols, roirows)); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::equalizeHist(clmat1_roi, cldst_roi); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - //cldst.download(cpu_cldst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::equalizeHist(clmat1_roi, cldst_roi); - }; -#endif - } -} - - -////////////////////////////////bilateralFilter//////////////////////////////////////////// - -struct bilateralFilter : ImgprocTestBase {}; - -TEST_P(bilateralFilter, Mat) -{ - double sigmacolor = 50.0; - int radius = 9; - int d = 2 * radius + 1; - double sigmaspace = 20.0; - int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,cv::BORDER_REFLECT,cv::BORDER_WRAP,cv::BORDER_REFLECT_101*/}; - const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/}; - - if (mat1.depth() != CV_8U || mat1.type() != dst.type()) - { - cout << "Unsupported type" << endl; - EXPECT_DOUBLE_EQ(0.0, 0.0); - } - else - { - for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++) - { - cout << borderstr[i] << endl; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE) && (mat1_roi.cols <= radius)) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius)) - { - continue; - } - t0 = (double)cvGetTickCount();//cpu start - cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i]); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, 
roirows)); - } - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i]); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - cldst.download(cpu_cldst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } - -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - }; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i]); - }; - -#endif - }; } } - -////////////////////////////////copyMakeBorder//////////////////////////////////////////// - -struct CopyMakeBorder : ImgprocTestBase {}; - -TEST_P(CopyMakeBorder, Mat) +/////////// CopyMakeBorder ////////////////////// +TEST(CopyMakeBorder) { - int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101}; - //const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/}; - int top = 5; - int bottom = 5; - int left = 6; - int right = 6; - if (mat1.type() != dst.type()) + Mat src, dst; + ocl::oclMat d_dst; + + int bordertype = BORDER_CONSTANT; + int 
all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - cout << "Unsupported type" << endl; - EXPECT_DOUBLE_EQ(0.0, 0.0); - } - else - { - for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < 1; k++) //don't support roi perf test - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0)); - t0 = (double)cvGetTickCount() - t0;//cpu end - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0)); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - cldst.download(cpu_cldst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + gen(src, size, size, all_type[j], 0, 256); - if(j == 0) - continue; + copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); + CPU_OFF; + + ocl::oclMat d_src(src); + + WARMUP_ON; + ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); + WARMUP_OFF; + + GPU_ON; + ocl::copyMakeBorder(d_src, 
d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); + d_dst.download(dst); + GPU_FULL_OFF; + } - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - }; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0)); - }; -#endif - }; } } - -////////////////////////////////cornerMinEigenVal////////////////////////////////////////// - -struct cornerMinEigenVal : ImgprocTestBase {}; - -TEST_P(cornerMinEigenVal, Mat) +///////////// cornerMinEigenVal //////////////////////// +TEST(cornerMinEigenVal) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_dst; + + int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4); + int borderType = BORDER_REFLECT; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) 
+ for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - int blockSize = 7, apertureSize = 3; //1 + 2 * (rand() % 4); - int borderType = cv::BORDER_REFLECT; - t0 = (double)cvGetTickCount();//cpu start - cv::cornerMinEigenVal(mat1_roi, dst_roi, blockSize, apertureSize, borderType); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - cldst.download(cpu_cldst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; + gen(src, size, size, all_type[j], 0, 256); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); + CPU_ON; + cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); + CPU_OFF; + + ocl::oclMat d_src(src); + + WARMUP_ON; + ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); + WARMUP_OFF; + + GPU_ON; + ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4); - int borderType = cv::BORDER_REFLECT; - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - }; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType); - }; -#endif } - - -////////////////////////////////cornerHarris////////////////////////////////////////// - -struct cornerHarris : ImgprocTestBase {}; - -TEST_P(cornerHarris, Mat) +///////////// cornerHarris //////////////////////// +TEST(cornerHarris) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - int blockSize = 7, apertureSize = 3; - int borderType = cv::BORDER_REFLECT; - double kk = 2; - t0 = (double)cvGetTickCount();//cpu start - cv::cornerHarris(mat1_roi, dst_roi, blockSize, apertureSize, kk, borderType); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; BORDER_REFLECT"; - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - t2 = (double)cvGetTickCount(); 
//kernel - cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - cldst.download(cpu_cldst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + gen(src, size, size, all_type[j], 0, 1); - if(j == 0) - continue; + cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT); + CPU_OFF; + d_src.upload(src); + + WARMUP_ON; + ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); + WARMUP_OFF; + + GPU_ON; + ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - double kk = 2; - int blockSize = 7, apertureSize = 3; - int borderType = cv::BORDER_REFLECT; - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - }; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType); - }; -#endif - } - - -////////////////////////////////integral///////////////////////////////////////////////// - -struct integral : ImgprocTestBase {}; - -TEST_P(integral, Mat) +///////////// integral //////////////////////// +TEST(integral) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, sum; + ocl::oclMat d_src, d_sum, d_buf; + + int all_type[] = {CV_8UC1}; + std::string type_name[] = {"CV_8UC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - t0 = (double)cvGetTickCount();//cpu start - cv::integral(mat1_roi, dst_roi, dst1_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - cv::Mat cpu_cldst1; - cldst.download(cpu_cldst);//download - cldst1.download(cpu_cldst1); - t1 = (double)cvGetTickCount() - t1;//gpu end1 + gen(src, size, size, all_type[j], 0, 256); - if(j == 0) 
- continue; + integral(src, sum); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + integral(src, sum); + CPU_OFF; + d_src.upload(src); + + WARMUP_ON; + ocl::integral(d_src, d_sum); + WARMUP_OFF; + + GPU_ON; + ocl::integral(d_src, d_sum); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::integral(d_src, d_sum); + d_sum.download(sum); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - }; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi); - }; -#endif } - - -///////////////////////////////////////////////////////////////////////////////////////////////// -// warpAffine & warpPerspective - -PARAM_TEST_CASE(WarpTestBase, MatType, int) +///////////// WarpAffine //////////////////////// +TEST(WarpAffine) { - int type; - cv::Size size; - int interpolation; + Mat src, dst; + ocl::oclMat d_src, d_dst; - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int src_roicols; - int src_roirows; - int dst_roicols; - int dst_roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - 
cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - //dsize = GET_PARAM(1); - interpolation = GET_PARAM(1); - - cv::RNG &rng = TS::ptr()->get_rng(); - size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - src_roicols = mat1.cols - 1; //start - src_roirows = mat1.rows - 1; - dst_roicols = dst.cols - 1; - dst_roirows = dst.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; - - } - else - { - src_roicols = mat1.cols; - src_roirows = mat1.rows; - dst_roicols = dst.cols; - dst_roirows = dst.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - - }; - mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows)); - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - - } - -}; - -/////warpAffine - -struct WarpAffine : WarpTestBase {}; - -TEST_P(WarpAffine, Mat) -{ static const double coeffs[2][3] = { {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, {sin(3.14 / 6), cos(3.14 / 6), -100.0} }; Mat M(2, 3, CV_64F, (void *)coeffs); + int interpolation = INTER_NEAREST; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << 
type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::warpAffine(mat1_roi, dst_roi, M, size, interpolation); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); + Size size1 = Size(size, size); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); + warpAffine(src, dst, M, size1, interpolation); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + CPU_ON; + warpAffine(src, dst, M, size1, interpolation); + CPU_OFF; - if(j == 0) - continue; + d_src.upload(src); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + WARMUP_ON; + ocl::warpAffine(d_src, d_dst, M, size1, interpolation); + WARMUP_OFF; + GPU_ON; + ocl::warpAffine(d_src, d_dst, M, size1, interpolation); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::warpAffine(d_src, d_dst, M, size1, interpolation); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation); - }; -#endif - } - - -// warpPerspective - -struct WarpPerspective : WarpTestBase {}; - -TEST_P(WarpPerspective, Mat) +///////////// WarpPerspective //////////////////////// +TEST(WarpPerspective) { + Mat src, dst; + ocl::oclMat d_src, d_dst; + static const double coeffs[3][3] = { {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, @@ -998,1154 +333,576 @@ TEST_P(WarpPerspective, Mat) {0.0, 0.0, 1.0} }; Mat M(3, 3, CV_64F, (void *)coeffs); + int interpolation = INTER_NEAREST; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::warpPerspective(mat1_roi, dst_roi, M, size, interpolation); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); + Size size1 = Size(size, size); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); + warpPerspective(src, dst, M, size1, interpolation); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::warpPerspective(gmat1, 
gdst, M, size, interpolation); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + CPU_ON; + warpPerspective(src, dst, M, size1, interpolation); + CPU_OFF; - if(j == 0) - continue; + d_src.upload(src); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + WARMUP_ON; + ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); + WARMUP_OFF; + GPU_ON; + ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation); - }; -#endif - } -///////////////////////////////////////////////////////////////////////////////////////////////// -// remap -////////////////////////////////////////////////////////////////////////////////////////////////// - -PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) +///////////// resize //////////////////////// +TEST(resize) { - int srcType; - int map1Type; - int map2Type; - cv::Scalar val; - - int interpolation; - int bordertype; - - cv::Mat src; - cv::Mat dst; - cv::Mat map1; - cv::Mat map2; + Mat src, dst; + ocl::oclMat d_src, d_dst; - int src_roicols; - int src_roirows; - int dst_roicols; - int dst_roirows; - int map1_roicols; - int map1_roirows; - int map2_roicols; - int map2_roirows; - int srcx; - int srcy; - int dstx; - int dsty; - int map1x; - int map1y; - int map2x; - int map2y; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - cv::Mat src_roi; - cv::Mat dst_roi; - cv::Mat map1_roi; - cv::Mat map2_roi; - - //ocl mat for testing - cv::ocl::oclMat gdst; - - //ocl mat with roi - cv::ocl::oclMat gsrc_roi; - cv::ocl::oclMat gdst_roi; - cv::ocl::oclMat gmap1_roi; - cv::ocl::oclMat gmap2_roi; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - srcType = GET_PARAM(0); - map1Type = GET_PARAM(1); - map2Type = GET_PARAM(2); - interpolation = GET_PARAM(3); - bordertype = GET_PARAM(4); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT); - cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT); - double min = 5, max = 16; - - if(srcType != nulltype) + for (size_t j = 0; j < sizeof(all_type) / 
sizeof(int); j++) { - src = randomMat(rng, srcSize, srcType, min, max, false); - } - if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) - { - map1 = randomMat(rng, map1Size, map1Type, min, max, false); + SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; up"; - } - else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) - { - map1 = randomMat(rng, map1Size, map1Type, min, max, false); - map2 = randomMat(rng, map1Size, map1Type, min, max, false); + gen(src, size, size, all_type[j], 0, 256); + + resize(src, dst, Size(), 2.0, 2.0); + + CPU_ON; + resize(src, dst, Size(), 2.0, 2.0); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); + WARMUP_OFF; + + GPU_ON; + ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); + d_dst.download(dst); + GPU_FULL_OFF; } + } + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; down"; + + gen(src, size, size, all_type[j], 0, 256); + + resize(src, dst, Size(), 0.5, 0.5); + + CPU_ON; + resize(src, dst, Size(), 0.5, 0.5); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); + WARMUP_OFF; + + GPU_ON; + ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); + d_dst.download(dst); + GPU_FULL_OFF; + } + + } +} +///////////// threshold//////////////////////// +TEST(threshold) +{ + Mat src, dst; + ocl::oclMat d_src, d_dst; + + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY"; + + gen(src, size, size, CV_8U, 0, 100); + + threshold(src, dst, 50.0, 0.0, THRESH_BINARY); + + CPU_ON; + threshold(src, dst, 
50.0, 0.0, THRESH_BINARY); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); + WARMUP_OFF; + + GPU_ON; + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); + d_dst.download(dst); + GPU_FULL_OFF; + + } + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; 32FC1; THRESH_TRUNC [NPP]"; + + gen(src, size, size, CV_32FC1, 0, 100); + + threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); + + CPU_ON; + threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); + WARMUP_OFF; + + GPU_ON; + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); + d_dst.download(dst); + GPU_FULL_OFF; + } +} +///////////// meanShiftFiltering//////////////////////// +TEST(meanShiftFiltering) +{ + int sp = 10, sr = 10; + Mat src, dst; + + ocl::oclMat d_src, d_dst; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4"; + + gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256)); + + pyrMeanShiftFiltering(src, dst, sp, sr); + + CPU_ON; + pyrMeanShiftFiltering(src, dst, sp, sr); + CPU_OFF; + + gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); + + d_src.upload(src); + + WARMUP_ON; + ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + WARMUP_OFF; + + GPU_ON; + ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + d_dst.download(dst); + GPU_FULL_OFF; + } +} +///////////// meanShiftProc//////////////////////// +COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, 
int sr, int maxIter, float eps, int *tab) +{ + + int isr2 = sr * sr; + int c0, c1, c2, c3; + int iter; + uchar *ptr = NULL; + uchar *pstart = NULL; + int revx = 0, revy = 0; + c0 = sptr[0]; + c1 = sptr[1]; + c2 = sptr[2]; + c3 = sptr[3]; + + // iterate meanshift procedure + for (iter = 0; iter < maxIter; iter++) + { + int count = 0; + int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; + + //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) + int minx = x0 - sp; + int miny = y0 - sp; + int maxx = x0 + sp; + int maxy = y0 + sp; + + //deal with the image boundary + if (minx < 0) + { + minx = 0; + } + + if (miny < 0) + { + miny = 0; + } + + if (maxx >= size.width) + { + maxx = size.width - 1; + } + + if (maxy >= size.height) + { + maxy = size.height - 1; + } + + if (iter == 0) + { + pstart = sptr; + } else - cout << "The wrong input type" << endl; - - dst = randomMat(rng, map1Size, srcType, min, max, false); - switch (src.channels()) { - case 1: - val = cv::Scalar(rng.uniform(0.0, 10.0), 0, 0, 0); - break; - case 2: - val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0); - break; - case 3: - val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0); - break; - case 4: - val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0)); + pstart = pstart + revy * sstep + (revx << 2); //point to the new position + } + + ptr = pstart; + ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row + + for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) + { + int rowCount = 0; + int x = minx; +#if CV_ENABLE_UNROLLED + + for (; x + 4 <= maxx; x += 4, ptr += 16) + { + int t0, t1, t2; + t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; + + if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x; + rowCount++; + } + + t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; + + if (tab[t0 - 
c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 1; + rowCount++; + } + + t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; + + if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 2; + rowCount++; + } + + t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; + + if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 3; + rowCount++; + } + } + +#endif + + for (; x <= maxx; x++, ptr += 4) + { + int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; + + if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x; + rowCount++; + } + } + + if (rowCount == 0) + { + continue; + } + + count += rowCount; + sy += y * rowCount; + } + + if (count == 0) + { break; } - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - //if you want to use undefault device, set it here - //setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - if(b) + int x1 = sx / count; + int y1 = sy / count; + s0 = s0 / count; + s1 = s1 / count; + s2 = s2 / count; + + bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + + tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); + + //revise the pointer corresponding to the new (y0,x0) + revx = x1 - x0; + revy = y1 - y0; + + x0 = x1; + y0 = y1; + c0 = s0; + c1 = s1; + c2 = s2; + + if (stopFlag) { - //randomize ROI - dst_roicols = dst.cols - 1; - dst_roirows = dst.rows - 1; - - src_roicols = src.cols - 1; - src_roirows = src.rows - 1; - - - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; + break; } - else - { - dst_roicols = dst.cols; - dst_roirows = dst.rows; + } //for iter - src_roicols = src.cols; - src_roirows = src.rows; - - - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - } - map1_roicols = dst_roicols; - map1_roirows = dst_roirows; - 
map2_roicols = dst_roicols; - map2_roirows = dst_roirows; - map1x = dstx; - map1y = dsty; - map2x = dstx; - map2y = dsty; - - if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) - { - map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows)); - gmap1_roi = map1_roi; - } - - else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) - { - map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows)); - map2_roi = map2(Rect(map2x, map2y, map2_roicols, map2_roirows)); - gmap1_roi = map1_roi; - gmap2_roi = map2_roi; - } - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - src_roi = dst(Rect(srcx, srcy, src_roicols, src_roirows)); - - } -}; - -TEST_P(Remap, Mat) -{ - if((interpolation == 1 && map1Type == CV_16SC2) || (map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1)) - { - cout << "LINEAR don't support the map1Type and map2Type" << endl; - return; - } - int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/}; - const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/}; - cout << borderstr[0] << endl; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = 0; k < 2; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start - gsrc_roi = src_roi; - gdst = dst; - gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - t2 = (double)cvGetTickCount();//kernel - 
cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - gdst.download(cpu_dst); - - t1 = (double)cvGetTickCount() - t1;//gpu end - - if (j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = 0; j < 2; j ++) - { - Has_roi(j); - gdst = dst; - gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - gsrc_roi = src_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val); - }; -#endif + dptr[0] = (uchar)c0; + dptr[1] = (uchar)c1; + dptr[2] = (uchar)c2; + dptr[3] = (uchar)c3; + COOR coor; + coor.x = static_cast<short>(x0); + coor.y = static_cast<short>(y0); + return coor; } - -///////////////////////////////////////////////////////////////////////////////////////////////// -// resize - -PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) +void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit) { - int type; - cv::Size dsize; - double fx, fy; - int interpolation; - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int src_roicols; - int src_roirows; - int dst_roicols; - int dst_roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - - //src mat with roi - cv::Mat 
mat1_roi; - cv::Mat dst_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() + if (src_roi.empty()) { - type = GET_PARAM(0); - dsize = GET_PARAM(1); - fx = GET_PARAM(2); - fy = GET_PARAM(3); - interpolation = GET_PARAM(4); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - if(dsize == cv::Size() && !(fx > 0 && fy > 0)) - { - cout << "invalid dsize and fx fy" << endl; - return; - } - - if(dsize == cv::Size()) - { - dsize.width = (int)(size.width * fx); - dsize.height = (int)(size.height * fy); - } - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, dsize, type, 5, 16, false); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - src_roicols = mat1.cols - 1; //start - src_roirows = mat1.rows - 1; - dst_roicols = dst.cols - 1; - dst_roirows = dst.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; - - } - else - { - src_roicols = mat1.cols; - src_roirows = mat1.rows; - dst_roicols = dst.cols; - dst_roirows = dst.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - - }; - mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows)); - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - + CV_Error(CV_StsBadArg, "The input image is empty"); } -}; - -TEST_P(Resize, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + if (src_roi.depth() != CV_8U || src_roi.channels() != 4) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - 
for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported"); } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + + CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) && + (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows)); + CV_Assert(!(dstCoor_roi.step & 0x3)); + + if (!(crit.type & cv::TermCriteria::MAX_ITER)) { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); - gmat1 = mat1_roi; - if(j == 0) + crit.maxCount = 5; + } + + int maxIter = std::min(std::max(crit.maxCount, 1), 100); + float eps; + + if (!(crit.type & cv::TermCriteria::EPS)) + { + eps = 1.f; + } + + eps = (float)std::max(crit.epsilon, 0.0); + + int tab[512]; + + for (int i = 0; i < 512; i++) + { + tab[i] = (i - 255) * (i - 255); + } + + uchar *sptr = src_roi.data; + uchar *dptr = dst_roi.data; + short *dCoorptr = (short *)dstCoor_roi.data; + int sstep = (int)src_roi.step; + int dstep = (int)dst_roi.step; + int dCoorstep = (int)dstCoor_roi.step >> 1; + cv::Size size = src_roi.size(); + + for (int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), + dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1)) + { + for (int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2) { - cout << "no roi:"; + *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); - }; -#endif + } } - -///////////////////////////////////////////////////////////////////////////////////////////////// -//threshold - -PARAM_TEST_CASE(Threshold, MatType, ThreshOp) +TEST(meanShiftProc) { - int type; - int threshOp; + Mat src, dst, dstCoor_roi; + ocl::oclMat d_src, d_dst, d_dstCoor_roi; - //src mat - cv::Mat mat1; - cv::Mat dst; + TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1); - // set up roi - 
int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - threshOp = GET_PARAM(1); + SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 "; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); + gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); + gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256)); - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); + meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols - 1; //start - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; + CPU_ON; + meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); + CPU_OFF; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; + d_src.upload(src); - }; - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + WARMUP_ON; + ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); + WARMUP_OFF; + GPU_ON; + ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); + ; + GPU_OFF; - } -}; + GPU_FULL_ON; + d_src.upload(src); + ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); + d_dst.download(dst); + 
d_dstCoor_roi.download(dstCoor_roi); + GPU_FULL_OFF; -TEST_P(Threshold, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - double maxVal = randomDouble(20.0, 127.0); - double thresh = randomDouble(0.0, maxVal); - t0 = (double)cvGetTickCount();//cpu start - cv::threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - double maxVal = randomDouble(20.0, 127.0); - double thresh = randomDouble(0.0, maxVal); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); - }; -#endif - -} -/////////////////////////////////////////////////////////////////////////////////////////////////// -//meanShift - -PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria) -{ - int type, typeCoor; - int sp, sr; - cv::TermCriteria crit; - //src mat - cv::Mat src; - cv::Mat dst; - cv::Mat dstCoor; - - //set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - - //src mat with roi - cv::Mat src_roi; - cv::Mat dst_roi; - cv::Mat dstCoor_roi; - - //ocl dst mat - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdstCoor; - - //std::vector<cv::ocl::Info> oclinfo; - //ocl mat with roi - cv::ocl::oclMat gsrc_roi; - cv::ocl::oclMat gdst_roi; - cv::ocl::oclMat gdstCoor_roi; - - virtual void SetUp() - { - type = GET_PARAM(0); - typeCoor = GET_PARAM(1); - sp = GET_PARAM(2); - sr = GET_PARAM(3); - crit = GET_PARAM(4); - - cv::RNG &rng = TS::ptr()->get_rng(); - - // MWIDTH=256, MHEIGHT=256. 
defined in utility.hpp - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - src = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dstCoor = randomMat(rng, size, typeCoor, 5, 16, false); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - //randomize ROI - roicols = src.cols - 1; - roirows = src.rows - 1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; - } - else - { - roicols = src.cols; - roirows = src.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - }; - - src_roi = src(Rect(srcx, srcy, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows)); - - gdst = dst; - gdstCoor = dstCoor; - } -}; - -/////////////////////////meanShiftFiltering///////////////////////////// -struct meanShiftFiltering : meanShiftTestBase {}; - -TEST_P(meanShiftFiltering, Mat) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double t1 = 0; - double t2 = 0; - for(int k = 0; k < 2; k++) - { - double totalgputick = 0; - double totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t1 = (double)cvGetTickCount();//gpu start1 - - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_gdst; - gdst.download(cpu_gdst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 
1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); - }; -#endif - -} - -///////////////////////////meanShiftProc////////////////////////////////// -struct meanShiftProc : meanShiftTestBase {}; - -TEST_P(meanShiftProc, Mat) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double t1 = 0; - double t2 = 0; - for(int k = 0; k < 2; k++) - { - double totalgputick = 0; - double totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t1 = (double)cvGetTickCount();//gpu start1 - - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_gdstCoor; - gdstCoor.download(cpu_gdstCoor);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); - }; -#endif - -} - -/////////////////////////////////////////////////////////////////////////////////////////// -//hist - -void calcHistGold(const cv::Mat &src, cv::Mat &hist) -{ - hist.create(1, 256, CV_32SC1); - hist.setTo(cv::Scalar::all(0)); - - int *hist_row = hist.ptr<int>(); - for (int y = 0; y < src.rows; ++y) - { - const uchar *src_row = src.ptr(y); - - for (int x = 0; x < src.cols; ++x) - ++hist_row[src_row[x]]; } } -PARAM_TEST_CASE(histTestBase, MatType, MatType) +///////////// remap//////////////////////// +TEST(remap) { - int type_src; + Mat src, dst, xmap, ymap; + ocl::oclMat d_src, d_dst, d_xmap, d_ymap; - //src mat - cv::Mat src; - cv::Mat dst_hist; - //set up roi - int roicols; - int roirows; - int srcx; - int srcy; - //src mat with roi - cv::Mat src_roi; - //ocl dst mat, dst_hist and gdst_hist don't have roi - cv::ocl::oclMat gdst_hist; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - //ocl mat with roi - cv::ocl::oclMat gsrc_roi; + int interpolation = INTER_LINEAR; + int borderMode = BORDER_CONSTANT; - // std::vector<cv::ocl::Info> oclinfo; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type_src = GET_PARAM(0); + for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++) + { + SUBTEST << size << 'x' << size << "; src " << type_name[t] << "; map CV_32FC1"; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); + gen(src, size, size, all_type[t], 0, 256); - src = randomMat(rng, size, type_src, 0, 256, false); + xmap.create(size, 
size, CV_32FC1); + dst.create(size, size, CV_32FC1); + ymap.create(size, size, CV_32FC1); + + for (int i = 0; i < size; ++i) + { + float *xmap_row = xmap.ptr<float>(i); + float *ymap_row = ymap.ptr<float>(i); + + for (int j = 0; j < size; ++j) + { + xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f; + ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f; + } + } + + + remap(src, dst, xmap, ymap, interpolation, borderMode); + + CPU_ON; + remap(src, dst, xmap, ymap, interpolation, borderMode); + CPU_OFF; + + d_src.upload(src); + d_dst.upload(dst); + d_xmap.upload(xmap); + d_ymap.upload(ymap); + + WARMUP_ON; + ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); + WARMUP_OFF; + + GPU_ON; + ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); + d_dst.download(dst); + GPU_FULL_OFF; + } - // int devnums = getDevice(oclinfo); - // CV_Assert(devnums > 0); - //if you want to use undefault device, set it here - //setDevice(oclinfo[0]); } - - void Has_roi(int b) - { - if(b) - { - //randomize ROI - roicols = src.cols - 1; - roirows = src.rows - 1; - srcx = 1; - srcy = 1; - } - else - { - roicols = src.cols; - roirows = src.rows; - srcx = 0; - srcy = 0; - }; - src_roi = src(Rect(srcx, srcy, roicols, roirows)); - } -}; - -///////////////////////////calcHist/////////////////////////////////////// -struct calcHist : histTestBase {}; - -TEST_P(calcHist, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = 0; k < 2; k++) - { - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - calcHistGold(src_roi, dst_hist); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - - gsrc_roi = src_roi; - - t2 = 
(double)cvGetTickCount(); //kernel - cv::ocl::calcHist(gsrc_roi, gdst_hist); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_hist; - gdst_hist.download(cpu_hist);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalcputick = t0 + totalcputick; - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = 0; j < 2; j ++) - { - Has_roi(j); - - gsrc_roi = src_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::calcHist(gsrc_roi, gdst_hist); - }; -#endif -} - - -//************test******************* - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( - ONE_TYPE(CV_8UC1), - NULL_TYPE, - ONE_TYPE(CV_8UC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine( - Values(CV_8UC1, CV_8UC3), - NULL_TYPE, - Values(CV_8UC1, CV_8UC3), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine( - Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/), - NULL_TYPE, - Values(CV_8UC1, CV_8UC4/*,CV_32SC1*/), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine( - Values(CV_8UC1, CV_32FC1), - NULL_TYPE, - ONE_TYPE(CV_32FC1), - NULL_TYPE, - NULL_TYPE, - 
Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine( - Values(CV_8UC1, CV_32FC1), - NULL_TYPE, - ONE_TYPE(CV_32FC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine( - ONE_TYPE(CV_8UC1), - NULL_TYPE, - ONE_TYPE(CV_32SC1), - ONE_TYPE(CV_32FC1), - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, - (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), - (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); - - -INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine - (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, - (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), - (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); - - -INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()), - Values(0.5/*, 1.5, 2*/), Values(0.5/*, 1.5, 2*/), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR))); - - -INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine( - Values(CV_8UC1, CV_32FC1), Values(ThreshOp(cv::THRESH_BINARY), - ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), - ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV)))); - -INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine( - ONE_TYPE(CV_8UC4), - ONE_TYPE(CV_16SC2),//it is no use in meanShiftFiltering - Values(5), - Values(6), - Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1)) - )); - 
-INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine( - ONE_TYPE(CV_8UC4), - ONE_TYPE(CV_16SC2), - Values(5), - Values(6), - Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1)) - )); - -INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(CV_32FC1, CV_16SC2, CV_32FC2), Values(-1, CV_32FC1), - Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR), - Values((int)cv::BORDER_CONSTANT))); - -INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine( - ONE_TYPE(CV_8UC1), - ONE_TYPE(CV_32SC1) //no use - )); - -#endif // HAVE_OPENCL +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_match_template.cpp b/modules/ocl/perf/perf_match_template.cpp index cb5e86bab..2828efe01 100644 --- a/modules/ocl/perf/perf_match_template.cpp +++ b/modules/ocl/perf/perf_match_template.cpp @@ -42,191 +42,105 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#include <iomanip> -#ifdef HAVE_OPENCL -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; -#ifndef MWC_TEST_UTILITY -#define MWC_TEST_UTILITY -//////// Utility -#ifndef DIFFERENT_SIZES -#else -#undef DIFFERENT_SIZES -#endif -#define DIFFERENT_SIZES testing::Values(cv::Size(256, 256), cv::Size(3000, 3000)) - -// Param class -#ifndef IMPLEMENT_PARAM_CLASS -#define IMPLEMENT_PARAM_CLASS(name, type) \ -class name \ -{ \ -public: \ - name ( type arg = type ()) : val_(arg) {} \ - operator type () const {return val_;} \ -private: \ - type val_; \ -}; \ - inline void PrintTo( name param, std::ostream* os) \ -{ \ - *os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \ -} - -IMPLEMENT_PARAM_CLASS(Channels, int) -#endif // IMPLEMENT_PARAM_CLASS -#endif // MWC_TEST_UTILITY - -//////////////////////////////////////////////////////////////////////////////// -// MatchTemplate -#define 
ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED)) - -IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size); - -const char *TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"}; - -PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod) +/////////// matchTemplate //////////////////////// +//void InitMatchTemplate() +//{ +// Mat src; gen(src, 500, 500, CV_32F, 0, 1); +// Mat templ; gen(templ, 500, 500, CV_32F, 0, 1); +// ocl::oclMat d_src(src), d_templ(templ), d_dst; +// ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); +//} +TEST(matchTemplate) { - cv::Size size; - cv::Size templ_size; - int cn; - int method; - //vector<cv::ocl::Info> oclinfo; + //InitMatchTemplate(); - virtual void SetUp() + Mat src, templ, dst; + int templ_size = 5; + + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - size = GET_PARAM(0); - templ_size = GET_PARAM(1); - cn = GET_PARAM(2); - method = GET_PARAM(3); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); + int all_type[] = {CV_32FC1, CV_32FC4}; + std::string type_name[] = {"CV_32FC1", "CV_32FC4"}; + + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + for(templ_size = 5; templ_size <= 5; templ_size *= 5) + { + gen(src, size, size, all_type[j], 0, 1); + + SUBTEST << src.cols << 'x' << src.rows << "; " << type_name[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR"; + + gen(templ, templ_size, templ_size, all_type[j], 0, 1); + + matchTemplate(src, templ, dst, CV_TM_CCORR); + + CPU_ON; + matchTemplate(src, templ, dst, CV_TM_CCORR); + CPU_OFF; + + ocl::oclMat d_src(src), d_templ, d_dst; + + d_templ.upload(templ); + + WARMUP_ON; + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); + WARMUP_OFF; + 
+ GPU_ON; + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_templ.upload(templ); + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); + d_dst.download(dst); + GPU_FULL_OFF; + } + } + + int all_type_8U[] = {CV_8UC1}; + std::string type_name_8U[] = {"CV_8UC1"}; + + for (size_t j = 0; j < sizeof(all_type_8U) / sizeof(int); j++) + { + for(templ_size = 5; templ_size <= 5; templ_size *= 5) + { + SUBTEST << src.cols << 'x' << src.rows << "; " << type_name_8U[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR_NORMED"; + + gen(src, size, size, all_type_8U[j], 0, 255); + + gen(templ, templ_size, templ_size, all_type_8U[j], 0, 255); + + matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); + + CPU_ON; + matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_templ(templ), d_dst; + + WARMUP_ON; + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); + WARMUP_OFF; + + GPU_ON; + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_templ.upload(templ); + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); + d_dst.download(dst); + GPU_FULL_OFF; + } + } } -}; -struct MatchTemplate8U : MatchTemplate {}; - -TEST_P(MatchTemplate8U, Performance) -{ - std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; - std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl; - std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl; - std::cout << "Channels: " << cn << std::endl; - - cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn)); - cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn)); - cv::Mat dst_gold; - cv::ocl::oclMat dst; - - - - - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - 
{ - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload - cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - dst.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - -} - - -struct MatchTemplate32F : MatchTemplate {}; -TEST_P(MatchTemplate32F, Performance) -{ - std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; - std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl; - std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl; - std::cout << "Channels: " << cn << std::endl; - cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn)); - cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn)); - - cv::Mat dst_gold; - cv::ocl::oclMat dst; - - - - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - for(int j = 0; j < LOOP_TIMES; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload - cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - dst.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - 
totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - - } - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - - -} - - -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, - testing::Combine( - testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)), - testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), - testing::Values(Channels(1), Channels(4)/*, Channels(3)*/), - ALL_TEMPLATE_METHODS - ) - ); - -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( - testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)), - testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), - testing::Values(Channels(1), Channels(4) /*, Channels(3)*/), - testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); - -#endif //HAVE_OPENCL \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp index ba011f8df..495b2b82c 100644 --- a/modules/ocl/perf/perf_matrix_operation.cpp +++ b/modules/ocl/perf/perf_matrix_operation.cpp @@ -10,12 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. 
// // @Authors -// Jia Haipeng, jiahaipeng95@gmail.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -42,697 +42,140 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#ifdef HAVE_OPENCL - -using namespace cvtest; -using namespace testing; -using namespace std; -using namespace cv::ocl; -////////////////////////////////converto///////////////////////////////////////////////// -PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType) +///////////// ConvertTo//////////////////////// +TEST(ConvertTo) { - int type; - int dst_type; + Mat src, dst; + ocl::oclMat d_src, d_dst; - //src mat - cv::Mat mat; - cv::Mat dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat_roi; - cv::Mat dst_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - dst_type = GET_PARAM(1); + for 
(size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] << " to 32FC1"; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + gen(src, size, size, all_type[j], 0, 256); + //gen(dst, size, size, all_type[j], 0, 256); + + //d_dst.upload(dst); + + src.convertTo(dst, CV_32FC1); + + CPU_ON; + src.convertTo(dst, CV_32FC1); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + d_src.convertTo(d_dst, CV_32FC1); + WARMUP_OFF; + + GPU_ON; + d_src.convertTo(d_dst, CV_32FC1); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_src.convertTo(d_dst, CV_32FC1); + d_dst.download(dst); + GPU_FULL_OFF; + } - mat = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols - 1; //start - roirows = mat.rows - 1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; - } - else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - }; - - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat = mat_roi; - } -}; - - -struct ConvertTo : ConvertToTestBase {}; - -TEST_P(ConvertTo, Accuracy) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - 
mat_roi.convertTo(dst_roi, dst_type); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - t2 = (double)cvGetTickCount(); //kernel - gmat.convertTo(gdst, dst_type); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - gmat.convertTo(gdst, dst_type); - }; -#endif - } - - -///////////////////////////////////////////copyto///////////////////////////////////////////////////////////// - -PARAM_TEST_CASE(CopyToTestBase, MatType, bool) +///////////// copyTo//////////////////////// +TEST(copyTo) { - int type; + Mat src, dst; + ocl::oclMat d_src, d_dst; - cv::Mat mat; - cv::Mat mask; - cv::Mat dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - int maskx; - int masky; - - //src mat with roi - cv::Mat mat_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gmask; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + gen(src, size, size, all_type[j], 0, 256); + //gen(dst, size, size, all_type[j], 0, 256); - mat = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + //d_dst.upload(dst); + + src.copyTo(dst); + + CPU_ON; + src.copyTo(dst); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + d_src.copyTo(d_dst); + WARMUP_OFF; + + GPU_ON; + d_src.copyTo(d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_src.copyTo(d_dst); + d_dst.download(dst); + 
GPU_FULL_OFF; + } - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols - 1; //start - roirows = mat.rows - 1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; - maskx = 1; - masky = 1; - } - else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - maskx = 0; - masky = 0; - }; - - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat = mat_roi; - //gmask = mask_roi; - } -}; - -struct CopyTo : CopyToTestBase {}; - -TEST_P(CopyTo, Without_mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - mat_roi.copyTo(dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - t2 = (double)cvGetTickCount(); //kernel - gmat.copyTo(gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout 
<< "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - gmat.copyTo(gdst); - }; -#endif } - -TEST_P(CopyTo, With_mask) +///////////// setTo//////////////////////// +TEST(setTo) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + Scalar val(1, 2, 3, 4); + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - mat_roi.copyTo(dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + src.setTo(val); - gmat = mat_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - gmat.copyTo(gdst, gmask); - t2 = (double)cvGetTickCount() - 
t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + src.setTo(val); + CPU_OFF; + d_src.upload(src); + + WARMUP_ON; + d_src.setTo(val); + WARMUP_OFF; + + GPU_ON; + d_src.setTo(val); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_src.setTo(val); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - gmat.copyTo(gdst, gmask); - }; -#endif -} - -///////////////////////////////////////////copyto///////////////////////////////////////////////////////////// - -PARAM_TEST_CASE(SetToTestBase, MatType, bool) -{ - int type; - cv::Scalar val; - - cv::Mat mat; - cv::Mat mask; - - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int maskx; - int masky; - - //src mat with roi - cv::Mat mat_roi; - cv::Mat mask_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gmat_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gmask; - - virtual void SetUp() - { - type = GET_PARAM(0); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat = 
randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols - 1; //start - roirows = mat.rows - 1; - srcx = 1; - srcy = 1; - maskx = 1; - masky = 1; - } - else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - maskx = 0; - masky = 0; - }; - - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - - //gmat_whole = mat; - //gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); - - //gmask = mask_roi; - } -}; - -struct SetTo : SetToTestBase {}; - -TEST_P(SetTo, Without_mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - mat_roi.setTo(val); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat_whole = mat; - gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows)); - t2 = (double)cvGetTickCount(); //kernel - gmat.setTo(val); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gmat_whole.download(cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + 
totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat_whole = mat; - gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows)); - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - gmat.setTo(val); - }; -#endif -} - -TEST_P(SetTo, With_mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - mat_roi.setTo(val, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat_whole = mat; - gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows)); - - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - gmat.setTo(val, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gmat_whole.download(cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat_whole = mat; - gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows)); - - gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - gmat.setTo(val, gmask); - }; -#endif -} -PARAM_TEST_CASE(DataTransfer, MatType, bool) -{ - int type; - cv::Mat mat; - cv::ocl::oclMat gmat_whole; - - virtual void SetUp() - { - type = GET_PARAM(0); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - mat = randomMat(rng, size, type, 5, 16, false); - } -}; -TEST_P(DataTransfer, perf) -{ - double totaluploadtick = 0; - double totaldownloadtick = 0; - double totaltick = 0; - double t0 = 0; - double t1 = 0; - cv::Mat cpu_dst; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - t0 = (double)cvGetTickCount(); - gmat_whole.upload(mat);//upload - t0 = (double)cvGetTickCount() - t0; - - t1 = (double)cvGetTickCount(); - gmat_whole.download(cpu_dst);//download - t1 = (double)cvGetTickCount() - t1; - - if(j == 0) - continue; - totaluploadtick = t0 + totaluploadtick; - totaldownloadtick = t1 + totaldownloadtick; - } - totaltick = totaluploadtick + totaldownloadtick; - cout << "average upload time is " << totaluploadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average download time is " << totaldownloadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average data transfer time is " << totaltick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; -} -//**********test************ - -INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4))); - -INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter -INSTANTIATE_TEST_CASE_P(MatrixOperation, DataTransfer, Combine( - Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter -#endif +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_norm.cpp b/modules/ocl/perf/perf_norm.cpp new file mode 100644 index 000000000..8b7118a6e --- /dev/null +++ b/modules/ocl/perf/perf_norm.cpp @@ -0,0 +1,84 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ +#include "precomp.hpp" + +///////////// norm//////////////////////// +TEST(norm) +{ + Mat src, buf; + ocl::oclMat d_src, d_buf; + + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF"; + + gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); + gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); + + norm(src, NORM_INF); + + CPU_ON; + norm(src, NORM_INF); + CPU_OFF; + + d_src.upload(src); + d_buf.upload(buf); + + WARMUP_ON; + ocl::norm(d_src, d_buf, NORM_INF); + WARMUP_OFF; + + GPU_ON; + ocl::norm(d_src, d_buf, NORM_INF); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::norm(d_src, d_buf, NORM_INF); + GPU_FULL_OFF; + } +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_pyrdown.cpp b/modules/ocl/perf/perf_pyrdown.cpp index 2cea4df4a..1d1d2dec1 100644 --- a/modules/ocl/perf/perf_pyrdown.cpp +++ b/modules/ocl/perf/perf_pyrdown.cpp @@ -1,4 +1,4 @@ -/////////////////////////////////////////////////////////////////////////////////////// +/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// fangfang bai, fangfang@multicorewareinc.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -42,96 +42,46 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#include <iomanip> -#ifdef HAVE_OPENCL - -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; - -PARAM_TEST_CASE(PyrDown, MatType, int) +///////////// pyrDown ////////////////////// +TEST(pyrDown) { - int type; - int channels; - //src mat - cv::Mat mat1; - cv::Mat dst; + Mat src, dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - channels = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } - - -}; - -#define VARNAME(A) string(#A); - -////////////////////////////////PyrDown///////////////////////////////////////////////// -TEST_P(PyrDown, Mat) -{ - cv::Size size(MWIDTH, MHEIGHT); - cv::RNG &rng = TS::ptr()->get_rng(); - mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); - - - cv::ocl::oclMat gdst; - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - - for (int j = 0; j < LOOP_TIMES + 1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat gmat1(mat1); - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::pyrDown(gmat1, gdst); - t2 = 
(double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - gdst.download(cpu_dst); - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if (j == 0) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - continue; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + pyrDown(src, dst); + + CPU_ON; + pyrDown(src, dst); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_dst; + + WARMUP_ON; + ocl::pyrDown(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::pyrDown(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::pyrDown(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - } - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - -} - -//********test**************** -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine( - Values(CV_8U, CV_32F), Values(1, 4))); - - -#endif // HAVE_OPENCL +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_pyrlk.cpp b/modules/ocl/perf/perf_pyrlk.cpp new file mode 100644 index 000000000..f7fc22b9d --- /dev/null +++ b/modules/ocl/perf/perf_pyrlk.cpp @@ -0,0 +1,143 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. 
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage.
+// +//M*/ +#include "precomp.hpp" + +///////////// PyrLKOpticalFlow //////////////////////// +TEST(PyrLKOpticalFlow) +{ + std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"}; + std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"}; + + for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++) + { + Mat frame0 = imread(abspath(images1[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE); + + if (frame0.empty()) + { + std::string errstr = "can't open " + images1[i]; + throw runtime_error(errstr); + } + + Mat frame1 = imread(abspath(images2[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE); + + if (frame1.empty()) + { + std::string errstr = "can't open " + images2[i]; + throw runtime_error(errstr); + } + + Mat gray_frame; + + if (i == 0) + { + cvtColor(frame0, gray_frame, COLOR_BGR2GRAY); + } + + for (int points = Min_Size; points <= Max_Size; points *= Multiple) + { + if (i == 0) + SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points"; + else + SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points"; + Mat nextPts_cpu; + Mat status_cpu; + + vector<Point2f> pts; + goodFeaturesToTrack(i == 0 ? 
gray_frame : frame0, pts, points, 0.01, 0.0); + + vector<Point2f> nextPts; + vector<unsigned char> status; + + vector<float> err; + + calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); + + CPU_ON; + calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); + CPU_OFF; + + ocl::PyrLKOpticalFlow d_pyrLK; + + ocl::oclMat d_frame0(frame0); + ocl::oclMat d_frame1(frame1); + + ocl::oclMat d_pts; + Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]); + d_pts.upload(pts_mat); + + ocl::oclMat d_nextPts; + ocl::oclMat d_status; + ocl::oclMat d_err; + + WARMUP_ON; + d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); + WARMUP_OFF; + + GPU_ON; + d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); + ; + GPU_OFF; + + GPU_FULL_ON; + d_frame0.upload(frame0); + d_frame1.upload(frame1); + d_pts.upload(pts_mat); + d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); + + if (!d_nextPts.empty()) + { + d_nextPts.download(nextPts_cpu); + } + + if (!d_status.empty()) + { + d_status.download(status_cpu); + } + + GPU_FULL_OFF; + } + + } +} diff --git a/modules/ocl/perf/perf_pyrup.cpp b/modules/ocl/perf/perf_pyrup.cpp index a023353ed..d3b3003a2 100644 --- a/modules/ocl/perf/perf_pyrup.cpp +++ b/modules/ocl/perf/perf_pyrup.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// fangfang bai fangfang@multicorewareinc.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -42,81 +42,46 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - -#include "opencv2/core/core.hpp" #include "precomp.hpp" -#include <iomanip> -#ifdef HAVE_OPENCL -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; - -PARAM_TEST_CASE(PyrUp, MatType, int) +///////////// pyrUp //////////////////////// +TEST(pyrUp) { - int type; - int channels; - //std::vector<cv::ocl::Info> oclinfo; + Mat src, dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - virtual void SetUp() + for (int size = 500; size <= 2000; size *= 2) { - type = GET_PARAM(0); - channels = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } -}; - -TEST_P(PyrUp, Performance) -{ - cv::Size size(MWIDTH, MHEIGHT); - cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels)); - cv::Mat dst_gold; - cv::ocl::oclMat dst; - - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - - for (int j = 0; j < LOOP_TIMES + 1; j ++) - { - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::pyrUp(srcMat, dst); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - dst.download(cpu_dst); //download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if (j == 0) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - continue; + SUBTEST << size << 'x' << 
size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + pyrUp(src, dst); + + CPU_ON; + pyrUp(src, dst); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_dst; + + WARMUP_ON; + ocl::pyrUp(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::pyrUp(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::pyrUp(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - } - - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - -} - -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, Combine( - Values(CV_8U, CV_32F), Values(1, 4))); - -#endif // HAVE_OPENCL \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_split_merge.cpp b/modules/ocl/perf/perf_split_merge.cpp index 67a3d24ae..48ff1ff15 100644 --- a/modules/ocl/perf/perf_split_merge.cpp +++ b/modules/ocl/perf/perf_split_merge.cpp @@ -10,12 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. 
// // @Authors -// Jia Haipeng, jiahaipeng95@gmail.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -42,446 +42,109 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#ifdef HAVE_OPENCL - -using namespace cvtest; -using namespace testing; -using namespace std; -using namespace cv::ocl; -PARAM_TEST_CASE(MergeTestBase, MatType, int) +///////////// Merge//////////////////////// +TEST(Merge) { - int type; - int channels; + Mat dst; + ocl::oclMat d_dst; - //src mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mat3; - cv::Mat mat4; + int channels = 4; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - //dst mat - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int src3x; - int src3y; - int src4x; - int src4y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mat3_roi; - cv::Mat mat4_roi; - - //dst mat with roi - cv::Mat dst_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - 
cv::ocl::oclMat gmat3; - cv::ocl::oclMat gmat4; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - channels = GET_PARAM(1); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + Size size1 = Size(size, size); + std::vector<Mat> src(channels); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + for (int i = 0; i < channels; ++i) + { + src[i] = Mat(size1, all_type[j], cv::Scalar::all(i)); + } + + merge(src, dst); + + CPU_ON; + merge(src, dst); + CPU_OFF; + + std::vector<ocl::oclMat> d_src(channels); + + for (int i = 0; i < channels; ++i) + { + d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i)); + } + + WARMUP_ON; + ocl::merge(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::merge(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + + for (int i = 0; i < channels; ++i) + { + d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i)); // same element type as the CPU and kernel-only runs of this subtest + } + + ocl::merge(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; + } - mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols - 1; //start - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - src2x = 1; - src2y = 1; - src3x = 1; - src3y = 1; - src4x = 1; - src4y = 1; - dstx = 1; - dsty = 1; - - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - 
src1y = 0; - src2x = 0; - src2y = 0; - src3x = 0; - src3y = 0; - src4x = 0; - src4y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - mat3_roi = mat3(Rect(src3x, src3y, roicols, roirows)); - mat4_roi = mat4(Rect(src4x, src4y, roicols, roirows)); - - - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - } - -}; - -struct Merge : MergeTestBase {}; - -TEST_P(Merge, Accuracy) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - std::vector<cv::Mat> dev_src; - dev_src.push_back(mat1_roi); - dev_src.push_back(mat2_roi); - dev_src.push_back(mat3_roi); - dev_src.push_back(mat4_roi); - t0 = (double)cvGetTickCount();//cpu start - cv::merge(dev_src, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 ] - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmat3 = mat3_roi; - gmat4 = mat4_roi; - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - std::vector<cv::ocl::oclMat> dev_gsrc; - dev_gsrc.push_back(gmat1); - dev_gsrc.push_back(gmat2); - dev_gsrc.push_back(gmat3); - dev_gsrc.push_back(gmat4); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::merge(dev_gsrc, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / 
((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmat3 = mat3_roi; - gmat4 = mat4_roi; - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - std::vector<cv::ocl::oclMat> dev_gsrc; - dev_gsrc.push_back(gmat1); - dev_gsrc.push_back(gmat2); - dev_gsrc.push_back(gmat3); - dev_gsrc.push_back(gmat4); - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::merge(dev_gsrc, gdst); - }; -#endif } - -PARAM_TEST_CASE(SplitTestBase, MatType, int) +///////////// Split//////////////////////// +TEST(Split) { - int type; - int channels; + //int channels = 4; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - //src mat - cv::Mat mat; - - //dstmat - cv::Mat dst1; - cv::Mat dst2; - cv::Mat dst3; - cv::Mat dst4; - - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dst1x; - int dst1y; - int dst2x; - int dst2y; - int dst3x; - int dst3y; - int dst4x; - int dst4y; - - //src mat with roi - cv::Mat mat_roi; - - //dst mat with roi - cv::Mat dst1_roi; - cv::Mat dst2_roi; - cv::Mat dst3_roi; - cv::Mat dst4_roi; - //std::vector<cv::ocl::Info> oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst1_whole; - cv::ocl::oclMat gdst2_whole; - cv::ocl::oclMat gdst3_whole; - cv::ocl::oclMat gdst4_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst1; - cv::ocl::oclMat gdst2; - cv::ocl::oclMat gdst3; - cv::ocl::oclMat gdst4; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - 
channels = GET_PARAM(1); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j]; + Size size1 = Size(size, size); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); + + std::vector<cv::Mat> dst; + + split(src, dst); + + CPU_ON; + split(src, dst); + CPU_OFF; + + ocl::oclMat d_src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); + std::vector<cv::ocl::oclMat> d_dst; + + WARMUP_ON; + ocl::split(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::split(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::split(d_src, d_dst); + GPU_FULL_OFF; + } - mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); - dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols - 1; //start - roirows = mat.rows - 1; - srcx = 1; - srcx = 1; - dst1x = 1; - dst1y = 1; - dst2x = 1; - dst2y = 1; - dst3x = 1; - dst3y = 1; - dst4x = 1; - dst4y = 1; - } - else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dst1x = 0; - dst1y = 0; - dst2x = 0; - dst2y = 0; - dst3x = 0; - dst3y = 0; - dst4x = 0; - dst4y = 0; - }; - - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); - - dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); - dst2_roi = dst2(Rect(dst2x, dst2y, roicols, roirows)); - dst3_roi = dst3(Rect(dst3x, dst3y, roicols, roirows)); - dst4_roi = dst4(Rect(dst4x, 
dst4y, roicols, roirows)); - } - -}; - -struct Split : SplitTestBase {}; - -TEST_P(Split, Accuracy) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi}; - cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4}; - t0 = (double)cvGetTickCount();//cpu start - cv::split(mat_roi, dev_dst); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows)); - - gdst2_whole = dst2; - gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows)); - - gdst3_whole = dst3; - gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows)); - - gdst4_whole = dst4; - gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows)); - - gmat = mat_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::split(gmat, dev_gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst1; - cv::Mat cpu_dst2; - cv::Mat cpu_dst3; - cv::Mat cpu_dst4; - gdst1_whole.download(cpu_dst1); - gdst2_whole.download(cpu_dst2); - gdst3_whole.download(cpu_dst3); - gdst4_whole.download(cpu_dst4); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - //cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi}; - cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4}; - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows)); - - gdst2_whole = dst2; - gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows)); - - gdst3_whole = dst3; - gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows)); - - gdst4_whole = dst4; - gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows)); - gmat = mat_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::split(gmat, dev_gdst); - }; -#endif } - -//*************test***************** -INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine( - Values(CV_8UC4, CV_32FC4), Values(1, 4))); - -INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine( - Values(CV_8U, CV_32S, CV_32F), Values(1, 4))); - -#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp index 7d287004e..e35a07145 100644 --- a/modules/ocl/perf/precomp.cpp +++ b/modules/ocl/perf/precomp.cpp @@ -7,12 +7,13 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. 
-// + // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -21,12 +22,12 @@ // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. +// and/or other oclMaterials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -41,4 +42,321 @@ #include "precomp.hpp" +// This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files +// All images needed in this test are in samples/gpu folder. 
+// For haar template, haarcascade_frontalface_alt.xml shouold be in working directory +void TestSystem::run() +{ + if (is_list_mode_) + { + for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it) + { + cout << (*it)->name() << endl; + } + + return; + } + + // Run test initializers + for (vector<Runnable *>::iterator it = inits_.begin(); it != inits_.end(); ++it) + { + if ((*it)->name().find(test_filter_, 0) != string::npos) + { + (*it)->run(); + } + } + + printHeading(); + writeHeading(); + + // Run tests + for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it) + { + try + { + if ((*it)->name().find(test_filter_, 0) != string::npos) + { + cout << endl << (*it)->name() << ":\n"; + + setCurrentTest((*it)->name()); + //fprintf(record_,"%s\n",(*it)->name().c_str()); + + (*it)->run(); + finishCurrentSubtest(); + } + } + catch (const Exception &) + { + // Message is printed via callback + resetCurrentSubtest(); + } + catch (const runtime_error &e) + { + printError(e.what()); + resetCurrentSubtest(); + } + } + + printSummary(); + writeSummary(); +} + + +void TestSystem::finishCurrentSubtest() +{ + if (cur_subtest_is_empty_) + // There is no need to print subtest statistics + { + return; + } + + double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; + double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0; + double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0; + + double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_); + speedup_total_ += speedup; + + double fullspeedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_full_elapsed_); + speedup_full_total_ += fullspeedup; + + if (speedup > top_) + { + speedup_faster_count_++; + } + else if (speedup < bottom_) + { + speedup_slower_count_++; + } + else + { + speedup_equal_count_++; + } + + if (fullspeedup > top_) + { + speedup_full_faster_count_++; + } + else if (fullspeedup < bottom_) + { + 
speedup_full_slower_count_++; + } + else + { + speedup_full_equal_count_++; + } + + // compute min, max and standard deviation of the GPU timings (guarding against an empty sample list) + std::sort(gpu_times_.begin(), gpu_times_.end()); + double gpu_min = gpu_times_.empty() ? 0.0 : gpu_times_.front() / getTickFrequency() * 1000.0; + double gpu_max = gpu_times_.empty() ? 0.0 : gpu_times_.back() / getTickFrequency() * 1000.0; + double deviation = 0; + + if (gpu_times_.size() > 1) + { + double sum = 0; + + for (size_t i = 0; i < gpu_times_.size(); i++) + { + int64 diff = gpu_times_[i] - static_cast<int64>(gpu_elapsed_); + double diff_time = diff * 1000 / getTickFrequency(); + sum += diff_time * diff_time; + } + + deviation = std::sqrt(sum / gpu_times_.size()); + } + + printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); + writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation); + + num_subtests_called_++; + resetCurrentSubtest(); +} + + +double TestSystem::meanTime(const vector<int64> &samples) +{ + double sum = accumulate(samples.begin(), samples.end(), 0.); + return samples.empty() ? 0.0 : sum / samples.size(); // avoid 0/0 when no samples were recorded +} + + +void TestSystem::printHeading() +{ + cout << endl; + cout << setiosflags(ios_base::left); + cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms" + << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP" + << "DESCRIPTION\n"; + + cout << resetiosflags(ios_base::left); +} + +void TestSystem::writeHeading() +{ + if (!record_) + { + recordname_ += "_OCL.csv"; + record_ = fopen(recordname_.c_str(), "w"); + } + + fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n"); + + fflush(record_); +} + +void TestSystem::printSummary() +{ + cout << setiosflags(ios_base::fixed); + cout << "\naverage GPU speedup: x" + << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_) + << endl; + cout << "\nGPU exceeded: " + << setprecision(3) << speedup_faster_count_ + << "\nGPU passed: " + << setprecision(3) << 
speedup_equal_count_ + << "\nGPU failed: " + << setprecision(3) << speedup_slower_count_ + << endl; + cout << "\nGPU exceeded rate: " + << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << "\nGPU passed rate: " + << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << "\nGPU failed rate: " + << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << endl; + cout << "\naverage GPUTOTAL speedup: x" + << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_) + << endl; + cout << "\nGPUTOTAL exceeded: " + << setprecision(3) << speedup_full_faster_count_ + << "\nGPUTOTAL passed: " + << setprecision(3) << speedup_full_equal_count_ + << "\nGPUTOTAL failed: " + << setprecision(3) << speedup_full_slower_count_ + << endl; + cout << "\nGPUTOTAL exceeded rate: " + << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << "\nGPUTOTAL passed rate: " + << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << "\nGPUTOTAL failed rate: " + << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << endl; + cout << resetiosflags(ios_base::fixed); +} + + +void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup) +{ + cout << TAB << setiosflags(ios_base::left); + stringstream stream; + + stream << cpu_time; + cout << setw(10) << stream.str(); + + stream.str(""); + stream << gpu_time; + cout << setw(10) << stream.str(); + + stream.str(""); + stream << "x" << setprecision(3) << speedup; + cout << setw(14) << stream.str(); + + stream.str(""); + stream << gpu_full_time; + cout << setw(14) << stream.str(); + + stream.str(""); + stream << "x" << setprecision(3) << fullspeedup; + cout << setw(14) << 
stream.str();
+
+    cout << cur_subtest_description_.str();
+    cout << resetiosflags(ios_base::left) << endl;
+}
+
+// Appends one CSV row with the metrics of the current subtest.
+// The test name is written only on the first row after setCurrentTest();
+// later rows of the same test leave the NAME column empty.
+// If the record file cannot be opened the failure is reported with perror()
+// and the row is skipped instead of writing through a null FILE*.
+void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
+{
+    if (!record_)
+    {
+        recordname_ += ".csv";
+        record_ = fopen(recordname_.c_str(), "w");
+
+        if (!record_)
+        {
+            perror(recordname_.c_str());
+            return;
+        }
+    }
+
+    fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
+            cur_subtest_description_.str().c_str(),
+            cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
+            gpu_min, gpu_max, std_dev);
+
+    if (itname_changed_)
+    {
+        itname_changed_ = false;
+    }
+
+    fflush(record_);
+}
+
+// Appends the overall summary (average speedups and exceeded/passed/failed
+// counts with percentages) to the CSV record file.
+// std::max(1, num_subtests_called_) keeps the divisions defined when no subtest ran.
+// If the record file cannot be opened the failure is reported with perror()
+// and the summary is skipped instead of writing through a null FILE*.
+void TestSystem::writeSummary()
+{
+    if (!record_)
+    {
+        recordname_ += ".csv";
+        record_ = fopen(recordname_.c_str(), "w");
+
+        if (!record_)
+        {
+            perror(recordname_.c_str());
+            return;
+        }
+    }
+
+    fprintf(record_, "\nAverage GPU speedup: %.3f\n"
+            "exceeded: %d (%.3f%%)\n"
+            "passed: %d (%.3f%%)\n"
+            "failed: %d (%.3f%%)\n"
+            "\nAverage GPUTOTAL speedup: %.3f\n"
+            "exceeded: %d (%.3f%%)\n"
+            "passed: %d (%.3f%%)\n"
+            "failed: %d (%.3f%%)\n",
+            speedup_total_ / std::max(1, num_subtests_called_),
+            speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
+            speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
+            speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
+            speedup_full_total_ / std::max(1, num_subtests_called_),
+            speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
+            speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
+            speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
+           );
+    fflush(record_);
+}
+
+void TestSystem::printError(const std::string &msg)
+{
+    if(msg != "CL_INVALID_BUFFER_SIZE")
+    {
+        cout << TAB << "[error: " << msg << "] " << 
cur_subtest_description_.str() << endl; + } +} + +void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high) +{ + mat.create(rows, cols, type); + RNG rng(0); + rng.fill(mat, RNG::UNIFORM, low, high); +} + + +string abspath(const string &relpath) +{ + return TestSystem::instance().workingDir() + relpath; +} + + +int CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/, + const char *err_msg, const char * /*file_name*/, + int /*line*/, void * /*userdata*/) +{ + TestSystem::instance().printError(err_msg); + return 0; +} + diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp index 34eea555f..819ac5925 100644 --- a/modules/ocl/perf/precomp.hpp +++ b/modules/ocl/perf/precomp.hpp @@ -7,12 +7,13 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. -// + // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -21,12 +22,12 @@ // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. +// and/or other materials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission.
// -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -39,43 +40,352 @@ // //M*/ -#ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wmissing-declarations" -# if defined __clang__ || defined __APPLE__ -# pragma GCC diagnostic ignored "-Wmissing-prototypes" -# pragma GCC diagnostic ignored "-Wextra" -# endif -#endif - -#ifndef __OPENCV_TEST_PRECOMP_HPP__ -#define __OPENCV_TEST_PRECOMP_HPP__ - -#include <cmath> -#include <cstdio> +#include <iomanip> +#include <stdexcept> +#include <string> #include <iostream> -#include <fstream> -#include <sstream> -#include <string> -#include <limits> -#include <algorithm> -#include <iterator> -#include <string> -#include <cstdarg> -#include "cvconfig.h" +#include <cstdio> +#include <vector> +#include <numeric> #include "opencv2/core/core.hpp" -#include "opencv2/highgui/highgui.hpp" -//#include "opencv2/calib3d/calib3d.hpp" #include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/highgui/highgui.hpp" #include "opencv2/video/video.hpp" -#include "opencv2/ts/ts.hpp" -#include "opencv2/ts/ts_perf.hpp" +#include "opencv2/objdetect/objdetect.hpp" +#include "opencv2/features2d/features2d.hpp" #include "opencv2/ocl/ocl.hpp" -//#include "opencv2/nonfree/nonfree.hpp" -#include "utility.hpp" -#include "interpolation.hpp" -//#include "add_test_info.h" -//#define PERF_TEST_OCL 1 +#define Min_Size 1000 +#define Max_Size 4000 +#define Multiple 2 +#define TAB " " -#endif +using namespace std; +using namespace cv; +void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high); +string abspath(const string &relpath); +int CV_CDECL cvErrorCallback(int, const char *, const
char *, const char *, int, void *);
+typedef struct
+{
+    short x;
+    short y;
+} COOR;
+COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep,
+                  cv::Size size, int sp, int sr, int maxIter, float eps, int *tab);
+void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi,
+                    int sp, int sr, cv::TermCriteria crit);
+
+// Named unit of work that is registered with TestSystem through
+// addInit() or addTest().
+class Runnable
+{
+public:
+    explicit Runnable(const std::string &runname): name_(runname) {}
+    virtual ~Runnable() {}
+
+    const std::string &name() const
+    {
+        return name_;
+    }
+
+    virtual void run() = 0;
+
+private:
+    std::string name_;
+};
+
+// Singleton that holds the registered Runnables, collects CPU / GPU / GPU-full
+// tick samples for the current subtest and reports the metrics to the console
+// (print*) and to a CSV record file (write*).
+class TestSystem
+{
+public:
+    // Returns the single instance, created on first use.
+    static TestSystem &instance()
+    {
+        static TestSystem me;
+        return me;
+    }
+
+    void setWorkingDir(const std::string &val)
+    {
+        working_dir_ = val;
+    }
+    const std::string &workingDir() const
+    {
+        return working_dir_;
+    }
+
+    void setTestFilter(const std::string &val)
+    {
+        test_filter_ = val;
+    }
+    const std::string &testFilter() const
+    {
+        return test_filter_;
+    }
+
+    // Iteration counts used by stop(), warmupStop() and cpu_stop() respectively.
+    void setNumIters(int num_iters)
+    {
+        num_iters_ = num_iters;
+    }
+    void setGPUWarmupIters(int num_iters)
+    {
+        gpu_warmup_iters_ = num_iters;
+    }
+    void setCPUIters(int num_iters)
+    {
+        cpu_num_iters_ = num_iters;
+    }
+
+    // Speedup thresholds; both stay 0 until these setters are called.
+    void setTopThreshold(double top)
+    {
+        top_ = top;
+    }
+    void setBottomThreshold(double bottom)
+    {
+        bottom_ = bottom;
+    }
+
+    // The pointers are stored as-is; TestSystem does not delete them.
+    void addInit(Runnable *init)
+    {
+        inits_.push_back(init);
+    }
+    void addTest(Runnable *test)
+    {
+        tests_.push_back(test);
+    }
+    void run();
+
+    // It's public because OpenCV callback uses it
+    void printError(const std::string &msg);
+
+    // Finishes the current subtest and returns the stream that receives
+    // the description of the next one.
+    std::stringstream &startNewSubtest()
+    {
+        finishCurrentSubtest();
+        return cur_subtest_description_;
+    }
+
+    // True once the shared iteration counter has reached num_iters_.
+    bool stop() const
+    {
+        return cur_iter_idx_ >= num_iters_;
+    }
+
+    // True once the shared iteration counter has reached cpu_num_iters_.
+    bool cpu_stop() const
+    {
+        return cur_iter_idx_ >= cpu_num_iters_;
+    }
+
+    // Not const: every call advances the warm-up counter.
+    bool warmupStop()
+    {
+        return cur_warmup_idx_++ >= gpu_warmup_iters_;
+    }
+
+    void warmupComplete()
+    {
+        cur_warmup_idx_ = 0;
+    }
+
+    // cpuOn()/cpuOff() bracket one timed CPU iteration; cpuComplete() adds the
+    // mean of the collected samples to cpu_elapsed_ and rewinds the counter.
+    void cpuOn()
+    {
+        cpu_started_ = cv::getTickCount();
+    }
+    void cpuOff()
+    {
+        int64 delta = cv::getTickCount() - cpu_started_;
+        cpu_times_.push_back(delta);
+        ++cur_iter_idx_;
+    }
+    void cpuComplete()
+    {
+        cpu_elapsed_ += meanTime(cpu_times_);
+        cur_subtest_is_empty_ = false;
+        cur_iter_idx_ = 0;
+    }
+
+    // Same protocol as the cpu* trio, for the GPU timing samples.
+    void gpuOn()
+    {
+        gpu_started_ = cv::getTickCount();
+    }
+    void gpuOff()
+    {
+        int64 delta = cv::getTickCount() - gpu_started_;
+        gpu_times_.push_back(delta);
+        ++cur_iter_idx_;
+    }
+    void gpuComplete()
+    {
+        gpu_elapsed_ += meanTime(gpu_times_);
+        cur_subtest_is_empty_ = false;
+        cur_iter_idx_ = 0;
+    }
+
+    // Same protocol as the cpu* trio, for the GPU-full timing samples.
+    void gpufullOn()
+    {
+        gpu_full_started_ = cv::getTickCount();
+    }
+    void gpufullOff()
+    {
+        int64 delta = cv::getTickCount() - gpu_full_started_;
+        gpu_full_times_.push_back(delta);
+        ++cur_iter_idx_;
+    }
+    void gpufullComplete()
+    {
+        gpu_full_elapsed_ += meanTime(gpu_full_times_);
+        cur_subtest_is_empty_ = false;
+        cur_iter_idx_ = 0;
+    }
+
+    bool isListMode() const
+    {
+        return is_list_mode_;
+    }
+    void setListMode(bool value)
+    {
+        is_list_mode_ = value;
+    }
+
+    // Base name of the CSV record file; the write*() methods append the suffix.
+    void setRecordName(const std::string &name)
+    {
+        recordname_ = name;
+    }
+
+    // Remembers the test name so that the next CSV row carries it.
+    void setCurrentTest(const std::string &name)
+    {
+        itname_ = name;
+        itname_changed_ = true;
+    }
+
+private:
+    // Every scalar member is initialised explicitly, in declaration order, so
+    // no counter, threshold or timestamp depends on static zero-initialisation.
+    TestSystem():
+        cur_subtest_is_empty_(true),
+        cpu_started_(0), gpu_started_(0), gpu_full_started_(0),
+        cpu_elapsed_(0), gpu_elapsed_(0), gpu_full_elapsed_(0),
+        speedup_total_(0.0), speedup_full_total_(0.0),
+        num_subtests_called_(0),
+        speedup_faster_count_(0), speedup_slower_count_(0), speedup_equal_count_(0),
+        speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0),
+        is_list_mode_(false),
+        top_(0.0), bottom_(0.0),
+        num_iters_(10), cpu_num_iters_(2),
+        gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
+        record_(0), recordname_("performance"), itname_changed_(true)
+    {
+        cpu_times_.reserve(num_iters_);
+        gpu_times_.reserve(num_iters_);
+        gpu_full_times_.reserve(num_iters_);
+    }
+
+    // Closes the CSV record file if one of the write*() methods opened it.
+    ~TestSystem()
+    {
+        if (record_)
+        {
+            fclose(record_);
+        }
+    }
+
+    void finishCurrentSubtest();
+    // Clears the accumulated times, samples and description of the current subtest.
+    void resetCurrentSubtest()
+    {
+        cpu_elapsed_ = 0;
+        gpu_elapsed_ = 0;
+        gpu_full_elapsed_ = 0;
+        cur_subtest_description_.str("");
+        cur_subtest_is_empty_ = true;
+        cur_iter_idx_ = 0;
+        cpu_times_.clear();
+        gpu_times_.clear();
+        gpu_full_times_.clear();
+    }
+
+    double meanTime(const std::vector<int64> &samples);
+
+    void printHeading();
+    void printSummary();
+    void printMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);
+
+    void writeHeading();
+    void writeSummary();
+    void writeMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
+                      double speedup = 0.0f, double fullspeedup = 0.0f,
+                      double gpu_min = 0.0f, double gpu_max = 0.0f, double std_dev = 0.0f);
+
+    std::string working_dir_;
+    std::string test_filter_;
+
+    std::vector<Runnable *> inits_;
+    std::vector<Runnable *> tests_;
+
+    std::stringstream cur_subtest_description_;
+    bool cur_subtest_is_empty_;
+
+    int64 cpu_started_;
+    int64 gpu_started_;
+    int64 gpu_full_started_;
+    double cpu_elapsed_;
+    double gpu_elapsed_;
+    double gpu_full_elapsed_;
+
+    double speedup_total_;
+    double speedup_full_total_;
+    int num_subtests_called_;
+
+    int speedup_faster_count_;
+    int speedup_slower_count_;
+    int speedup_equal_count_;
+
+    int speedup_full_faster_count_;
+    int speedup_full_slower_count_;
+    int speedup_full_equal_count_;
+
+    bool is_list_mode_;
+
+    double top_;
+    double bottom_;
+
+    int num_iters_;
+    int cpu_num_iters_;    // CPU iteration count; it need not match the GPU iteration count
+    int gpu_warmup_iters_; // number of GPU warm-up iterations, 1 by default
+    int cur_iter_idx_;
+    int cur_warmup_idx_;   // number of GPU warm-up iterations done so far
+    std::vector<int64> cpu_times_;
+    std::vector<int64> gpu_times_;
+    std::vector<int64> gpu_full_times_;
+
+    FILE *record_;
+    std::string recordname_;
+    std::string itname_;
+    bool itname_changed_;
+};
+
+
+#define GLOBAL_INIT(name) \
+struct name##_init: Runnable { \
+    name##_init(): Runnable(#name) { \
+    TestSystem::instance().addInit(this); \
+} \
+    void run(); \
+} 
name##_init_instance; \ + void name##_init::run() + + +#define TEST(name) \ +struct name##_test: Runnable { \ + name##_test(): Runnable(#name) { \ + TestSystem::instance().addTest(this); \ +} \ + void run(); \ +} name##_test_instance; \ + void name##_test::run() + +#define SUBTEST TestSystem::instance().startNewSubtest() + +#define CPU_ON \ + while (!TestSystem::instance().cpu_stop()) { \ + TestSystem::instance().cpuOn() +#define CPU_OFF \ + TestSystem::instance().cpuOff(); \ + } TestSystem::instance().cpuComplete() + +#define GPU_ON \ + while (!TestSystem::instance().stop()) { \ + TestSystem::instance().gpuOn() +#define GPU_OFF \ + TestSystem::instance().gpuOff(); \ + } TestSystem::instance().gpuComplete() + +#define GPU_FULL_ON \ + while (!TestSystem::instance().stop()) { \ + TestSystem::instance().gpufullOn() +#define GPU_FULL_OFF \ + TestSystem::instance().gpufullOff(); \ + } TestSystem::instance().gpufullComplete() + +#define WARMUP_ON \ + while (!TestSystem::instance().warmupStop()) { +#define WARMUP_OFF \ + } TestSystem::instance().warmupComplete() diff --git a/modules/ocl/perf/utility.cpp b/modules/ocl/perf/utility.cpp deleted file mode 100644 index b7fbe4fa0..000000000 --- a/modules/ocl/perf/utility.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#define VARNAME(A) #A -using namespace std; -using namespace cv; -using namespace cv::gpu; -using namespace cvtest; - - -//std::string generateVarList(int first,...) 
-//{ -// vector<std::string> varname; -// -// va_list argp; -// string s; -// stringstream ss; -// va_start(argp,first); -// int i=first; -// while(i!=-1) -// { -// ss<<i<<","; -// i=va_arg(argp,int); -// }; -// s=ss.str(); -// va_end(argp); -// return s; -//}; - -//std::string generateVarList(int& p1,int& p2) -//{ -// stringstream ss; -// ss<<VARNAME(p1)<<":"<<src1x<<","<<VARNAME(p2)<<":"<<src1y; -// return ss.str(); -//}; - -int randomInt(int minVal, int maxVal) -{ - RNG &rng = TS::ptr()->get_rng(); - return rng.uniform(minVal, maxVal); -} - -double randomDouble(double minVal, double maxVal) -{ - RNG &rng = TS::ptr()->get_rng(); - return rng.uniform(minVal, maxVal); -} - -Size randomSize(int minVal, int maxVal) -{ - return cv::Size(randomInt(minVal, maxVal), randomInt(minVal, maxVal)); -} - -Scalar randomScalar(double minVal, double maxVal) -{ - return Scalar(randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal)); -} - -Mat randomMat(Size size, int type, double minVal, double maxVal) -{ - return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false); -} - - - - - - - -/* -void showDiff(InputArray gold_, InputArray actual_, double eps) -{ - Mat gold; - if (gold_.kind() == _InputArray::MAT) - gold = gold_.getMat(); - else - gold_.getGpuMat().download(gold); - - Mat actual; - if (actual_.kind() == _InputArray::MAT) - actual = actual_.getMat(); - else - actual_.getGpuMat().download(actual); - - Mat diff; - absdiff(gold, actual, diff); - threshold(diff, diff, eps, 255.0, cv::THRESH_BINARY); - - namedWindow("gold", WINDOW_NORMAL); - namedWindow("actual", WINDOW_NORMAL); - namedWindow("diff", WINDOW_NORMAL); - - imshow("gold", gold); - imshow("actual", actual); - imshow("diff", diff); - - waitKey(); -} -*/ - -/* -bool supportFeature(const DeviceInfo& info, FeatureSet feature) -{ - return TargetArchs::builtWith(feature) && info.supports(feature); -} - -const vector<DeviceInfo>& devices() -{ - 
static vector<DeviceInfo> devs; - static bool first = true; - - if (first) - { - int deviceCount = getCudaEnabledDeviceCount(); - - devs.reserve(deviceCount); - - for (int i = 0; i < deviceCount; ++i) - { - DeviceInfo info(i); - if (info.isCompatible()) - devs.push_back(info); - } - - first = false; - } - - return devs; -} - -vector<DeviceInfo> devices(FeatureSet feature) -{ - const vector<DeviceInfo>& d = devices(); - - vector<DeviceInfo> devs_filtered; - - if (TargetArchs::builtWith(feature)) - { - devs_filtered.reserve(d.size()); - - for (size_t i = 0, size = d.size(); i < size; ++i) - { - const DeviceInfo& info = d[i]; - - if (info.supports(feature)) - devs_filtered.push_back(info); - } - } - - return devs_filtered; -} -*/ - -vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end) -{ - vector<MatType> v; - - v.reserve((depth_end - depth_start + 1) * (cn_end - cn_start + 1)); - - for (int depth = depth_start; depth <= depth_end; ++depth) - { - for (int cn = cn_start; cn <= cn_end; ++cn) - { - v.push_back(CV_MAKETYPE(depth, cn)); - } - } - - return v; -} - -const vector<MatType> &all_types() -{ - static vector<MatType> v = types(CV_8U, CV_64F, 1, 4); - - return v; -} - -Mat readImage(const string &fileName, int flags) -{ - return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags); -} - -Mat readImageType(const string &fname, int type) -{ - Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? 
IMREAD_GRAYSCALE : IMREAD_COLOR); - if (CV_MAT_CN(type) == 4) - { - Mat temp; - cvtColor(src, temp, cv::COLOR_BGR2BGRA); - swap(src, temp); - } - src.convertTo(src, CV_MAT_DEPTH(type)); - return src; -} - -double checkNorm(const Mat &m) -{ - return norm(m, NORM_INF); -} - -double checkNorm(const Mat &m1, const Mat &m2) -{ - return norm(m1, m2, NORM_INF); -} - -double checkSimilarity(const Mat &m1, const Mat &m2) -{ - Mat diff; - matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED); - return std::abs(diff.at<float>(0, 0) - 1.f); -} - -/* -void cv::ocl::PrintTo(const DeviceInfo& info, ostream* os) -{ - (*os) << info.name(); -} -*/ - -void PrintTo(const Inverse &inverse, std::ostream *os) -{ - if (inverse) - (*os) << "inverse"; - else - (*os) << "direct"; -} diff --git a/modules/ocl/perf/utility.hpp b/modules/ocl/perf/utility.hpp deleted file mode 100644 index 7d34b6731..000000000 --- a/modules/ocl/perf/utility.hpp +++ /dev/null @@ -1,182 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_TEST_UTILITY_HPP__ -#define __OPENCV_TEST_UTILITY_HPP__ -//#define PRINT_KERNEL_RUN_TIME -#ifdef PRINT_KERNEL_RUN_TIME -#define LOOP_TIMES 1 -#else -#define LOOP_TIMES 1 -#endif -#define MWIDTH 1920 -#define MHEIGHT 1080 -#define CLBINPATH ".\\" -#define LOOPROISTART 0 -#define LOOPROIEND 1 -int randomInt(int minVal, int maxVal); -double randomDouble(double minVal, double maxVal); - -//std::string generateVarList(int first,...); -std::string generateVarList(int &p1, int &p2); -cv::Size randomSize(int minVal, int maxVal); -cv::Scalar randomScalar(double minVal, double maxVal); -cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0); - -void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); - -//! 
return true if device supports specified feature and gpu module was built with support the feature. -//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature); - -//! return all devices compatible with current gpu module build. -//const std::vector<cv::ocl::DeviceInfo>& devices(); -//! return all devices compatible with current gpu module build which support specified feature. -//std::vector<cv::ocl::DeviceInfo> devices(cv::gpu::FeatureSet feature); - -//! read image from testdata folder. -cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR); -cv::Mat readImageType(const std::string &fname, int type); - -double checkNorm(const cv::Mat &m); -double checkNorm(const cv::Mat &m1, const cv::Mat &m2); -double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2); - -#define EXPECT_MAT_NORM(mat, eps) \ -{ \ - EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \ -} - -/*#define EXPECT_MAT_NEAR(mat1, mat2, eps) \ -{ \ - ASSERT_EQ(mat1.type(), mat2.type()); \ - ASSERT_EQ(mat1.size(), mat2.size()); \ - EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \ -}*/ - -#define EXPECT_MAT_NEAR(mat1, mat2, eps,s) \ -{ \ - ASSERT_EQ(mat1.type(), mat2.type()); \ - ASSERT_EQ(mat1.size(), mat2.size()); \ - EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps)<<s; \ -} - -#define EXPECT_MAT_SIMILAR(mat1, mat2, eps) \ -{ \ - ASSERT_EQ(mat1.type(), mat2.type()); \ - ASSERT_EQ(mat1.size(), mat2.size()); \ - EXPECT_LE(checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)), eps); \ -} - -namespace cv -{ - namespace ocl - { - // void PrintTo(const DeviceInfo& info, std::ostream* os); - } -} - -using perf::MatDepth; -using perf::MatType; - -//! return vector with types from specified range. -std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end); - -//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4). 
-const std::vector<MatType> &all_types(); - -class Inverse -{ -public: - inline Inverse(bool val = false) : val_(val) {} - - inline operator bool() const - { - return val_; - } - -private: - bool val_; -}; - -void PrintTo(const Inverse &useRoi, std::ostream *os); - -CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE) - -CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX) - -enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; -CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y) - -CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN) - -CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T); - -CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT) - -CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV) - -CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC) - -CV_ENUM(Border, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP) - -CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP) - -CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED) - -CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT) - -void run_perf_test(); - -#define PARAM_TEST_CASE(name, ...) 
struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > - -#define GET_PARAM(k) std::tr1::get< k >(GetParam()) - -#define ALL_DEVICES testing::ValuesIn(devices()) -#define DEVICES(feature) testing::ValuesIn(devices(feature)) - -#define ALL_TYPES testing::ValuesIn(all_types()) -#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end)) - -#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113)) - -#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true)) - -#endif // __OPENCV_TEST_UTILITY_HPP__ diff --git a/samples/ocl/performance.cpp b/samples/ocl/performance.cpp deleted file mode 100644 index 695516f14..000000000 --- a/samples/ocl/performance.cpp +++ /dev/null @@ -1,4397 +0,0 @@ -#include <iomanip> -#include <stdexcept> -#include <string> -#include <iostream> -#include <cstdio> -#include <vector> -#include <numeric> -#include "opencv2/core/core.hpp" -#include "opencv2/imgproc/imgproc.hpp" -#include "opencv2/highgui/highgui.hpp" -#include "opencv2/calib3d/calib3d.hpp" -#include "opencv2/video/video.hpp" -#include "opencv2/nonfree/nonfree.hpp" -#include "opencv2/objdetect/objdetect.hpp" -#include "opencv2/features2d/features2d.hpp" -#define USE_OPENCL -#ifdef USE_OPENCL -#include "opencv2/ocl/ocl.hpp" -#include "opencv2/nonfree/ocl.hpp" -#endif - -#define TAB " " - -using namespace std; -using namespace cv; - -// This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files -// All images needed in this test are in samples/gpu folder. 
-// For haar template, haarcascade_frontalface_alt.xml shouold be in working directory - -void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high); -string abspath(const string &relpath); -int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *); -typedef struct -{ - short x; - short y; -} COOR; -COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, - cv::Size size, int sp, int sr, int maxIter, float eps, int *tab); -void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, - int sp, int sr, cv::TermCriteria crit); - -class Runnable -{ -public: - explicit Runnable(const std::string &runname): name_(runname) {} - virtual ~Runnable() {} - - const std::string &name() const - { - return name_; - } - - virtual void run() = 0; - -private: - std::string name_; -}; - -class TestSystem -{ -public: - static TestSystem &instance() - { - static TestSystem me; - return me; - } - - void setWorkingDir(const std::string &val) - { - working_dir_ = val; - } - const std::string &workingDir() const - { - return working_dir_; - } - - void setTestFilter(const std::string &val) - { - test_filter_ = val; - } - const std::string &testFilter() const - { - return test_filter_; - } - - void setNumIters(int num_iters) - { - num_iters_ = num_iters; - } - void setGPUWarmupIters(int num_iters) - { - gpu_warmup_iters_ = num_iters; - } - void setCPUIters(int num_iters) - { - cpu_num_iters_ = num_iters; - } - - void setTopThreshold(double top) - { - top_ = top; - } - void setBottomThreshold(double bottom) - { - bottom_ = bottom; - } - - void addInit(Runnable *init) - { - inits_.push_back(init); - } - void addTest(Runnable *test) - { - tests_.push_back(test); - } - void run(); - - // It's public because OpenCV callback uses it - void printError(const std::string &msg); - - std::stringstream &startNewSubtest() - { - finishCurrentSubtest(); - return cur_subtest_description_; - } - - bool stop() const - { - return 
cur_iter_idx_ >= num_iters_; - } - - bool cpu_stop() const - { - return cur_iter_idx_ >= cpu_num_iters_; - } - - bool warmupStop() - { - return cur_warmup_idx_++ >= gpu_warmup_iters_; - } - - void warmupComplete() - { - cur_warmup_idx_ = 0; - } - - void cpuOn() - { - cpu_started_ = cv::getTickCount(); - } - void cpuOff() - { - int64 delta = cv::getTickCount() - cpu_started_; - cpu_times_.push_back(delta); - ++cur_iter_idx_; - } - void cpuComplete() - { - cpu_elapsed_ += meanTime(cpu_times_); - cur_subtest_is_empty_ = false; - cur_iter_idx_ = 0; - } - - void gpuOn() - { - gpu_started_ = cv::getTickCount(); - } - void gpuOff() - { - int64 delta = cv::getTickCount() - gpu_started_; - gpu_times_.push_back(delta); - ++cur_iter_idx_; - } - void gpuComplete() - { - gpu_elapsed_ += meanTime(gpu_times_); - cur_subtest_is_empty_ = false; - cur_iter_idx_ = 0; - } - - void gpufullOn() - { - gpu_full_started_ = cv::getTickCount(); - } - void gpufullOff() - { - int64 delta = cv::getTickCount() - gpu_full_started_; - gpu_full_times_.push_back(delta); - ++cur_iter_idx_; - } - void gpufullComplete() - { - gpu_full_elapsed_ += meanTime(gpu_full_times_); - cur_subtest_is_empty_ = false; - cur_iter_idx_ = 0; - } - - bool isListMode() const - { - return is_list_mode_; - } - void setListMode(bool value) - { - is_list_mode_ = value; - } - - void setRecordName(const std::string &name) - { - recordname_ = name; - } - - void setCurrentTest(const std::string &name) - { - itname_ = name; - itname_changed_ = true; - } - -private: - TestSystem(): - cur_subtest_is_empty_(true), cpu_elapsed_(0), - gpu_elapsed_(0), gpu_full_elapsed_(0), speedup_total_(0.0), - num_subtests_called_(0), - speedup_faster_count_(0), speedup_slower_count_(0), speedup_equal_count_(0), - speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false), - num_iters_(10), cpu_num_iters_(2), - gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0), - record_(0), 
recordname_("performance"), itname_changed_(true) - { - cpu_times_.reserve(num_iters_); - gpu_times_.reserve(num_iters_); - gpu_full_times_.reserve(num_iters_); - } - - void finishCurrentSubtest(); - void resetCurrentSubtest() - { - cpu_elapsed_ = 0; - gpu_elapsed_ = 0; - gpu_full_elapsed_ = 0; - cur_subtest_description_.str(""); - cur_subtest_is_empty_ = true; - cur_iter_idx_ = 0; - cpu_times_.clear(); - gpu_times_.clear(); - gpu_full_times_.clear(); - } - - double meanTime(const std::vector<int64> &samples); - - void printHeading(); - void printSummary(); - void printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup); - - void writeHeading(); - void writeSummary(); - void writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, - double speedup, double fullspeedup, - double gpu_min, double gpu_max, double std_dev); - - std::string working_dir_; - std::string test_filter_; - - std::vector<Runnable *> inits_; - std::vector<Runnable *> tests_; - - std::stringstream cur_subtest_description_; - bool cur_subtest_is_empty_; - - int64 cpu_started_; - int64 gpu_started_; - int64 gpu_full_started_; - double cpu_elapsed_; - double gpu_elapsed_; - double gpu_full_elapsed_; - - double speedup_total_; - double speedup_full_total_; - int num_subtests_called_; - - int speedup_faster_count_; - int speedup_slower_count_; - int speedup_equal_count_; - - int speedup_full_faster_count_; - int speedup_full_slower_count_; - int speedup_full_equal_count_; - - bool is_list_mode_; - - double top_; - double bottom_; - - int num_iters_; - int cpu_num_iters_; //there's no need to set cpu running same times with gpu - int gpu_warmup_iters_; //gpu warm up times, default is 1 - int cur_iter_idx_; - int cur_warmup_idx_; //current gpu warm up times - std::vector<int64> cpu_times_; - std::vector<int64> gpu_times_; - std::vector<int64> gpu_full_times_; - - FILE *record_; - std::string recordname_; - std::string itname_; - bool 
itname_changed_; -}; - - -#define GLOBAL_INIT(name) \ - struct name##_init: Runnable { \ - name##_init(): Runnable(#name) { \ - TestSystem::instance().addInit(this); \ - } \ - void run(); \ - } name##_init_instance; \ - void name##_init::run() - - -#define TEST(name) \ - struct name##_test: Runnable { \ - name##_test(): Runnable(#name) { \ - TestSystem::instance().addTest(this); \ - } \ - void run(); \ - } name##_test_instance; \ - void name##_test::run() - -#define SUBTEST TestSystem::instance().startNewSubtest() - -#define CPU_ON \ - while (!TestSystem::instance().cpu_stop()) { \ - TestSystem::instance().cpuOn() -#define CPU_OFF \ - TestSystem::instance().cpuOff(); \ - } TestSystem::instance().cpuComplete() - -#define GPU_ON \ - while (!TestSystem::instance().stop()) { \ - TestSystem::instance().gpuOn() -#define GPU_OFF \ - TestSystem::instance().gpuOff(); \ - } TestSystem::instance().gpuComplete() - -#define GPU_FULL_ON \ - while (!TestSystem::instance().stop()) { \ - TestSystem::instance().gpufullOn() -#define GPU_FULL_OFF \ - TestSystem::instance().gpufullOff(); \ - } TestSystem::instance().gpufullComplete() - -#define WARMUP_ON \ - while (!TestSystem::instance().warmupStop()) { -#define WARMUP_OFF \ - } TestSystem::instance().warmupComplete() - -void TestSystem::run() -{ - if (is_list_mode_) - { - for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it) - { - cout << (*it)->name() << endl; - } - - return; - } - - // Run test initializers - for (vector<Runnable *>::iterator it = inits_.begin(); it != inits_.end(); ++it) - { - if ((*it)->name().find(test_filter_, 0) != string::npos) - { - (*it)->run(); - } - } - - printHeading(); - writeHeading(); - - // Run tests - for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it) - { - try - { - if ((*it)->name().find(test_filter_, 0) != string::npos) - { - cout << endl << (*it)->name() << ":\n"; - - setCurrentTest((*it)->name()); - 
//fprintf(record_,"%s\n",(*it)->name().c_str()); - - (*it)->run(); - finishCurrentSubtest(); - } - } - catch (const Exception &) - { - // Message is printed via callback - resetCurrentSubtest(); - } - catch (const runtime_error &e) - { - printError(e.what()); - resetCurrentSubtest(); - } - } - -#ifdef USE_OPENCL - printSummary(); - writeSummary(); -#endif -} - - -void TestSystem::finishCurrentSubtest() -{ - if (cur_subtest_is_empty_) - // There is no need to print subtest statistics - { - return; - } - - double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; - double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0; - double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0; - - double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_); - speedup_total_ += speedup; - - double fullspeedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_full_elapsed_); - speedup_full_total_ += fullspeedup; - - if (speedup > top_) - { - speedup_faster_count_++; - } - else if (speedup < bottom_) - { - speedup_slower_count_++; - } - else - { - speedup_equal_count_++; - } - - if (fullspeedup > top_) - { - speedup_full_faster_count_++; - } - else if (fullspeedup < bottom_) - { - speedup_full_slower_count_++; - } - else - { - speedup_full_equal_count_++; - } - - // compute min, max and - std::sort(gpu_times_.begin(), gpu_times_.end()); - double gpu_min = gpu_times_.front() / getTickFrequency() * 1000.0; - double gpu_max = gpu_times_.back() / getTickFrequency() * 1000.0; - double deviation = 0; - - if (gpu_times_.size() > 1) - { - double sum = 0; - - for (size_t i = 0; i < gpu_times_.size(); i++) - { - int64 diff = gpu_times_[i] - static_cast<int64>(gpu_elapsed_); - double diff_time = diff * 1000 / getTickFrequency(); - sum += diff_time * diff_time; - } - - deviation = std::sqrt(sum / gpu_times_.size()); - } - - printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); - writeMetrics(cpu_time, gpu_time, gpu_full_time, 
speedup, fullspeedup, gpu_min, gpu_max, deviation); - - num_subtests_called_++; - resetCurrentSubtest(); -} - - -double TestSystem::meanTime(const vector<int64> &samples) -{ - double sum = accumulate(samples.begin(), samples.end(), 0.); - return sum / samples.size(); -} - - -void TestSystem::printHeading() -{ - cout << endl; - cout << setiosflags(ios_base::left); -#ifdef USE_OPENCL - cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms" - << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP" - << "DESCRIPTION\n"; -#else - cout << TAB << setw(10) << "CPU, ms\n"; -#endif - cout << resetiosflags(ios_base::left); -} - -void TestSystem::writeHeading() -{ - if (!record_) - { -#ifdef USE_OPENCL - recordname_ += "_OCL.csv"; -#else - recordname_ += "_CPU.csv"; -#endif - record_ = fopen(recordname_.c_str(), "w"); - } - -#ifdef USE_OPENCL - fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n"); -#else - fprintf(record_, "NAME,DESCRIPTION,CPU (ms)\n"); -#endif - fflush(record_); -} - -void TestSystem::printSummary() -{ - cout << setiosflags(ios_base::fixed); - cout << "\naverage GPU speedup: x" - << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_) - << endl; - cout << "\nGPU exceeded: " - << setprecision(3) << speedup_faster_count_ - << "\nGPU passed: " - << setprecision(3) << speedup_equal_count_ - << "\nGPU failed: " - << setprecision(3) << speedup_slower_count_ - << endl; - cout << "\nGPU exceeded rate: " - << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << "\nGPU passed rate: " - << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << "\nGPU failed rate: " - << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << endl; - cout << "\naverage GPUTOTAL speedup: x" - << 
setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_) - << endl; - cout << "\nGPUTOTAL exceeded: " - << setprecision(3) << speedup_full_faster_count_ - << "\nGPUTOTAL passed: " - << setprecision(3) << speedup_full_equal_count_ - << "\nGPUTOTAL failed: " - << setprecision(3) << speedup_full_slower_count_ - << endl; - cout << "\nGPUTOTAL exceeded rate: " - << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << "\nGPUTOTAL passed rate: " - << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << "\nGPUTOTAL failed rate: " - << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << endl; - cout << resetiosflags(ios_base::fixed); -} - - -void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup) -{ - cout << TAB << setiosflags(ios_base::left); - stringstream stream; - - stream << cpu_time; - cout << setw(10) << stream.str(); -#ifdef USE_OPENCL - stream.str(""); - stream << gpu_time; - cout << setw(10) << stream.str(); - - stream.str(""); - stream << "x" << setprecision(3) << speedup; - cout << setw(14) << stream.str(); - - stream.str(""); - stream << gpu_full_time; - cout << setw(14) << stream.str(); - - stream.str(""); - stream << "x" << setprecision(3) << fullspeedup; - cout << setw(14) << stream.str(); -#endif - cout << cur_subtest_description_.str(); - cout << resetiosflags(ios_base::left) << endl; -} - -void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev) -{ - if (!record_) - { - recordname_ += ".csv"; - record_ = fopen(recordname_.c_str(), "w"); - } - -#ifdef USE_OPENCL - fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? 
itname_.c_str() : "", - cur_subtest_description_.str().c_str(), - cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup, - gpu_min, gpu_max, std_dev); -#else - fprintf(record_, "%s,%s,%.3f\n", - itname_changed_ ? itname_.c_str() : "", cur_subtest_description_.str().c_str(), cpu_time); -#endif - - if (itname_changed_) - { - itname_changed_ = false; - } - - fflush(record_); -} - -void TestSystem::writeSummary() -{ - if (!record_) - { - recordname_ += ".csv"; - record_ = fopen(recordname_.c_str(), "w"); - } - - fprintf(record_, "\nAverage GPU speedup: %.3f\n" - "exceeded: %d (%.3f%%)\n" - "passed: %d (%.3f%%)\n" - "failed: %d (%.3f%%)\n" - "\nAverage GPUTOTAL speedup: %.3f\n" - "exceeded: %d (%.3f%%)\n" - "passed: %d (%.3f%%)\n" - "failed: %d (%.3f%%)\n", - speedup_total_ / std::max(1, num_subtests_called_), - speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100, - speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100, - speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100, - speedup_full_total_ / std::max(1, num_subtests_called_), - speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100, - speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100, - speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100 - ); - fflush(record_); -} - -void TestSystem::printError(const std::string &msg) -{ - cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl; -} - -void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high) -{ - mat.create(rows, cols, type); - RNG rng(0); - rng.fill(mat, RNG::UNIFORM, low, high); -} - - -string abspath(const string &relpath) -{ - return TestSystem::instance().workingDir() + relpath; -} - - -int CV_CDECL cvErrorCallback(int /*status*/, const char * 
/*func_name*/, - const char *err_msg, const char * /*file_name*/, - int /*line*/, void * /*userdata*/) -{ - TestSystem::instance().printError(err_msg); - return 0; -} - -/////////// matchTemplate //////////////////////// -//void InitMatchTemplate() -//{ -// Mat src; gen(src, 500, 500, CV_32F, 0, 1); -// Mat templ; gen(templ, 500, 500, CV_32F, 0, 1); -//#ifdef USE_OPENCL -// ocl::oclMat d_src(src), d_templ(templ), d_dst; -// ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); -//#endif -//} -TEST(matchTemplate) -{ - //InitMatchTemplate(); - - Mat src, templ, dst; - int templ_size = 5; - - - for (int size = 1000; size <= 4000; size *= 2) - { - int all_type[] = {CV_32FC1, CV_32FC4}; - std::string type_name[] = {"CV_32FC1", "CV_32FC4"}; - - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - for(templ_size = 5; templ_size <= 5; templ_size *= 5) - { - gen(src, size, size, all_type[j], 0, 1); - - SUBTEST << src.cols << 'x' << src.rows << "; " << type_name[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR"; - - gen(templ, templ_size, templ_size, all_type[j], 0, 1); - - matchTemplate(src, templ, dst, CV_TM_CCORR); - - CPU_ON; - matchTemplate(src, templ, dst, CV_TM_CCORR); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_src(src), d_templ, d_dst; - - d_templ.upload(templ); - - WARMUP_ON; - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); - WARMUP_OFF; - - GPU_ON; - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_templ.upload(templ); - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - } - - int all_type_8U[] = {CV_8UC1}; - std::string type_name_8U[] = {"CV_8UC1"}; - - for (size_t j = 0; j < sizeof(all_type_8U) / sizeof(int); j++) - { - for(templ_size = 5; templ_size <= 5; templ_size *= 5) - { - SUBTEST << src.cols << 'x' << src.rows << "; " << type_name_8U[j] << "; templ " << templ_size << 'x' << templ_size << "; 
CCORR_NORMED"; - - gen(src, size, size, all_type_8U[j], 0, 255); - - gen(templ, templ_size, templ_size, all_type_8U[j], 0, 255); - - matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); - - CPU_ON; - matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_templ(templ), d_dst; - - WARMUP_ON; - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); - WARMUP_OFF; - - GPU_ON; - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_templ.upload(templ); - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - } - } -} - -///////////// PyrLKOpticalFlow //////////////////////// -TEST(PyrLKOpticalFlow) -{ - std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"}; - std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"}; - - for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++) - { - Mat frame0 = imread(abspath(images1[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE); - - if (frame0.empty()) - { - std::string errstr = "can't open " + images1[i]; - throw runtime_error(errstr); - } - - Mat frame1 = imread(abspath(images2[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE); - - if (frame1.empty()) - { - std::string errstr = "can't open " + images2[i]; - throw runtime_error(errstr); - } - - Mat gray_frame; - - if (i == 0) - { - cvtColor(frame0, gray_frame, COLOR_BGR2GRAY); - } - - for (int points = 1000; points <= 4000; points *= 2) - { - if (i == 0) - SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points"; - else - SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points"; - Mat nextPts_cpu; - Mat status_cpu; - - vector<Point2f> pts; - goodFeaturesToTrack(i == 0 ? 
gray_frame : frame0, pts, points, 0.01, 0.0); - - vector<Point2f> nextPts; - vector<unsigned char> status; - - vector<float> err; - - calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); - - CPU_ON; - calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::PyrLKOpticalFlow d_pyrLK; - - ocl::oclMat d_frame0(frame0); - ocl::oclMat d_frame1(frame1); - - ocl::oclMat d_pts; - Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]); - d_pts.upload(pts_mat); - - ocl::oclMat d_nextPts; - ocl::oclMat d_status; - ocl::oclMat d_err; - - WARMUP_ON; - d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); - WARMUP_OFF; - - GPU_ON; - d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); - GPU_OFF; - - GPU_FULL_ON; - d_frame0.upload(frame0); - d_frame1.upload(frame1); - d_pts.upload(pts_mat); - d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); - - if (!d_nextPts.empty()) - { - d_nextPts.download(nextPts_cpu); - } - - if (!d_status.empty()) - { - d_status.download(status_cpu); - } - - GPU_FULL_OFF; -#endif - } - - } -} - - -///////////// pyrDown ////////////////////// -TEST(pyrDown) -{ - Mat src, dst; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - pyrDown(src, dst); - - CPU_ON; - pyrDown(src, dst); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_dst; - - WARMUP_ON; - ocl::pyrDown(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::pyrDown(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::pyrDown(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - } -} - -///////////// pyrUp //////////////////////// -TEST(pyrUp) -{ - Mat src, dst; - int 
all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 500; size <= 2000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - pyrUp(src, dst); - - CPU_ON; - pyrUp(src, dst); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_dst; - - WARMUP_ON; - ocl::pyrUp(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::pyrUp(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::pyrUp(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - } -} - -///////////// Canny //////////////////////// -TEST(Canny) -{ - Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE); - - if (img.empty()) - { - throw runtime_error("can't open aloeL.jpg"); - } - - SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1"; - - Mat edges(img.size(), CV_8UC1); - - CPU_ON; - Canny(img, edges, 50.0, 100.0); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_img(img); - ocl::oclMat d_edges; - ocl::CannyBuf d_buf; - - WARMUP_ON; - ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); - WARMUP_OFF; - - GPU_ON; - ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); - GPU_OFF; - - GPU_FULL_ON; - d_img.upload(img); - ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); - d_edges.download(edges); - GPU_FULL_OFF; -#endif -} - -///////////// Haar //////////////////////// -#ifdef USE_OPENCL -namespace cv -{ -namespace ocl -{ - -struct getRect -{ - Rect operator()(const CvAvgComp &e) const - { - return e.rect; - } -}; - -class CascadeClassifier_GPU : public OclCascadeClassifier -{ -public: - void detectMultiScale(oclMat &image, - CV_OUT std::vector<cv::Rect>& faces, - double scaleFactor = 1.1, - int minNeighbors = 3, int flags = 0, - Size minSize = Size(), - Size maxSize = Size()) - { - (void)maxSize; - MemStorage storage(cvCreateMemStorage(0)); - //CvMat img=image; 
- CvSeq *objs = oclHaarDetectObjects(image, storage, scaleFactor, minNeighbors, flags, minSize); - vector<CvAvgComp> vecAvgComp; - Seq<CvAvgComp>(objs).copyTo(vecAvgComp); - faces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect()); - } - -}; - -} -} -#endif -TEST(Haar) -{ - Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE); - - if (img.empty()) - { - throw runtime_error("can't open basketball1.png"); - } - - CascadeClassifier faceCascadeCPU; - - if (!faceCascadeCPU.load(abspath("haarcascade_frontalface_alt.xml"))) - { - throw runtime_error("can't load haarcascade_frontalface_alt.xml"); - } - - vector<Rect> faces; - - SUBTEST << img.cols << "x" << img.rows << "; scale image"; - CPU_ON; - faceCascadeCPU.detectMultiScale(img, faces, - 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::CascadeClassifier_GPU faceCascade; - - if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml"))) - { - throw runtime_error("can't load haarcascade_frontalface_alt.xml"); - } - - ocl::oclMat d_img(img); - - faces.clear(); - - WARMUP_ON; - faceCascade.detectMultiScale(d_img, faces, - 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); - WARMUP_OFF; - - faces.clear(); - - GPU_ON; - faceCascade.detectMultiScale(d_img, faces, - 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); - GPU_OFF; - - GPU_FULL_ON; - d_img.upload(img); - faceCascade.detectMultiScale(d_img, faces, - 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); - GPU_FULL_OFF; -#endif -} - -///////////// blend //////////////////////// -template <typename T> -void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold) -{ - result_gold.create(img1.size(), img1.type()); - - int cn = img1.channels(); - - for (int y = 0; y < img1.rows; ++y) - { - const float *weights1_row = weights1.ptr<float>(y); - const float *weights2_row = weights2.ptr<float>(y); - 
const T *img1_row = img1.ptr<T>(y); - const T *img2_row = img2.ptr<T>(y); - T *result_gold_row = result_gold.ptr<T>(y); - - for (int x = 0; x < img1.cols * cn; ++x) - { - float w1 = weights1_row[x / cn]; - float w2 = weights2_row[x / cn]; - result_gold_row[x] = static_cast<T>((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f)); - } - } -} -TEST(blend) -{ - Mat src1, src2, weights1, weights2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " and CV_32FC1"; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(weights1, size, size, CV_32FC1, 0, 1); - gen(weights2, size, size, CV_32FC1, 0, 1); - - blendLinearGold<uchar>(src1, src2, weights1, weights2, dst); - - CPU_ON; - blendLinearGold<uchar>(src1, src2, weights1, weights2, dst); - CPU_OFF; - -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - d_weights1.upload(weights1); - d_weights2.upload(weights2); - - WARMUP_ON; - ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - d_weights1.upload(weights1); - d_weights2.upload(weights2); - ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - } -} -///////////// columnSum//////////////////////// -TEST(columnSum) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; CV_32FC1"; - - gen(src, size, size, CV_32FC1, 0, 256); - - CPU_ON; - 
dst.create(src.size(), src.type()); - - for (int i = 1; i < src.rows; ++i) - { - for (int j = 0; j < src.cols; ++j) - { - dst.at<float>(i, j) = src.at<float>(i, j) += src.at<float>(i - 1, j); - } - } - - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - WARMUP_ON; - ocl::columnSum(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::columnSum(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::columnSum(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} - -///////////// HOG//////////////////////// -TEST(HOG) -{ - Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE); - - if (src.empty()) - { - throw runtime_error("can't open road.png"); - } - - - cv::HOGDescriptor hog; - hog.setSVMDetector(hog.getDefaultPeopleDetector()); - std::vector<cv::Rect> found_locations; - - SUBTEST << 768 << 'x' << 576 << "; road.png"; - - hog.detectMultiScale(src, found_locations); - - CPU_ON; - hog.detectMultiScale(src, found_locations); - CPU_OFF; - -#ifdef USE_OPENCL - cv::ocl::HOGDescriptor ocl_hog; - ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector()); - ocl::oclMat d_src; - d_src.upload(src); - - WARMUP_ON; - ocl_hog.detectMultiScale(d_src, found_locations); - WARMUP_OFF; - - GPU_ON; - ocl_hog.detectMultiScale(d_src, found_locations); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl_hog.detectMultiScale(d_src, found_locations); - GPU_FULL_OFF; -#endif -} - -///////////// SURF //////////////////////// - -TEST(SURF) -{ - Mat keypoints_cpu; - Mat descriptors_cpu; - - Mat src = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE); - - if (src.empty()) - { - throw runtime_error("can't open aloeL.jpg"); - } - - SUBTEST << src.cols << "x" << src.rows << "; aloeL.jpg"; - SURF surf; - vector<KeyPoint> keypoints; - Mat descriptors; - - surf(src, Mat(), keypoints, descriptors); - - CPU_ON; - keypoints.clear(); - surf(src, Mat(), keypoints, descriptors); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::SURF_OCL d_surf; - ocl::oclMat d_src(src); - 
ocl::oclMat d_keypoints; - ocl::oclMat d_descriptors; - - WARMUP_ON; - d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); - WARMUP_OFF; - - GPU_ON; - d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); - - if (!d_keypoints.empty()) - { - d_keypoints.download(keypoints_cpu); - } - - if (!d_descriptors.empty()) - { - d_descriptors.download(descriptors_cpu); - } - - GPU_FULL_OFF; -#endif -} -//////////////////// BruteForceMatch ///////////////// -TEST(BruteForceMatcher) -{ - Mat trainIdx_cpu; - Mat distance_cpu; - Mat allDist_cpu; - Mat nMatches_cpu; - - for (int size = 1000; size <= 4000; size *= 2) - { - // Init CPU matcher - int desc_len = 64; - - BFMatcher matcher(NORM_L2); - - Mat query; - gen(query, size, desc_len, CV_32F, 0, 1); - - Mat train; - gen(train, size, desc_len, CV_32F, 0, 1); - // Output - vector< vector<DMatch> > matches(2); -#ifdef USE_OPENCL - // Init GPU matcher - ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist); - - ocl::oclMat d_query(query); - ocl::oclMat d_train(train); - - ocl::oclMat d_trainIdx, d_distance, d_allDist, d_nMatches; -#endif - SUBTEST << size << "; match"; - - matcher.match(query, train, matches[0]); - - CPU_ON; - matcher.match(query, train, matches[0]); - CPU_OFF; - -#ifdef USE_OPENCL - WARMUP_ON; - d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); - WARMUP_OFF; - - GPU_ON; - d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); - GPU_OFF; - - GPU_FULL_ON; - d_query.upload(query); - d_train.upload(train); - d_matcher.match(d_query, d_train, matches[0]); - GPU_FULL_OFF; -#endif - - SUBTEST << size << "; knnMatch"; - - matcher.knnMatch(query, train, matches, 2); - - CPU_ON; - matcher.knnMatch(query, train, matches, 2); - CPU_OFF; - -#ifdef USE_OPENCL - WARMUP_ON; - d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); - 
WARMUP_OFF; - - GPU_ON; - d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); - GPU_OFF; - - GPU_FULL_ON; - d_query.upload(query); - d_train.upload(train); - d_matcher.knnMatch(d_query, d_train, matches, 2); - GPU_FULL_OFF; -#endif - SUBTEST << size << "; radiusMatch"; - - float max_distance = 2.0f; - - matcher.radiusMatch(query, train, matches, max_distance); - - CPU_ON; - matcher.radiusMatch(query, train, matches, max_distance); - CPU_OFF; - -#ifdef USE_OPENCL - d_trainIdx.release(); - - WARMUP_ON; - d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); - WARMUP_OFF; - - GPU_ON; - d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); - GPU_OFF; - - GPU_FULL_ON; - d_query.upload(query); - d_train.upload(train); - d_matcher.radiusMatch(d_query, d_train, matches, max_distance); - GPU_FULL_OFF; -#endif - } -} -///////////// Lut //////////////////////// -TEST(lut) -{ - Mat src, lut, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_lut, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC3}; - std::string type_name[] = {"CV_8UC1", "CV_8UC3"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src, size, size, all_type[j], 0, 256); - gen(lut, 1, 256, CV_8UC1, 0, 1); - gen(dst, size, size, all_type[j], 0, 256); - - LUT(src, lut, dst); - - CPU_ON; - LUT(src, lut, dst); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - d_lut.upload(lut); - - WARMUP_ON; - ocl::LUT(d_src, d_lut, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::LUT(d_src, d_lut, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_lut.upload(lut); - ocl::LUT(d_src, d_lut, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Exp //////////////////////// -TEST(Exp) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; 
-#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; CV_32FC1"; - - gen(src, size, size, CV_32FC1, 0, 256); - gen(dst, size, size, CV_32FC1, 0, 256); - - exp(src, dst); - - CPU_ON; - exp(src, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::exp(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::exp(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::exp(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} - -///////////// LOG //////////////////////// -TEST(Log) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; 32F"; - - gen(src, size, size, CV_32F, 1, 10); - - log(src, dst); - - CPU_ON; - log(src, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::log(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::log(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::log(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} - -///////////// Add //////////////////////// - -TEST(Add) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 1); - gen(src2, size, size, all_type[j], 0, 1); - - add(src1, src2, dst); - - CPU_ON; - add(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::add(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::add(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::add(d_src1, d_src2, d_dst); - 
d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Mul //////////////////////// -TEST(Mul) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - multiply(src1, src2, dst); - - CPU_ON; - multiply(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::multiply(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::multiply(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::multiply(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Div //////////////////////// -TEST(Div) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - divide(src1, src2, dst); - - CPU_ON; - divide(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::divide(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::divide(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::divide(d_src1, d_src2, d_dst); - 
d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Absdiff //////////////////////// -TEST(Absdiff) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - absdiff(src1, src2, dst); - - CPU_ON; - absdiff(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::absdiff(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::absdiff(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::absdiff(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// CartToPolar //////////////////////// -TEST(CartToPolar) -{ - Mat src1, src2, dst, dst1; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst, d_dst1; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - gen(dst1, size, size, all_type[j], 0, 256); - - - cartToPolar(src1, src2, dst, dst1, 1); - - CPU_ON; - cartToPolar(src1, src2, dst, dst1, 1); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); - WARMUP_OFF; - - GPU_ON; - ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); 
- GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); - d_dst.download(dst); - d_dst1.download(dst1); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// PolarToCart //////////////////////// -TEST(PolarToCart) -{ - Mat src1, src2, dst, dst1; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst, d_dst1; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - gen(dst1, size, size, all_type[j], 0, 256); - - - polarToCart(src1, src2, dst, dst1, 1); - - CPU_ON; - polarToCart(src1, src2, dst, dst1, 1); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); - WARMUP_OFF; - - GPU_ON; - ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); - d_dst.download(dst); - d_dst1.download(dst1); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Magnitude //////////////////////// -TEST(magnitude) -{ - Mat x, y, mag; -#ifdef USE_OPENCL - ocl::oclMat d_x, d_y, d_mag; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(x, size, size, all_type[j], 0, 1); - gen(y, size, size, all_type[j], 0, 1); - - magnitude(x, y, mag); - - CPU_ON; - magnitude(x, y, mag); - CPU_OFF; -#ifdef USE_OPENCL - d_x.upload(x); - d_y.upload(y); - - WARMUP_ON; - 
ocl::magnitude(d_x, d_y, d_mag); - WARMUP_OFF; - - GPU_ON; - ocl::magnitude(d_x, d_y, d_mag); - GPU_OFF; - - GPU_FULL_ON; - d_x.upload(x); - d_y.upload(y); - ocl::magnitude(d_x, d_y, d_mag); - d_mag.download(mag); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Transpose //////////////////////// -TEST(Transpose) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - transpose(src, dst); - - CPU_ON; - transpose(src, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::transpose(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::transpose(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::transpose(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Flip //////////////////////// -TEST(Flip) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; FLIP_BOTH"; - - gen(src, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - flip(src, dst, 0); - - CPU_ON; - flip(src, dst, 0); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::flip(d_src, d_dst, 0); - WARMUP_OFF; - - GPU_ON; - ocl::flip(d_src, d_dst, 0); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::flip(d_src, d_dst, 0); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// minMax 
//////////////////////// -TEST(minMax) -{ - Mat src; -#ifdef USE_OPENCL - ocl::oclMat d_src; -#endif - double min_val, max_val; - Point min_loc, max_loc; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src, size, size, all_type[j], 0, 256); - - CPU_ON; - minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::minMax(d_src, &min_val, &max_val); - WARMUP_OFF; - - GPU_ON; - ocl::minMax(d_src, &min_val, &max_val); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::minMax(d_src, &min_val, &max_val); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// minMaxLoc //////////////////////// -TEST(minMaxLoc) -{ - Mat src; -#ifdef USE_OPENCL - ocl::oclMat d_src; -#endif - double min_val, max_val; - Point min_loc, max_loc; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 1); - - CPU_ON; - minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); - WARMUP_OFF; - - GPU_ON; - ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Sum //////////////////////// -TEST(Sum) -{ - Mat src; - Scalar cpures, gpures; -#ifdef USE_OPENCL - ocl::oclMat d_src; -#endif - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] 
= {"CV_8UC1", "CV_32SC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - cpures = sum(src); - - CPU_ON; - cpures = sum(src); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - gpures = ocl::sum(d_src); - WARMUP_OFF; - - GPU_ON; - gpures = ocl::sum(d_src); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - gpures = ocl::sum(d_src); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// countNonZero //////////////////////// -TEST(countNonZero) -{ - Mat src; -#ifdef USE_OPENCL - ocl::oclMat d_src; -#endif - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - countNonZero(src); - - CPU_ON; - countNonZero(src); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::countNonZero(d_src); - WARMUP_OFF; - - GPU_ON; - ocl::countNonZero(d_src); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::countNonZero(d_src); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Phase //////////////////////// -TEST(Phase) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - phase(src1, src2, dst, 1); - - CPU_ON; - phase(src1, src2, dst, 1); - CPU_OFF; -#ifdef USE_OPENCL - 
d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::phase(d_src1, d_src2, d_dst, 1); - WARMUP_OFF; - - GPU_ON; - ocl::phase(d_src1, d_src2, d_dst, 1); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::phase(d_src1, d_src2, d_dst, 1); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// bitwise_and//////////////////////// -TEST(bitwise_and) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_and(src1, src2, dst); - - CPU_ON; - bitwise_and(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::bitwise_and(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_and(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::bitwise_and(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// bitwise_or//////////////////////// -TEST(bitwise_or) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_or(src1, src2, dst); - - CPU_ON; - 
bitwise_or(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::bitwise_or(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_or(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::bitwise_or(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// bitwise_xor//////////////////////// -TEST(bitwise_xor) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_xor(src1, src2, dst); - - CPU_ON; - bitwise_xor(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::bitwise_xor(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_xor(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::bitwise_xor(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// bitwise_not//////////////////////// -TEST(bitwise_not) -{ - Mat src1, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_not(src1, dst); - - CPU_ON; - 
bitwise_not(src1, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - - WARMUP_ON; - ocl::bitwise_not(d_src1, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_not(d_src1, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - ocl::bitwise_not(d_src1, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// compare//////////////////////// -TEST(compare) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int CMP_EQ = 0; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - compare(src1, src2, dst, CMP_EQ); - - CPU_ON; - compare(src1, src2, dst, CMP_EQ); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); - WARMUP_OFF; - - GPU_ON; - ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// pow //////////////////////// -TEST(pow) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 100); - gen(dst, size, size, all_type[j], 0, 100); - - pow(src, -2.0, dst); - - CPU_ON; - pow(src, -2.0, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - 
d_dst.upload(dst); - - WARMUP_ON; - ocl::pow(d_src, -2.0, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::pow(d_src, -2.0, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::pow(d_src, -2.0, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// MagnitudeSqr//////////////////////// -TEST(MagnitudeSqr) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++) - { - SUBTEST << size << 'x' << size << "; " << type_name[t]; - - gen(src1, size, size, all_type[t], 0, 256); - gen(src2, size, size, all_type[t], 0, 256); - gen(dst, size, size, all_type[t], 0, 256); - - - for (int i = 0; i < src1.rows; ++i) - - for (int j = 0; j < src1.cols; ++j) - { - float val1 = src1.at<float>(i, j); - float val2 = src2.at<float>(i, j); - - ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; - - } - - CPU_ON; - - for (int i = 0; i < src1.rows; ++i) - for (int j = 0; j < src1.cols; ++j) - { - float val1 = src1.at<float>(i, j); - float val2 = src2.at<float>(i, j); - - ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; - - } - - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// AddWeighted//////////////////////// -TEST(AddWeighted) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - double alpha = 2.0, beta = 1.0, gama = 3.0; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; 
- - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - addWeighted(src1, alpha, src2, beta, gama, dst); - - CPU_ON; - addWeighted(src1, alpha, src2, beta, gama, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Blur//////////////////////// -TEST(Blur) -{ - Mat src1, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_dst; -#endif - Size ksize = Size(3, 3); - int bordertype = BORDER_CONSTANT; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - blur(src1, dst, ksize, Point(-1, -1), bordertype); - - CPU_ON; - blur(src1, dst, ksize, Point(-1, -1), bordertype); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - - WARMUP_ON; - ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); - WARMUP_OFF; - - GPU_ON; - ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Laplacian//////////////////////// 
-TEST(Laplacian) -{ - Mat src1, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_dst; -#endif - int ksize = 3; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - Laplacian(src1, dst, -1, ksize, 1); - - CPU_ON; - Laplacian(src1, dst, -1, ksize, 1); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - - WARMUP_ON; - ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); - WARMUP_OFF; - - GPU_ON; - ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Erode //////////////////// -TEST(Erode) -{ - Mat src, dst, ker; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(256)); - ker = getStructuringElement(MORPH_RECT, Size(3, 3)); - - erode(src, dst, ker); - - CPU_ON; - erode(src, dst, ker); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::erode(d_src, d_dst, ker); - WARMUP_OFF; - - GPU_ON; - ocl::erode(d_src, d_dst, ker); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::erode(d_src, d_dst, ker); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Sobel //////////////////////// -TEST(Sobel) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int dx = 
1; - int dy = 1; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - Sobel(src, dst, -1, dx, dy); - - CPU_ON; - Sobel(src, dst, -1, dx, dy); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::Sobel(d_src, d_dst, -1, dx, dy); - WARMUP_OFF; - - GPU_ON; - ocl::Sobel(d_src, d_dst, -1, dx, dy); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::Sobel(d_src, d_dst, -1, dx, dy); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Scharr //////////////////////// -TEST(Scharr) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int dx = 1; - int dy = 0; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - Scharr(src, dst, -1, dx, dy); - - CPU_ON; - Scharr(src, dst, -1, dx, dy); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::Scharr(d_src, d_dst, -1, dx, dy); - WARMUP_OFF; - - GPU_ON; - ocl::Scharr(d_src, d_dst, -1, dx, dy); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::Scharr(d_src, d_dst, -1, dx, dy); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// GaussianBlur //////////////////////// -TEST(GaussianBlur) -{ - Mat src, dst; - int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size 
<< "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - GaussianBlur(src, dst, Size(9, 9), 0); - - CPU_ON; - GaussianBlur(src, dst, Size(9, 9), 0); - CPU_OFF; -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_dst(src.size(), src.type()); - ocl::oclMat d_buf; - - WARMUP_ON; - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); - WARMUP_OFF; - - GPU_ON; - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// equalizeHist //////////////////////// -TEST(equalizeHist) -{ - Mat src, dst; - int all_type[] = {CV_8UC1}; - std::string type_name[] = {"CV_8UC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - equalizeHist(src, dst); - - CPU_ON; - equalizeHist(src, dst); - CPU_OFF; -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_dst; - ocl::oclMat d_hist; - ocl::oclMat d_buf; - - WARMUP_ON; - ocl::equalizeHist(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::equalizeHist(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::equalizeHist(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -/////////// CopyMakeBorder ////////////////////// -TEST(CopyMakeBorder) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_dst; -#endif - int bordertype = BORDER_CONSTANT; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - - gen(src, size, size, all_type[j], 0, 256); - - copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - - 
CPU_ON; - copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - CPU_OFF; -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - - WARMUP_ON; - ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - WARMUP_OFF; - - GPU_ON; - ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// cornerMinEigenVal //////////////////////// -TEST(cornerMinEigenVal) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_dst; -#endif - int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4); - int borderType = BORDER_REFLECT; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - - gen(src, size, size, all_type[j], 0, 256); - - cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); - - CPU_ON; - cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); - CPU_OFF; -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - - WARMUP_ON; - ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); - WARMUP_OFF; - - GPU_ON; - ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// cornerHarris //////////////////////// -TEST(cornerHarris) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < 
sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; BORDER_REFLECT"; - - gen(src, size, size, all_type[j], 0, 1); - - cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT); - - CPU_ON; - cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); - WARMUP_OFF; - - GPU_ON; - ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - - } -} -///////////// integral //////////////////////// -TEST(integral) -{ - Mat src, sum; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_sum, d_buf; -#endif - int all_type[] = {CV_8UC1}; - std::string type_name[] = {"CV_8UC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - integral(src, sum); - - CPU_ON; - integral(src, sum); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::integral(d_src, d_sum); - WARMUP_OFF; - - GPU_ON; - ocl::integral(d_src, d_sum); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::integral(d_src, d_sum); - d_sum.download(sum); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// WarpAffine //////////////////////// -TEST(WarpAffine) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - static const double coeffs[2][3] = - { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0} - }; - Mat M(2, 3, CV_64F, (void *)coeffs); - int interpolation = INTER_NEAREST; - - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < 
sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - Size size1 = Size(size, size); - - warpAffine(src, dst, M, size1, interpolation); - - CPU_ON; - warpAffine(src, dst, M, size1, interpolation); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::warpAffine(d_src, d_dst, M, size1, interpolation); - WARMUP_OFF; - - GPU_ON; - ocl::warpAffine(d_src, d_dst, M, size1, interpolation); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::warpAffine(d_src, d_dst, M, size1, interpolation); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// WarpPerspective //////////////////////// -TEST(WarpPerspective) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - static const double coeffs[3][3] = - { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0}, - {0.0, 0.0, 1.0} - }; - Mat M(3, 3, CV_64F, (void *)coeffs); - int interpolation = INTER_NEAREST; - - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - Size size1 = Size(size, size); - - warpPerspective(src, dst, M, size1, interpolation); - - CPU_ON; - warpPerspective(src, dst, M, size1, interpolation); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); - WARMUP_OFF; - - GPU_ON; - ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} 
- -///////////// resize //////////////////////// -TEST(resize) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; up"; - - gen(src, size, size, all_type[j], 0, 256); - - resize(src, dst, Size(), 2.0, 2.0); - - CPU_ON; - resize(src, dst, Size(), 2.0, 2.0); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); - WARMUP_OFF; - - GPU_ON; - ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; down"; - - gen(src, size, size, all_type[j], 0, 256); - - resize(src, dst, Size(), 0.5, 0.5); - - CPU_ON; - resize(src, dst, Size(), 0.5, 0.5); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); - WARMUP_OFF; - - GPU_ON; - ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// threshold//////////////////////// -TEST(threshold) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY"; - - gen(src, size, size, CV_8U, 0, 100); - - threshold(src, dst, 50.0, 0.0, THRESH_BINARY); - - CPU_ON; - threshold(src, dst, 50.0, 0.0, THRESH_BINARY); - CPU_OFF; -#ifdef 
USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - WARMUP_OFF; - - GPU_ON; - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; 32FC1; THRESH_TRUNC [NPP]"; - - gen(src, size, size, CV_32FC1, 0, 100); - - threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); - - CPU_ON; - threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - WARMUP_OFF; - - GPU_ON; - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} -///////////// meanShiftFiltering//////////////////////// -TEST(meanShiftFiltering) -{ - int sp = 10, sr = 10; - - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4"; - - gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256)); - - pyrMeanShiftFiltering(src, dst, sp, sr); - - CPU_ON; - pyrMeanShiftFiltering(src, dst, sp, sr); - CPU_OFF; -#ifdef USE_OPENCL - gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - - d_src.upload(src); - - WARMUP_ON; - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - WARMUP_OFF; - - GPU_ON; - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} -///////////// meanShiftProc//////////////////////// -COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size 
size, int sp, int sr, int maxIter, float eps, int *tab) -{ - - int isr2 = sr * sr; - int c0, c1, c2, c3; - int iter; - uchar *ptr = NULL; - uchar *pstart = NULL; - int revx = 0, revy = 0; - c0 = sptr[0]; - c1 = sptr[1]; - c2 = sptr[2]; - c3 = sptr[3]; - - // iterate meanshift procedure - for (iter = 0; iter < maxIter; iter++) - { - int count = 0; - int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; - - //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) - int minx = x0 - sp; - int miny = y0 - sp; - int maxx = x0 + sp; - int maxy = y0 + sp; - - //deal with the image boundary - if (minx < 0) - { - minx = 0; - } - - if (miny < 0) - { - miny = 0; - } - - if (maxx >= size.width) - { - maxx = size.width - 1; - } - - if (maxy >= size.height) - { - maxy = size.height - 1; - } - - if (iter == 0) - { - pstart = sptr; - } - else - { - pstart = pstart + revy * sstep + (revx << 2); //point to the new position - } - - ptr = pstart; - ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row - - for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) - { - int rowCount = 0; - int x = minx; -#if CV_ENABLE_UNROLLED - - for (; x + 4 <= maxx; x += 4, ptr += 16) - { - int t0, t1, t2; - t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x; - rowCount++; - } - - t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 1; - rowCount++; - } - - t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 2; - rowCount++; - } - - t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 3; - 
rowCount++; - } - } - -#endif - - for (; x <= maxx; x++, ptr += 4) - { - int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x; - rowCount++; - } - } - - if (rowCount == 0) - { - continue; - } - - count += rowCount; - sy += y * rowCount; - } - - if (count == 0) - { - break; - } - - int x1 = sx / count; - int y1 = sy / count; - s0 = s0 / count; - s1 = s1 / count; - s2 = s2 / count; - - bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + - tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); - - //revise the pointer corresponding to the new (y0,x0) - revx = x1 - x0; - revy = y1 - y0; - - x0 = x1; - y0 = y1; - c0 = s0; - c1 = s1; - c2 = s2; - - if (stopFlag) - { - break; - } - } //for iter - - dptr[0] = (uchar)c0; - dptr[1] = (uchar)c1; - dptr[2] = (uchar)c2; - dptr[3] = (uchar)c3; - - COOR coor; - coor.x = static_cast<short>(x0); - coor.y = static_cast<short>(y0); - return coor; -} - -void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit) -{ - - if (src_roi.empty()) - { - CV_Error(CV_StsBadArg, "The input image is empty"); - } - - if (src_roi.depth() != CV_8U || src_roi.channels() != 4) - { - CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported"); - } - - CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) && - (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows)); - CV_Assert(!(dstCoor_roi.step & 0x3)); - - if (!(crit.type & cv::TermCriteria::MAX_ITER)) - { - crit.maxCount = 5; - } - - int maxIter = std::min(std::max(crit.maxCount, 1), 100); - float eps; - - if (!(crit.type & cv::TermCriteria::EPS)) - { - eps = 1.f; - } - - eps = (float)std::max(crit.epsilon, 0.0); - - int tab[512]; - - for (int i = 0; i < 512; i++) - { - tab[i] = (i - 255) * (i - 255); - } - - uchar *sptr = src_roi.data; - 
uchar *dptr = dst_roi.data; - short *dCoorptr = (short *)dstCoor_roi.data; - int sstep = (int)src_roi.step; - int dstep = (int)dst_roi.step; - int dCoorstep = (int)dstCoor_roi.step >> 1; - cv::Size size = src_roi.size(); - - for (int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), - dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1)) - { - for (int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2) - { - *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); - } - } - -} -TEST(meanShiftProc) -{ - Mat src, dst, dstCoor_roi; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst, d_dstCoor_roi; -#endif - TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1); - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 "; - - gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256)); - - meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); - - CPU_ON; - meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); - WARMUP_OFF; - - GPU_ON; - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); - d_dst.download(dst); - d_dstCoor_roi.download(dstCoor_roi); - GPU_FULL_OFF; -#endif - } -} -///////////// ConvertTo//////////////////////// -TEST(ConvertTo) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << 
size << 'x' << size << "; " << type_name[j] << " to 32FC1"; - - gen(src, size, size, all_type[j], 0, 256); - //gen(dst, size, size, all_type[j], 0, 256); - - //d_dst.upload(dst); - - src.convertTo(dst, CV_32FC1); - - CPU_ON; - src.convertTo(dst, CV_32FC1); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - d_src.convertTo(d_dst, CV_32FC1); - WARMUP_OFF; - - GPU_ON; - d_src.convertTo(d_dst, CV_32FC1); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_src.convertTo(d_dst, CV_32FC1); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// copyTo//////////////////////// -TEST(copyTo) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - //gen(dst, size, size, all_type[j], 0, 256); - - //d_dst.upload(dst); - - src.copyTo(dst); - - CPU_ON; - src.copyTo(dst); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - d_src.copyTo(d_dst); - WARMUP_OFF; - - GPU_ON; - d_src.copyTo(d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_src.copyTo(d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// setTo//////////////////////// -TEST(setTo) -{ - Mat src, dst; - Scalar val(1, 2, 3, 4); -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - src.setTo(val); - - CPU_ON; - src.setTo(val); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - 
WARMUP_ON; - d_src.setTo(val); - WARMUP_OFF; - - GPU_ON; - d_src.setTo(val); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_src.setTo(val); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Merge//////////////////////// -TEST(Merge) -{ - Mat dst; -#ifdef USE_OPENCL - ocl::oclMat d_dst; -#endif - int channels = 4; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - Size size1 = Size(size, size); - std::vector<Mat> src(channels); - - for (int i = 0; i < channels; ++i) - { - src[i] = Mat(size1, all_type[j], cv::Scalar::all(i)); - } - - merge(src, dst); - - CPU_ON; - merge(src, dst); - CPU_OFF; - -#ifdef USE_OPENCL - std::vector<ocl::oclMat> d_src(channels); - - for (int i = 0; i < channels; ++i) - { - d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i)); - } - - WARMUP_ON; - ocl::merge(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::merge(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - - for (int i = 0; i < channels; ++i) - { - d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i)); - } - - ocl::merge(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Split//////////////////////// -TEST(Split) -{ - //int channels = 4; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - Size size1 = Size(size, size); - - Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); - - std::vector<cv::Mat> dst; - - split(src, dst); - - CPU_ON; - split(src, dst); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); - 
std::vector<cv::ocl::oclMat> d_dst; - - WARMUP_ON; - ocl::split(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::split(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::split(d_src, d_dst); - GPU_FULL_OFF; -#endif - } - - } -} - - -///////////// norm//////////////////////// -TEST(norm) -{ - Mat src, buf; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_buf; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF"; - - gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); - gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); - - norm(src, NORM_INF); - - CPU_ON; - norm(src, NORM_INF); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - d_buf.upload(buf); - - WARMUP_ON; - ocl::norm(d_src, d_buf, NORM_INF); - WARMUP_OFF; - - GPU_ON; - ocl::norm(d_src, d_buf, NORM_INF); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::norm(d_src, d_buf, NORM_INF); - GPU_FULL_OFF; -#endif - } -} -///////////// remap//////////////////////// -TEST(remap) -{ - Mat src, dst, xmap, ymap; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst, d_xmap, d_ymap; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - int interpolation = INTER_LINEAR; - int borderMode = BORDER_CONSTANT; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++) - { - SUBTEST << size << 'x' << size << "; src " << type_name[t] << "; map CV_32FC1"; - - gen(src, size, size, all_type[t], 0, 256); - - xmap.create(size, size, CV_32FC1); - dst.create(size, size, CV_32FC1); - ymap.create(size, size, CV_32FC1); - - for (int i = 0; i < size; ++i) - { - float *xmap_row = xmap.ptr<float>(i); - float *ymap_row = ymap.ptr<float>(i); - - for (int j = 0; j < size; ++j) - { - xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f; - ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f; - } - } - - - remap(src, dst, xmap, ymap, 
interpolation, borderMode); - - CPU_ON; - remap(src, dst, xmap, ymap, interpolation, borderMode); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - d_dst.upload(dst); - d_xmap.upload(xmap); - d_ymap.upload(ymap); - - WARMUP_ON; - ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - WARMUP_OFF; - - GPU_ON; - ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// cvtColor//////////////////////// -TEST(cvtColor) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC4}; - std::string type_name[] = {"CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - gen(src, size, size, all_type[j], 0, 256); - SUBTEST << size << "x" << size << "; " << type_name[j] << " ; CV_RGBA2GRAY"; - - cvtColor(src, dst, CV_RGBA2GRAY, 4); - - CPU_ON; - cvtColor(src, dst, CV_RGBA2GRAY, 4); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); - WARMUP_OFF; - - GPU_ON; - ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - - } - - -} -///////////// filter2D//////////////////////// -TEST(filter2D) -{ - Mat src; - - for (int size = 1000; size <= 4000; size *= 2) - { - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - gen(src, size, size, all_type[j], 0, 256); - - for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1) - { - SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ; - - Mat kernel; - gen(kernel, 
ksize, ksize, CV_32FC1, 0.0, 1.0); - - Mat dst; - cv::filter2D(src, dst, -1, kernel); - - CPU_ON; - cv::filter2D(src, dst, -1, kernel); - CPU_OFF; -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_dst; - - WARMUP_ON; - ocl::filter2D(d_src, d_dst, -1, kernel); - WARMUP_OFF; - - GPU_ON; - ocl::filter2D(d_src, d_dst, -1, kernel); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::filter2D(d_src, d_dst, -1, kernel); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } - - - } -} - - -///////////// dft //////////////////////// -TEST(dft) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - int all_type[] = {CV_32FC1, CV_32FC2}; - std::string type_name[] = {"CV_32FC1", "CV_32FC2"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; complex-to-complex"; - - gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(1)); - - dft(src, dst); - - CPU_ON; - dft(src, dst); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::dft(d_src, d_dst, Size(size, size)); - WARMUP_OFF; - - GPU_ON; - ocl::dft(d_src, d_dst, Size(size, size)); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::dft(d_src, d_dst, Size(size, size)); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// gemm //////////////////////// -TEST(gemm) -{ - Mat src1, src2, src3, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_src3, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size; - - gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); - gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); - gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); - - gemm(src1, src2, 1.0, src3, 1.0, dst); - - CPU_ON; - gemm(src1, src2, 1.0, src3, 1.0, dst); - CPU_OFF; - -#ifdef USE_OPENCL - 
d_src1.upload(src1); - d_src2.upload(src2); - d_src3.upload(src3); - - WARMUP_ON; - ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - d_src3.upload(src3); - ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} - -int main(int argc, const char *argv[]) -{ -#ifdef USE_OPENCL - vector<ocl::Info> oclinfo; - int num_devices = getDevice(oclinfo); - - if (num_devices < 1) - { - cerr << "no device found\n"; - return -1; - } - - int devidx = 0; - - for (size_t i = 0; i < oclinfo.size(); i++) - { - for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++) - { - printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str()); - } - } - -#endif - redirectError(cvErrorCallback); - - const char *keys = - "{ h | help | false | print help message }" - "{ f | filter | | filter for test }" - "{ w | workdir | | set working directory }" - "{ l | list | false | show all tests }" - "{ d | device | 0 | device id }" - "{ i | iters | 10 | iteration count }" - "{ m | warmup | 1 | gpu warm up iteration count}" - "{ t | xtop | 1.1 | xfactor top boundary}" - "{ b | xbottom | 0.9 | xfactor bottom boundary}" - "{ v | verify | false | only run gpu once to verify if problems occur}"; - - CommandLineParser cmd(argc, argv, keys); - - if (cmd.get<bool>("help")) - { - cout << "Avaible options:" << endl; - cmd.printParams(); - return 0; - } - -#ifdef USE_OPENCL - int device = cmd.get<int>("device"); - - if (device < 0 || device >= num_devices) - { - cerr << "Invalid device ID" << endl; - return -1; - } - - if (cmd.get<bool>("verify")) - { - TestSystem::instance().setNumIters(1); - TestSystem::instance().setGPUWarmupIters(0); - TestSystem::instance().setCPUIters(0); - } - - devidx = 0; - - for (size_t i = 0; i < oclinfo.size(); i++) - { - for (size_t j = 0; j < oclinfo[i].DeviceName.size(); 
j++, devidx++) - { - if (device == devidx) - { - ocl::setDevice(oclinfo[i], (int)j); - TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]); - printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str()); - goto END_DEV; - } - } - } - -END_DEV: - -#endif - string filter = cmd.get<string>("filter"); - string workdir = cmd.get<string>("workdir"); - bool list = cmd.get<bool>("list"); - int iters = cmd.get<int>("iters"); - int wu_iters = cmd.get<int>("warmup"); - double x_top = cmd.get<double>("xtop"); - double x_bottom = cmd.get<double>("xbottom"); - - TestSystem::instance().setTopThreshold(x_top); - TestSystem::instance().setBottomThreshold(x_bottom); - - if (!filter.empty()) - { - TestSystem::instance().setTestFilter(filter); - } - - if (!workdir.empty()) - { - if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\') - { - workdir += '/'; - } - - TestSystem::instance().setWorkingDir(workdir); - } - - if (list) - { - TestSystem::instance().setListMode(true); - } - - TestSystem::instance().setNumIters(iters); - TestSystem::instance().setGPUWarmupIters(wu_iters); - - TestSystem::instance().run(); - - return 0; -}