#include #include #include #include #include #include #include #include #include "opencv2/imgproc.hpp" #include "opencv2/highgui.hpp" #include "opencv2/calib3d.hpp" #include "opencv2/video.hpp" #include "opencv2/nonfree.hpp" #include "opencv2/objdetect.hpp" #include "opencv2/features2d.hpp" #define USE_OPENCL #ifdef USE_OPENCL #include "opencv2/ocl.hpp" #include "opencv2/nonfree/ocl.hpp" #endif #define TAB " " using namespace std; using namespace cv; // This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files // All images needed in this test are in samples/gpu folder. // For haar template, haarcascade_frontalface_alt.xml shouold be in working directory void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high); string abspath(const string &relpath); int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *); typedef struct { short x; short y; } COOR; COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab); void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit); class Runnable { public: explicit Runnable(const std::string &runname): name_(runname) {} virtual ~Runnable() {} const std::string &name() const { return name_; } virtual void run() = 0; private: std::string name_; }; class TestSystem { public: static TestSystem &instance() { static TestSystem me; return me; } void setWorkingDir(const std::string &val) { working_dir_ = val; } const std::string &workingDir() const { return working_dir_; } void setTestFilter(const std::string &val) { test_filter_ = val; } const std::string &testFilter() const { return test_filter_; } void setNumIters(int num_iters) { num_iters_ = num_iters; } void setGPUWarmupIters(int num_iters) { gpu_warmup_iters_ = num_iters; } void setCPUIters(int num_iters) { cpu_num_iters_ = num_iters; } void 
setTopThreshold(double top) { top_ = top; } void setBottomThreshold(double bottom) { bottom_ = bottom; } void addInit(Runnable *init) { inits_.push_back(init); } void addTest(Runnable *test) { tests_.push_back(test); } void run(); // It's public because OpenCV callback uses it void printError(const std::string &msg); std::stringstream &startNewSubtest() { finishCurrentSubtest(); return cur_subtest_description_; } bool stop() const { return cur_iter_idx_ >= num_iters_; } bool cpu_stop() const { return cur_iter_idx_ >= cpu_num_iters_; } bool warmupStop() { return cur_warmup_idx_++ >= gpu_warmup_iters_; } void warmupComplete() { cur_warmup_idx_ = 0; } void cpuOn() { cpu_started_ = cv::getTickCount(); } void cpuOff() { int64 delta = cv::getTickCount() - cpu_started_; cpu_times_.push_back(delta); ++cur_iter_idx_; } void cpuComplete() { cpu_elapsed_ += meanTime(cpu_times_); cur_subtest_is_empty_ = false; cur_iter_idx_ = 0; } void gpuOn() { gpu_started_ = cv::getTickCount(); } void gpuOff() { int64 delta = cv::getTickCount() - gpu_started_; gpu_times_.push_back(delta); ++cur_iter_idx_; } void gpuComplete() { gpu_elapsed_ += meanTime(gpu_times_); cur_subtest_is_empty_ = false; cur_iter_idx_ = 0; } void gpufullOn() { gpu_full_started_ = cv::getTickCount(); } void gpufullOff() { int64 delta = cv::getTickCount() - gpu_full_started_; gpu_full_times_.push_back(delta); ++cur_iter_idx_; } void gpufullComplete() { gpu_full_elapsed_ += meanTime(gpu_full_times_); cur_subtest_is_empty_ = false; cur_iter_idx_ = 0; } bool isListMode() const { return is_list_mode_; } void setListMode(bool value) { is_list_mode_ = value; } void setRecordName(const std::string &name) { recordname_ = name; } void setCurrentTest(const std::string &name) { itname_ = name; itname_changed_ = true; } private: TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0), gpu_elapsed_(0), gpu_full_elapsed_(0), speedup_total_(0.0), num_subtests_called_(0), speedup_faster_count_(0), speedup_slower_count_(0), 
speedup_equal_count_(0), speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false), num_iters_(10), cpu_num_iters_(2), gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0), record_(0), recordname_("performance"), itname_changed_(true) { cpu_times_.reserve(num_iters_); gpu_times_.reserve(num_iters_); gpu_full_times_.reserve(num_iters_); } void finishCurrentSubtest(); void resetCurrentSubtest() { cpu_elapsed_ = 0; gpu_elapsed_ = 0; gpu_full_elapsed_ = 0; cur_subtest_description_.str(""); cur_subtest_is_empty_ = true; cur_iter_idx_ = 0; cpu_times_.clear(); gpu_times_.clear(); gpu_full_times_.clear(); } double meanTime(const std::vector &samples); void printHeading(); void printSummary(); void printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup); void writeHeading(); void writeSummary(); void writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev); std::string working_dir_; std::string test_filter_; std::vector inits_; std::vector tests_; std::stringstream cur_subtest_description_; bool cur_subtest_is_empty_; int64 cpu_started_; int64 gpu_started_; int64 gpu_full_started_; double cpu_elapsed_; double gpu_elapsed_; double gpu_full_elapsed_; double speedup_total_; double speedup_full_total_; int num_subtests_called_; int speedup_faster_count_; int speedup_slower_count_; int speedup_equal_count_; int speedup_full_faster_count_; int speedup_full_slower_count_; int speedup_full_equal_count_; bool is_list_mode_; double top_; double bottom_; int num_iters_; int cpu_num_iters_; //there's no need to set cpu running same times with gpu int gpu_warmup_iters_; //gpu warm up times, default is 1 int cur_iter_idx_; int cur_warmup_idx_; //current gpu warm up times std::vector cpu_times_; std::vector gpu_times_; std::vector gpu_full_times_; FILE *record_; std::string recordname_; 
std::string itname_; bool itname_changed_; }; #define GLOBAL_INIT(name) \ struct name##_init: Runnable { \ name##_init(): Runnable(#name) { \ TestSystem::instance().addInit(this); \ } \ void run(); \ } name##_init_instance; \ void name##_init::run() #define TEST(name) \ struct name##_test: Runnable { \ name##_test(): Runnable(#name) { \ TestSystem::instance().addTest(this); \ } \ void run(); \ } name##_test_instance; \ void name##_test::run() #define SUBTEST TestSystem::instance().startNewSubtest() #define CPU_ON \ while (!TestSystem::instance().cpu_stop()) { \ TestSystem::instance().cpuOn() #define CPU_OFF \ TestSystem::instance().cpuOff(); \ } TestSystem::instance().cpuComplete() #define GPU_ON \ while (!TestSystem::instance().stop()) { \ TestSystem::instance().gpuOn() #define GPU_OFF \ TestSystem::instance().gpuOff(); \ } TestSystem::instance().gpuComplete() #define GPU_FULL_ON \ while (!TestSystem::instance().stop()) { \ TestSystem::instance().gpufullOn() #define GPU_FULL_OFF \ TestSystem::instance().gpufullOff(); \ } TestSystem::instance().gpufullComplete() #define WARMUP_ON \ while (!TestSystem::instance().warmupStop()) { #define WARMUP_OFF \ } TestSystem::instance().warmupComplete() void TestSystem::run() { if (is_list_mode_) { for (vector::iterator it = tests_.begin(); it != tests_.end(); ++it) { cout << (*it)->name() << endl; } return; } // Run test initializers for (vector::iterator it = inits_.begin(); it != inits_.end(); ++it) { if ((*it)->name().find(test_filter_, 0) != string::npos) { (*it)->run(); } } printHeading(); writeHeading(); // Run tests for (vector::iterator it = tests_.begin(); it != tests_.end(); ++it) { try { if ((*it)->name().find(test_filter_, 0) != string::npos) { cout << endl << (*it)->name() << ":\n"; setCurrentTest((*it)->name()); //fprintf(record_,"%s\n",(*it)->name().c_str()); (*it)->run(); finishCurrentSubtest(); } } catch (const Exception &) { // Message is printed via callback resetCurrentSubtest(); } catch (const runtime_error 
&e) { printError(e.what()); resetCurrentSubtest(); } } #ifdef USE_OPENCL printSummary(); writeSummary(); #endif } void TestSystem::finishCurrentSubtest() { if (cur_subtest_is_empty_) // There is no need to print subtest statistics { return; } double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0; double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0; double speedup = static_cast(cpu_elapsed_) / std::max(1.0, gpu_elapsed_); speedup_total_ += speedup; double fullspeedup = static_cast(cpu_elapsed_) / std::max(1.0, gpu_full_elapsed_); speedup_full_total_ += fullspeedup; if (speedup > top_) { speedup_faster_count_++; } else if (speedup < bottom_) { speedup_slower_count_++; } else { speedup_equal_count_++; } if (fullspeedup > top_) { speedup_full_faster_count_++; } else if (fullspeedup < bottom_) { speedup_full_slower_count_++; } else { speedup_full_equal_count_++; } // compute min, max and std::sort(gpu_times_.begin(), gpu_times_.end()); double gpu_min = gpu_times_.front() / getTickFrequency() * 1000.0; double gpu_max = gpu_times_.back() / getTickFrequency() * 1000.0; double deviation = 0; if (gpu_times_.size() > 1) { double sum = 0; for (size_t i = 0; i < gpu_times_.size(); i++) { int64 diff = gpu_times_[i] - static_cast(gpu_elapsed_); double diff_time = diff * 1000 / getTickFrequency(); sum += diff_time * diff_time; } deviation = std::sqrt(sum / gpu_times_.size()); } printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation); num_subtests_called_++; resetCurrentSubtest(); } double TestSystem::meanTime(const vector &samples) { double sum = accumulate(samples.begin(), samples.end(), 0.); return sum / samples.size(); } void TestSystem::printHeading() { cout << endl; cout << setiosflags(ios_base::left); #ifdef USE_OPENCL cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, 
ms" << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP" << "DESCRIPTION\n"; #else cout << TAB << setw(10) << "CPU, ms\n"; #endif cout << resetiosflags(ios_base::left); } void TestSystem::writeHeading() { if (!record_) { #ifdef USE_OPENCL recordname_ += "_OCL.csv"; #else recordname_ += "_CPU.csv"; #endif record_ = fopen(recordname_.c_str(), "w"); } #ifdef USE_OPENCL fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n"); #else fprintf(record_, "NAME,DESCRIPTION,CPU (ms)\n"); #endif fflush(record_); } void TestSystem::printSummary() { cout << setiosflags(ios_base::fixed); cout << "\naverage GPU speedup: x" << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_) << endl; cout << "\nGPU exceeded: " << setprecision(3) << speedup_faster_count_ << "\nGPU passed: " << setprecision(3) << speedup_equal_count_ << "\nGPU failed: " << setprecision(3) << speedup_slower_count_ << endl; cout << "\nGPU exceeded rate: " << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100 << "%" << "\nGPU passed rate: " << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100 << "%" << "\nGPU failed rate: " << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100 << "%" << endl; cout << "\naverage GPUTOTAL speedup: x" << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_) << endl; cout << "\nGPUTOTAL exceeded: " << setprecision(3) << speedup_full_faster_count_ << "\nGPUTOTAL passed: " << setprecision(3) << speedup_full_equal_count_ << "\nGPUTOTAL failed: " << setprecision(3) << speedup_full_slower_count_ << endl; cout << "\nGPUTOTAL exceeded rate: " << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100 << "%" << "\nGPUTOTAL passed rate: " << setprecision(3) << 
(float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100 << "%" << "\nGPUTOTAL failed rate: " << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100 << "%" << endl; cout << resetiosflags(ios_base::fixed); } void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup) { cout << TAB << setiosflags(ios_base::left); stringstream stream; stream << cpu_time; cout << setw(10) << stream.str(); #ifdef USE_OPENCL stream.str(""); stream << gpu_time; cout << setw(10) << stream.str(); stream.str(""); stream << "x" << setprecision(3) << speedup; cout << setw(14) << stream.str(); stream.str(""); stream << gpu_full_time; cout << setw(14) << stream.str(); stream.str(""); stream << "x" << setprecision(3) << fullspeedup; cout << setw(14) << stream.str(); #endif cout << cur_subtest_description_.str(); cout << resetiosflags(ios_base::left) << endl; } void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev) { if (!record_) { recordname_ += ".csv"; record_ = fopen(recordname_.c_str(), "w"); } #ifdef USE_OPENCL fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "", cur_subtest_description_.str().c_str(), cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup, gpu_min, gpu_max, std_dev); #else fprintf(record_, "%s,%s,%.3f\n", itname_changed_ ? 
itname_.c_str() : "", cur_subtest_description_.str().c_str(), cpu_time); #endif if (itname_changed_) { itname_changed_ = false; } fflush(record_); } void TestSystem::writeSummary() { if (!record_) { recordname_ += ".csv"; record_ = fopen(recordname_.c_str(), "w"); } fprintf(record_, "\nAverage GPU speedup: %.3f\n" "exceeded: %d (%.3f%%)\n" "passed: %d (%.3f%%)\n" "failed: %d (%.3f%%)\n" "\nAverage GPUTOTAL speedup: %.3f\n" "exceeded: %d (%.3f%%)\n" "passed: %d (%.3f%%)\n" "failed: %d (%.3f%%)\n", speedup_total_ / std::max(1, num_subtests_called_), speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100, speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100, speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100, speedup_full_total_ / std::max(1, num_subtests_called_), speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100, speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100, speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100 ); fflush(record_); } void TestSystem::printError(const std::string &msg) { cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl; } void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high) { mat.create(rows, cols, type); RNG rng(0); rng.fill(mat, RNG::UNIFORM, low, high); } string abspath(const string &relpath) { return TestSystem::instance().workingDir() + relpath; } int CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/, const char *err_msg, const char * /*file_name*/, int /*line*/, void * /*userdata*/) { TestSystem::instance().printError(err_msg); return 0; } /////////// matchTemplate //////////////////////// //void InitMatchTemplate() //{ // Mat src; gen(src, 500, 500, CV_32F, 0, 1); // Mat templ; gen(templ, 500, 
500, CV_32F, 0, 1); //#ifdef USE_OPENCL // ocl::oclMat d_src(src), d_templ(templ), d_dst; // ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); //#endif //} TEST(matchTemplate) { //InitMatchTemplate(); Mat src, templ, dst; int templ_size = 5; for (int size = 1000; size <= 4000; size *= 2) { int all_type[] = {CV_32FC1, CV_32FC4}; std::string type_name[] = {"CV_32FC1", "CV_32FC4"}; for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { for(templ_size = 5; templ_size <= 5; templ_size *= 5) { gen(src, size, size, all_type[j], 0, 1); SUBTEST << src.cols << 'x' << src.rows << "; " << type_name[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR"; gen(templ, templ_size, templ_size, all_type[j], 0, 1); matchTemplate(src, templ, dst, CV_TM_CCORR); CPU_ON; matchTemplate(src, templ, dst, CV_TM_CCORR); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_src(src), d_templ, d_dst; d_templ.upload(templ); WARMUP_ON; ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); WARMUP_OFF; GPU_ON; ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_templ.upload(templ); ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); d_dst.download(dst); GPU_FULL_OFF; #endif } } int all_type_8U[] = {CV_8UC1}; std::string type_name_8U[] = {"CV_8UC1"}; for (size_t j = 0; j < sizeof(all_type_8U) / sizeof(int); j++) { for(templ_size = 5; templ_size <= 5; templ_size *= 5) { SUBTEST << src.cols << 'x' << src.rows << "; " << type_name_8U[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR_NORMED"; gen(src, size, size, all_type_8U[j], 0, 255); gen(templ, templ_size, templ_size, all_type_8U[j], 0, 255); matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); CPU_ON; matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_src(src); ocl::oclMat d_templ(templ), d_dst; WARMUP_ON; ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); WARMUP_OFF; GPU_ON; ocl::matchTemplate(d_src, d_templ, d_dst, 
CV_TM_CCORR_NORMED); GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_templ.upload(templ); ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); d_dst.download(dst); GPU_FULL_OFF; #endif } } } } ///////////// PyrLKOpticalFlow //////////////////////// TEST(PyrLKOpticalFlow) { std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"}; std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"}; for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++) { Mat frame0 = imread(abspath(images1[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE); if (frame0.empty()) { std::string errstr = "can't open " + images1[i]; throw runtime_error(errstr); } Mat frame1 = imread(abspath(images2[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE); if (frame1.empty()) { std::string errstr = "can't open " + images2[i]; throw runtime_error(errstr); } Mat gray_frame; if (i == 0) { cvtColor(frame0, gray_frame, COLOR_BGR2GRAY); } for (int points = 1000; points <= 4000; points *= 2) { if (i == 0) SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points"; else SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points"; Mat nextPts_cpu; Mat status_cpu; vector pts; goodFeaturesToTrack(i == 0 ? 
gray_frame : frame0, pts, points, 0.01, 0.0); vector nextPts; vector status; vector err; calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); CPU_ON; calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); CPU_OFF; #ifdef USE_OPENCL ocl::PyrLKOpticalFlow d_pyrLK; ocl::oclMat d_frame0(frame0); ocl::oclMat d_frame1(frame1); ocl::oclMat d_pts; Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]); d_pts.upload(pts_mat); ocl::oclMat d_nextPts; ocl::oclMat d_status; ocl::oclMat d_err; WARMUP_ON; d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); WARMUP_OFF; GPU_ON; d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); GPU_OFF; GPU_FULL_ON; d_frame0.upload(frame0); d_frame1.upload(frame1); d_pts.upload(pts_mat); d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); if (!d_nextPts.empty()) { d_nextPts.download(nextPts_cpu); } if (!d_status.empty()) { d_status.download(status_cpu); } GPU_FULL_OFF; #endif } } } ///////////// pyrDown ////////////////////// TEST(pyrDown) { Mat src, dst; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); pyrDown(src, dst); CPU_ON; pyrDown(src, dst); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_src(src); ocl::oclMat d_dst; WARMUP_ON; ocl::pyrDown(d_src, d_dst); WARMUP_OFF; GPU_ON; ocl::pyrDown(d_src, d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::pyrDown(d_src, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// pyrUp //////////////////////// TEST(pyrUp) { Mat src, dst; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 500; size <= 2000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; 
" << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); pyrUp(src, dst); CPU_ON; pyrUp(src, dst); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_src(src); ocl::oclMat d_dst; WARMUP_ON; ocl::pyrUp(d_src, d_dst); WARMUP_OFF; GPU_ON; ocl::pyrUp(d_src, d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::pyrUp(d_src, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// Canny //////////////////////// TEST(Canny) { Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE); if (img.empty()) { throw runtime_error("can't open aloeL.jpg"); } SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1"; Mat edges(img.size(), CV_8UC1); CPU_ON; Canny(img, edges, 50.0, 100.0); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_img(img); ocl::oclMat d_edges; ocl::CannyBuf d_buf; WARMUP_ON; ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); WARMUP_OFF; GPU_ON; ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); GPU_OFF; GPU_FULL_ON; d_img.upload(img); ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); d_edges.download(edges); GPU_FULL_OFF; #endif } ///////////// Haar //////////////////////// #ifdef USE_OPENCL namespace cv { namespace ocl { struct getRect { Rect operator()(const CvAvgComp &e) const { return e.rect; } }; class CascadeClassifier_GPU : public OclCascadeClassifier { public: void detectMultiScale(oclMat &image, std::vector& faces, double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0, Size minSize = Size(), Size maxSize = Size()) { (void)maxSize; MemStorage storage(cvCreateMemStorage(0)); //CvMat img=image; CvSeq *objs = oclHaarDetectObjects(image, storage, scaleFactor, minNeighbors, flags, minSize); vector vecAvgComp; Seq(objs).copyTo(vecAvgComp); faces.resize(vecAvgComp.size()); std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect()); } }; } } #endif TEST(Haar) { Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE); if (img.empty()) { throw runtime_error("can't open basketball1.png"); } 
CascadeClassifier faceCascadeCPU; if (!faceCascadeCPU.load(abspath("haarcascade_frontalface_alt.xml"))) { throw runtime_error("can't load haarcascade_frontalface_alt.xml"); } vector faces; SUBTEST << img.cols << "x" << img.rows << "; scale image"; CPU_ON; faceCascadeCPU.detectMultiScale(img, faces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); CPU_OFF; #ifdef USE_OPENCL ocl::CascadeClassifier_GPU faceCascade; if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml"))) { throw runtime_error("can't load haarcascade_frontalface_alt.xml"); } ocl::oclMat d_img(img); faces.clear(); WARMUP_ON; faceCascade.detectMultiScale(d_img, faces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); WARMUP_OFF; faces.clear(); GPU_ON; faceCascade.detectMultiScale(d_img, faces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); GPU_OFF; GPU_FULL_ON; d_img.upload(img); faceCascade.detectMultiScale(d_img, faces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); GPU_FULL_OFF; #endif } ///////////// blend //////////////////////// template void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold) { result_gold.create(img1.size(), img1.type()); int cn = img1.channels(); for (int y = 0; y < img1.rows; ++y) { const float *weights1_row = weights1.ptr(y); const float *weights2_row = weights2.ptr(y); const T *img1_row = img1.ptr(y); const T *img2_row = img2.ptr(y); T *result_gold_row = result_gold.ptr(y); for (int x = 0; x < img1.cols * cn; ++x) { float w1 = weights1_row[x / cn]; float w2 = weights2_row[x / cn]; result_gold_row[x] = static_cast((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f)); } } } TEST(blend) { Mat src1, src2, weights1, weights2, dst; #ifdef USE_OPENCL ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst; #endif int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / 
sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] << " and CV_32FC1"; gen(src1, size, size, all_type[j], 0, 256); gen(src2, size, size, all_type[j], 0, 256); gen(weights1, size, size, CV_32FC1, 0, 1); gen(weights2, size, size, CV_32FC1, 0, 1); blendLinearGold(src1, src2, weights1, weights2, dst); CPU_ON; blendLinearGold(src1, src2, weights1, weights2, dst); CPU_OFF; #ifdef USE_OPENCL d_src1.upload(src1); d_src2.upload(src2); d_weights1.upload(weights1); d_weights2.upload(weights2); WARMUP_ON; ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); WARMUP_OFF; GPU_ON; ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); d_src2.upload(src2); d_weights1.upload(weights1); d_weights2.upload(weights2); ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// columnSum//////////////////////// TEST(columnSum) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif for (int size = 1000; size <= 4000; size *= 2) { SUBTEST << size << 'x' << size << "; CV_32FC1"; gen(src, size, size, CV_32FC1, 0, 256); CPU_ON; dst.create(src.size(), src.type()); for (int i = 1; i < src.rows; ++i) { for (int j = 0; j < src.cols; ++j) { dst.at(i, j) = src.at(i, j) += src.at(i - 1, j); } } CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::columnSum(d_src, d_dst); WARMUP_OFF; GPU_ON; ocl::columnSum(d_src, d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::columnSum(d_src, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } ///////////// HOG//////////////////////// TEST(HOG) { Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE); if (src.empty()) { throw runtime_error("can't open road.png"); } cv::HOGDescriptor hog; hog.setSVMDetector(hog.getDefaultPeopleDetector()); std::vector found_locations; SUBTEST << 768 << 'x' << 576 << "; road.png"; hog.detectMultiScale(src, found_locations); CPU_ON; 
hog.detectMultiScale(src, found_locations); CPU_OFF; #ifdef USE_OPENCL cv::ocl::HOGDescriptor ocl_hog; ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector()); ocl::oclMat d_src; d_src.upload(src); WARMUP_ON; ocl_hog.detectMultiScale(d_src, found_locations); WARMUP_OFF; GPU_ON; ocl_hog.detectMultiScale(d_src, found_locations); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl_hog.detectMultiScale(d_src, found_locations); GPU_FULL_OFF; #endif } ///////////// SURF //////////////////////// TEST(SURF) { Mat keypoints_cpu; Mat descriptors_cpu; Mat src = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE); if (src.empty()) { throw runtime_error("can't open aloeL.jpg"); } SUBTEST << src.cols << "x" << src.rows << "; aloeL.jpg"; SURF surf; vector keypoints; Mat descriptors; surf(src, Mat(), keypoints, descriptors); CPU_ON; keypoints.clear(); surf(src, Mat(), keypoints, descriptors); CPU_OFF; #ifdef USE_OPENCL ocl::SURF_OCL d_surf; ocl::oclMat d_src(src); ocl::oclMat d_keypoints; ocl::oclMat d_descriptors; WARMUP_ON; d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); WARMUP_OFF; GPU_ON; d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); if (!d_keypoints.empty()) { d_keypoints.download(keypoints_cpu); } if (!d_descriptors.empty()) { d_descriptors.download(descriptors_cpu); } GPU_FULL_OFF; #endif } //////////////////// BruteForceMatch ///////////////// TEST(BruteForceMatcher) { Mat trainIdx_cpu; Mat distance_cpu; Mat allDist_cpu; Mat nMatches_cpu; for (int size = 1000; size <= 4000; size *= 2) { // Init CPU matcher int desc_len = 64; BFMatcher matcher(NORM_L2); Mat query; gen(query, size, desc_len, CV_32F, 0, 1); Mat train; gen(train, size, desc_len, CV_32F, 0, 1); // Output vector< vector > matches(2); #ifdef USE_OPENCL // Init GPU matcher ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist); ocl::oclMat d_query(query); ocl::oclMat 
d_train(train); ocl::oclMat d_trainIdx, d_distance, d_allDist, d_nMatches; #endif SUBTEST << size << "; match"; matcher.match(query, train, matches[0]); CPU_ON; matcher.match(query, train, matches[0]); CPU_OFF; #ifdef USE_OPENCL WARMUP_ON; d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); WARMUP_OFF; GPU_ON; d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); GPU_OFF; GPU_FULL_ON; d_query.upload(query); d_train.upload(train); d_matcher.match(d_query, d_train, matches[0]); GPU_FULL_OFF; #endif SUBTEST << size << "; knnMatch"; matcher.knnMatch(query, train, matches, 2); CPU_ON; matcher.knnMatch(query, train, matches, 2); CPU_OFF; #ifdef USE_OPENCL WARMUP_ON; d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); WARMUP_OFF; GPU_ON; d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); GPU_OFF; GPU_FULL_ON; d_query.upload(query); d_train.upload(train); d_matcher.knnMatch(d_query, d_train, matches, 2); GPU_FULL_OFF; #endif SUBTEST << size << "; radiusMatch"; float max_distance = 2.0f; matcher.radiusMatch(query, train, matches, max_distance); CPU_ON; matcher.radiusMatch(query, train, matches, max_distance); CPU_OFF; #ifdef USE_OPENCL d_trainIdx.release(); WARMUP_ON; d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); WARMUP_OFF; GPU_ON; d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); GPU_OFF; GPU_FULL_ON; d_query.upload(query); d_train.upload(train); d_matcher.radiusMatch(d_query, d_train, matches, max_distance); GPU_FULL_OFF; #endif } } ///////////// Lut //////////////////////// TEST(lut) { Mat src, lut, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_lut, d_dst; #endif int all_type[] = {CV_8UC1, CV_8UC3}; std::string type_name[] = {"CV_8UC1", "CV_8UC3"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << 
"; " << type_name[j]; gen(src, size, size, all_type[j], 0, 256); gen(lut, 1, 256, CV_8UC1, 0, 1); gen(dst, size, size, all_type[j], 0, 256); LUT(src, lut, dst); CPU_ON; LUT(src, lut, dst); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); d_lut.upload(lut); WARMUP_ON; ocl::LUT(d_src, d_lut, d_dst); WARMUP_OFF; GPU_ON; ocl::LUT(d_src, d_lut, d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_lut.upload(lut); ocl::LUT(d_src, d_lut, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// Exp //////////////////////// TEST(Exp) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif for (int size = 1000; size <= 4000; size *= 2) { SUBTEST << size << 'x' << size << "; CV_32FC1"; gen(src, size, size, CV_32FC1, 0, 256); gen(dst, size, size, CV_32FC1, 0, 256); exp(src, dst); CPU_ON; exp(src, dst); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::exp(d_src, d_dst); WARMUP_OFF; GPU_ON; ocl::exp(d_src, d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::exp(d_src, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } ///////////// LOG //////////////////////// TEST(Log) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif for (int size = 1000; size <= 4000; size *= 2) { SUBTEST << size << 'x' << size << "; 32F"; gen(src, size, size, CV_32F, 1, 10); log(src, dst); CPU_ON; log(src, dst); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::log(d_src, d_dst); WARMUP_OFF; GPU_ON; ocl::log(d_src, d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::log(d_src, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } ///////////// Add //////////////////////// TEST(Add) { Mat src1, src2, dst; #ifdef USE_OPENCL ocl::oclMat d_src1, d_src2, d_dst; #endif int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j]; gen(src1, size, size, 
all_type[j], 0, 1);
            gen(src2, size, size, all_type[j], 0, 1);

            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
            add(src1, src2, dst);

            CPU_ON; add(src1, src2, dst); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON; ocl::add(d_src1, d_src2, d_dst); WARMUP_OFF;

            GPU_ON; ocl::add(d_src1, d_src2, d_dst); GPU_OFF;

            // The GPU_FULL section also contains the uploads and the result download.
            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::add(d_src1, d_src2, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// Mul ////////////////////////
// Runs multiply and ocl::multiply on size x size images (size = 1000, 2000, 4000)
// of type CV_8UC1 and CV_8UC4.
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this chunk;
// presumably they drive the TestSystem timers - confirm.
TEST(Mul)
{
    Mat src1, src2, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_src2, d_dst;
#endif

    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            multiply(src1, src2, dst);

            CPU_ON; multiply(src1, src2, dst); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON; ocl::multiply(d_src1, d_src2, d_dst); WARMUP_OFF;

            GPU_ON; ocl::multiply(d_src1, d_src2, d_dst); GPU_OFF;

            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::multiply(d_src1, d_src2, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// Div ////////////////////////
// Runs divide and ocl::divide on size x size images (size = 1000, 2000, 4000)
// of type CV_8UC1 and CV_8UC4.
TEST(Div)
{
    Mat src1, src2, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_src2, d_dst;
#endif

    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            divide(src1, src2, dst);

            CPU_ON; divide(src1, src2, dst); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON;
ocl::divide(d_src1, d_src2, d_dst);
            WARMUP_OFF;

            GPU_ON; ocl::divide(d_src1, d_src2, d_dst); GPU_OFF;

            // The GPU_FULL section also contains the uploads and the result download.
            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::divide(d_src1, d_src2, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// Absdiff ////////////////////////
// Runs absdiff and ocl::absdiff on size x size images (size = 1000, 2000, 4000)
// of type CV_8UC1 and CV_8UC4.
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this chunk;
// presumably they drive the TestSystem timers - confirm.
TEST(Absdiff)
{
    Mat src1, src2, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_src2, d_dst;
#endif

    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
            absdiff(src1, src2, dst);

            CPU_ON; absdiff(src1, src2, dst); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON; ocl::absdiff(d_src1, d_src2, d_dst); WARMUP_OFF;

            GPU_ON; ocl::absdiff(d_src1, d_src2, d_dst); GPU_OFF;

            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::absdiff(d_src1, d_src2, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// CartToPolar ////////////////////////
// Runs cartToPolar and ocl::cartToPolar on size x size CV_32FC1 images
// (size = 1000, 2000, 4000); the last argument (1) is passed to both variants.
TEST(CartToPolar)
{
    Mat src1, src2, dst, dst1;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
#endif

    int all_type[] = {CV_32FC1};
    std::string type_name[] = {"CV_32FC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);
            gen(dst1, size, size, all_type[j], 0, 256);

            cartToPolar(src1, src2, dst, dst1, 1);

            CPU_ON; cartToPolar(src1, src2, dst, dst1, 1); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON; ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); WARMUP_OFF;

            GPU_ON;
            ocl::cartToPolar(d_src1,
d_src2, d_dst, d_dst1, 1);
            GPU_OFF;

            // The GPU_FULL section also contains the uploads and both result downloads.
            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
            d_dst.download(dst);
            d_dst1.download(dst1);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// PolarToCart ////////////////////////
// Runs polarToCart and ocl::polarToCart on size x size CV_32FC1 images
// (size = 1000, 2000, 4000); the last argument (1) is passed to both variants.
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this chunk;
// presumably they drive the TestSystem timers - confirm.
TEST(PolarToCart)
{
    Mat src1, src2, dst, dst1;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
#endif

    int all_type[] = {CV_32FC1};
    std::string type_name[] = {"CV_32FC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);
            gen(dst1, size, size, all_type[j], 0, 256);

            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
            polarToCart(src1, src2, dst, dst1, 1);

            CPU_ON; polarToCart(src1, src2, dst, dst1, 1); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON; ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); WARMUP_OFF;

            GPU_ON; ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); GPU_OFF;

            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
            d_dst.download(dst);
            d_dst1.download(dst1);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// Magnitude ////////////////////////
// Runs magnitude and ocl::magnitude on size x size CV_32FC1 images
// (size = 1000, 2000, 4000).
TEST(magnitude)
{
    Mat x, y, mag;
#ifdef USE_OPENCL
    ocl::oclMat d_x, d_y, d_mag;
#endif

    int all_type[] = {CV_32FC1};
    std::string type_name[] = {"CV_32FC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(x, size, size, all_type[j], 0, 1);
            gen(y, size, size, all_type[j], 0, 1);

            magnitude(x, y, mag);

            CPU_ON; magnitude(x, y, mag); CPU_OFF;

#ifdef USE_OPENCL
            d_x.upload(x);
            d_y.upload(y);

            WARMUP_ON; ocl::magnitude(d_x, d_y, d_mag); WARMUP_OFF;

            GPU_ON; ocl::magnitude(d_x, d_y, d_mag); GPU_OFF;

            GPU_FULL_ON;
            d_x.upload(x);
            d_y.upload(y);
            ocl::magnitude(d_x,
d_y, d_mag); d_mag.download(mag); GPU_FULL_OFF; #endif } } } ///////////// Transpose //////////////////////// TEST(Transpose) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j]; gen(src, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); transpose(src, dst); CPU_ON; transpose(src, dst); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::transpose(d_src, d_dst); WARMUP_OFF; GPU_ON; ocl::transpose(d_src, d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::transpose(d_src, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// Flip //////////////////////// TEST(Flip) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; FLIP_BOTH"; gen(src, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); flip(src, dst, 0); CPU_ON; flip(src, dst, 0); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::flip(d_src, d_dst, 0); WARMUP_OFF; GPU_ON; ocl::flip(d_src, d_dst, 0); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::flip(d_src, d_dst, 0); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// minMax //////////////////////// TEST(minMax) { Mat src; #ifdef USE_OPENCL ocl::oclMat d_src; #endif double min_val, max_val; Point min_loc, max_loc; int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 
'x' << size << "; " << type_name[j]; gen(src, size, size, all_type[j], 0, 256); CPU_ON; minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::minMax(d_src, &min_val, &max_val); WARMUP_OFF; GPU_ON; ocl::minMax(d_src, &min_val, &max_val); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::minMax(d_src, &min_val, &max_val); GPU_FULL_OFF; #endif } } } ///////////// minMaxLoc //////////////////////// TEST(minMaxLoc) { Mat src; #ifdef USE_OPENCL ocl::oclMat d_src; #endif double min_val, max_val; Point min_loc, max_loc; int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 1); CPU_ON; minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); WARMUP_OFF; GPU_ON; ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); GPU_FULL_OFF; #endif } } } ///////////// Sum //////////////////////// TEST(Sum) { Mat src; Scalar cpures, gpures; #ifdef USE_OPENCL ocl::oclMat d_src; #endif int all_type[] = {CV_8UC1, CV_32SC1}; std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); cpures = sum(src); CPU_ON; cpures = sum(src); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; gpures = ocl::sum(d_src); WARMUP_OFF; GPU_ON; gpures = ocl::sum(d_src); GPU_OFF; GPU_FULL_ON; d_src.upload(src); gpures = ocl::sum(d_src); GPU_FULL_OFF; #endif } } } ///////////// 
countNonZero //////////////////////// TEST(countNonZero) { Mat src; #ifdef USE_OPENCL ocl::oclMat d_src; #endif int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); countNonZero(src); CPU_ON; countNonZero(src); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::countNonZero(d_src); WARMUP_OFF; GPU_ON; ocl::countNonZero(d_src); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::countNonZero(d_src); GPU_FULL_OFF; #endif } } } ///////////// Phase //////////////////////// TEST(Phase) { Mat src1, src2, dst; #ifdef USE_OPENCL ocl::oclMat d_src1, d_src2, d_dst; #endif int all_type[] = {CV_32FC1}; std::string type_name[] = {"CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src1, size, size, all_type[j], 0, 256); gen(src2, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); phase(src1, src2, dst, 1); CPU_ON; phase(src1, src2, dst, 1); CPU_OFF; #ifdef USE_OPENCL d_src1.upload(src1); d_src2.upload(src2); WARMUP_ON; ocl::phase(d_src1, d_src2, d_dst, 1); WARMUP_OFF; GPU_ON; ocl::phase(d_src1, d_src2, d_dst, 1); GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); d_src2.upload(src2); ocl::phase(d_src1, d_src2, d_dst, 1); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// bitwise_and//////////////////////// TEST(bitwise_and) { Mat src1, src2, dst; #ifdef USE_OPENCL ocl::oclMat d_src1, d_src2, d_dst; #endif int all_type[] = {CV_8UC1, CV_32SC1}; std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src1, 
size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
            bitwise_and(src1, src2, dst);

            CPU_ON; bitwise_and(src1, src2, dst); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON; ocl::bitwise_and(d_src1, d_src2, d_dst); WARMUP_OFF;

            GPU_ON; ocl::bitwise_and(d_src1, d_src2, d_dst); GPU_OFF;

            // The GPU_FULL section also contains the uploads and the result download.
            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::bitwise_and(d_src1, d_src2, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// bitwise_or////////////////////////
// Runs bitwise_or and ocl::bitwise_or on size x size images (size = 1000, 2000, 4000)
// of type CV_8UC1 and CV_32SC1.
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this chunk;
// presumably they drive the TestSystem timers - confirm.
TEST(bitwise_or)
{
    Mat src1, src2, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_src2, d_dst;
#endif

    int all_type[] = {CV_8UC1, CV_32SC1};
    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            bitwise_or(src1, src2, dst);

            CPU_ON; bitwise_or(src1, src2, dst); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON; ocl::bitwise_or(d_src1, d_src2, d_dst); WARMUP_OFF;

            GPU_ON; ocl::bitwise_or(d_src1, d_src2, d_dst); GPU_OFF;

            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::bitwise_or(d_src1, d_src2, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// bitwise_xor////////////////////////
// Runs bitwise_xor and ocl::bitwise_xor on size x size images (size = 1000, 2000, 4000)
// of type CV_8UC1 and CV_32SC1.
TEST(bitwise_xor)
{
    Mat src1, src2, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_src2, d_dst;
#endif

    int all_type[] = {CV_8UC1, CV_32SC1};
    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

bitwise_xor(src1, src2, dst);

            CPU_ON; bitwise_xor(src1, src2, dst); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON; ocl::bitwise_xor(d_src1, d_src2, d_dst); WARMUP_OFF;

            GPU_ON; ocl::bitwise_xor(d_src1, d_src2, d_dst); GPU_OFF;

            // The GPU_FULL section also contains the uploads and the result download.
            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::bitwise_xor(d_src1, d_src2, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// bitwise_not////////////////////////
// Runs bitwise_not and ocl::bitwise_not on size x size images (size = 1000, 2000, 4000)
// of type CV_8UC1 and CV_32SC1.
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this chunk;
// presumably they drive the TestSystem timers - confirm.
TEST(bitwise_not)
{
    Mat src1, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_dst;
#endif

    int all_type[] = {CV_8UC1, CV_32SC1};
    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
            bitwise_not(src1, dst);

            CPU_ON; bitwise_not(src1, dst); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);

            WARMUP_ON; ocl::bitwise_not(d_src1, d_dst); WARMUP_OFF;

            GPU_ON; ocl::bitwise_not(d_src1, d_dst); GPU_OFF;

            GPU_FULL_ON;
            d_src1.upload(src1);
            ocl::bitwise_not(d_src1, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// compare////////////////////////
// Runs compare and ocl::compare with CMP_EQ on size x size images
// (size = 1000, 2000, 4000) of type CV_8UC1 and CV_32FC1.
TEST(compare)
{
    Mat src1, src2, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_src2, d_dst;
#endif

    // NOTE(review): this local int is named like OpenCV's CMP_EQ comparison flag and
    // hides it inside this test; presumably the values agree (0) - confirm.
    int CMP_EQ = 0;
    int all_type[] = {CV_8UC1, CV_32FC1};
    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            compare(src1, src2, dst, CMP_EQ);

            CPU_ON; compare(src1, src2, dst, CMP_EQ); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON; ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); WARMUP_OFF;

            GPU_ON;
            ocl::compare(d_src1,
d_src2, d_dst, CMP_EQ); GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); d_src2.upload(src2); ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// pow //////////////////////// TEST(pow) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int all_type[] = {CV_32FC1}; std::string type_name[] = {"CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 100); gen(dst, size, size, all_type[j], 0, 100); pow(src, -2.0, dst); CPU_ON; pow(src, -2.0, dst); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); d_dst.upload(dst); WARMUP_ON; ocl::pow(d_src, -2.0, d_dst); WARMUP_OFF; GPU_ON; ocl::pow(d_src, -2.0, d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::pow(d_src, -2.0, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// MagnitudeSqr//////////////////////// TEST(MagnitudeSqr) { Mat src1, src2, dst; #ifdef USE_OPENCL ocl::oclMat d_src1, d_src2, d_dst; #endif int all_type[] = {CV_32FC1}; std::string type_name[] = {"CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++) { SUBTEST << size << 'x' << size << "; " << type_name[t]; gen(src1, size, size, all_type[t], 0, 256); gen(src2, size, size, all_type[t], 0, 256); gen(dst, size, size, all_type[t], 0, 256); for (int i = 0; i < src1.rows; ++i) for (int j = 0; j < src1.cols; ++j) { float val1 = src1.at(i, j); float val2 = src2.at(i, j); ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; } CPU_ON; for (int i = 0; i < src1.rows; ++i) for (int j = 0; j < src1.cols; ++j) { float val1 = src1.at(i, j); float val2 = src2.at(i, j); ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; } CPU_OFF; #ifdef USE_OPENCL d_src1.upload(src1); d_src2.upload(src2); WARMUP_ON; ocl::magnitudeSqr(d_src1, 
d_src2, d_dst);
            WARMUP_OFF;

            GPU_ON; ocl::magnitudeSqr(d_src1, d_src2, d_dst); GPU_OFF;

            // The GPU_FULL section also contains the uploads and the result download.
            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::magnitudeSqr(d_src1, d_src2, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// AddWeighted////////////////////////
// Runs addWeighted and ocl::addWeighted (alpha = 2.0, beta = 1.0, gama = 3.0) on
// size x size images (size = 1000, 2000, 4000) of type CV_8UC1 and CV_32FC1.
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this chunk;
// presumably they drive the TestSystem timers - confirm.
TEST(AddWeighted)
{
    Mat src1, src2, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_src2, d_dst;
#endif

    double alpha = 2.0, beta = 1.0, gama = 3.0;
    int all_type[] = {CV_8UC1, CV_32FC1};
    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
            addWeighted(src1, alpha, src2, beta, gama, dst);

            CPU_ON; addWeighted(src1, alpha, src2, beta, gama, dst); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON; ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); WARMUP_OFF;

            GPU_ON; ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); GPU_OFF;

            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// Blur////////////////////////
// Runs blur and ocl::blur with a 3x3 kernel, anchor Point(-1, -1) and BORDER_CONSTANT
// on size x size images (size = 1000, 2000, 4000) of type CV_8UC1 and CV_8UC4.
TEST(Blur)
{
    Mat src1, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_dst;
#endif

    Size ksize = Size(3, 3);
    int bordertype = BORDER_CONSTANT;
    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            blur(src1, dst, ksize, Point(-1, -1), bordertype);

            CPU_ON; blur(src1, dst, ksize, Point(-1, -1), bordertype); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);

WARMUP_ON; ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); WARMUP_OFF;

            GPU_ON; ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); GPU_OFF;

            // The GPU_FULL section also contains the upload and the result download.
            GPU_FULL_ON;
            d_src1.upload(src1);
            ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// Laplacian////////////////////////
// Runs Laplacian and ocl::Laplacian (ddepth -1, ksize 3, scale 1) on size x size
// images (size = 1000, 2000, 4000) of type CV_8UC1 and CV_8UC4.
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this chunk;
// presumably they drive the TestSystem timers - confirm.
TEST(Laplacian)
{
    Mat src1, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_dst;
#endif

    int ksize = 3;
    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src1, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
            Laplacian(src1, dst, -1, ksize, 1);

            CPU_ON; Laplacian(src1, dst, -1, ksize, 1); CPU_OFF;

#ifdef USE_OPENCL
            d_src1.upload(src1);

            WARMUP_ON; ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); WARMUP_OFF;

            GPU_ON; ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); GPU_OFF;

            GPU_FULL_ON;
            d_src1.upload(src1);
            ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// Erode ////////////////////
// Runs erode and ocl::erode with a 3x3 MORPH_RECT structuring element on size x size
// images (size = 1000, 2000, 4000) of type CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4.
TEST(Erode)
{
    Mat src, dst, ker;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(256));
            ker = getStructuringElement(MORPH_RECT, Size(3, 3));

            erode(src, dst, ker);

            CPU_ON; erode(src, dst, ker); CPU_OFF;

#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON; ocl::erode(d_src, d_dst, ker); WARMUP_OFF;

            GPU_ON; ocl::erode(d_src, d_dst, ker); GPU_OFF;

            GPU_FULL_ON;
            d_src.upload(src);
            ocl::erode(d_src, d_dst, ker);
d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// Sobel //////////////////////// TEST(Sobel) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int dx = 1; int dy = 1; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); Sobel(src, dst, -1, dx, dy); CPU_ON; Sobel(src, dst, -1, dx, dy); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::Sobel(d_src, d_dst, -1, dx, dy); WARMUP_OFF; GPU_ON; ocl::Sobel(d_src, d_dst, -1, dx, dy); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::Sobel(d_src, d_dst, -1, dx, dy); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// Scharr //////////////////////// TEST(Scharr) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int dx = 1; int dy = 0; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); Scharr(src, dst, -1, dx, dy); CPU_ON; Scharr(src, dst, -1, dx, dy); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::Scharr(d_src, d_dst, -1, dx, dy); WARMUP_OFF; GPU_ON; ocl::Scharr(d_src, d_dst, -1, dx, dy); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::Scharr(d_src, d_dst, -1, dx, dy); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// GaussianBlur //////////////////////// TEST(GaussianBlur) { Mat src, dst; int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size 
<< "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); GaussianBlur(src, dst, Size(9, 9), 0); CPU_ON; GaussianBlur(src, dst, Size(9, 9), 0); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_src(src); ocl::oclMat d_dst(src.size(), src.type()); ocl::oclMat d_buf; WARMUP_ON; ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); WARMUP_OFF; GPU_ON; ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// equalizeHist //////////////////////// TEST(equalizeHist) { Mat src, dst; int all_type[] = {CV_8UC1}; std::string type_name[] = {"CV_8UC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); equalizeHist(src, dst); CPU_ON; equalizeHist(src, dst); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_src(src); ocl::oclMat d_dst; ocl::oclMat d_hist; ocl::oclMat d_buf; WARMUP_ON; ocl::equalizeHist(d_src, d_dst); WARMUP_OFF; GPU_ON; ocl::equalizeHist(d_src, d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::equalizeHist(d_src, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } } /////////// CopyMakeBorder ////////////////////// TEST(CopyMakeBorder) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_dst; #endif int bordertype = BORDER_CONSTANT; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); CPU_ON; copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_src(src); WARMUP_ON; ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, 
bordertype, cv::Scalar(1.0)); WARMUP_OFF; GPU_ON; ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// cornerMinEigenVal //////////////////////// TEST(cornerMinEigenVal) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_dst; #endif int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4); int borderType = BORDER_REFLECT; int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); CPU_ON; cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_src(src); WARMUP_ON; ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); WARMUP_OFF; GPU_ON; ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// cornerHarris //////////////////////// TEST(cornerHarris) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; BORDER_REFLECT"; gen(src, size, size, all_type[j], 0, 1); cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT); CPU_ON; cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; 
ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
            WARMUP_OFF;

            GPU_ON; ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); GPU_OFF;

            // The GPU_FULL section also contains the upload and the result download.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// integral ////////////////////////
// Runs integral and ocl::integral on size x size CV_8UC1 images
// (size = 1000, 2000, 4000).
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this chunk;
// presumably they drive the TestSystem timers - confirm.
TEST(integral)
{
    Mat src, sum;
#ifdef USE_OPENCL
    // NOTE(review): d_buf is declared but not referenced anywhere in this test.
    ocl::oclMat d_src, d_sum, d_buf;
#endif

    int all_type[] = {CV_8UC1};
    std::string type_name[] = {"CV_8UC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src, size, size, all_type[j], 0, 256);

            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
            integral(src, sum);

            CPU_ON; integral(src, sum); CPU_OFF;

#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON; ocl::integral(d_src, d_sum); WARMUP_OFF;

            GPU_ON; ocl::integral(d_src, d_sum); GPU_OFF;

            GPU_FULL_ON;
            d_src.upload(src);
            ocl::integral(d_src, d_sum);
            d_sum.download(sum);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// WarpAffine ////////////////////////
// Runs warpAffine and ocl::warpAffine with INTER_NEAREST on size x size images
// (size = 1000, 2000, 4000) of type CV_8UC1 and CV_8UC4. The 2x3 matrix M combines
// cos/sin of 3.14 / 6 with offsets of 100.0 and -100.0.
TEST(WarpAffine)
{
    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    static const double coeffs[2][3] =
    {
        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
        {sin(3.14 / 6), cos(3.14 / 6), -100.0}
    };
    // M is a Mat header constructed directly over the coeffs array (const is cast away).
    Mat M(2, 3, CV_64F, (void *)coeffs);
    int interpolation = INTER_NEAREST;

    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);
            Size size1 = Size(size, size);

            warpAffine(src, dst, M, size1, interpolation);

            CPU_ON; warpAffine(src, dst, M, size1, interpolation); CPU_OFF;

#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON; ocl::warpAffine(d_src, d_dst, M, size1, interpolation); WARMUP_OFF;

            GPU_ON;
            ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
GPU_OFF;

            // The GPU_FULL section also contains the upload and the result download.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// WarpPerspective ////////////////////////
// Runs warpPerspective and ocl::warpPerspective with INTER_NEAREST on size x size
// images (size = 1000, 2000, 4000) of type CV_8UC1 and CV_8UC4. The 3x3 matrix M
// combines cos/sin of 3.14 / 6 with offsets of 100.0 and -100.0 and a {0, 0, 1} last row.
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this chunk;
// presumably they drive the TestSystem timers - confirm.
TEST(WarpPerspective)
{
    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    static const double coeffs[3][3] =
    {
        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
        {sin(3.14 / 6), cos(3.14 / 6), -100.0},
        {0.0, 0.0, 1.0}
    };
    // M is a Mat header constructed directly over the coeffs array (const is cast away).
    Mat M(3, 3, CV_64F, (void *)coeffs);
    int interpolation = INTER_NEAREST;

    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);
            Size size1 = Size(size, size);

            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
            warpPerspective(src, dst, M, size1, interpolation);

            CPU_ON; warpPerspective(src, dst, M, size1, interpolation); CPU_OFF;

#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON; ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); WARMUP_OFF;

            GPU_ON; ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); GPU_OFF;

            GPU_FULL_ON;
            d_src.upload(src);
            ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// resize ////////////////////////
// Runs resize and ocl::resize on size x size images (size = 1000, 2000, 4000) of type
// CV_8UC1 and CV_8UC4: first with scale factors 2.0 ("up"), then 0.5 ("down").
TEST(resize)
{
    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; up";

            gen(src, size, size, all_type[j], 0, 256);

            resize(src, dst, Size(), 2.0, 2.0);

            CPU_ON; resize(src, dst, Size(), 2.0, 2.0); CPU_OFF;

#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON; ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); WARMUP_OFF;

            GPU_ON;
            ocl::resize(d_src, d_dst, Size(),
2.0, 2.0);
            GPU_OFF;

            // The GPU_FULL section also contains the upload and the result download.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }

    // Second pass: same sizes and types, scale factors 0.5 in both directions.
    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; down";

            gen(src, size, size, all_type[j], 0, 256);

            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
            resize(src, dst, Size(), 0.5, 0.5);

            CPU_ON; resize(src, dst, Size(), 0.5, 0.5); CPU_OFF;

#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON; ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); WARMUP_OFF;

            GPU_ON; ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); GPU_OFF;

            GPU_FULL_ON;
            d_src.upload(src);
            ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}

///////////// threshold////////////////////////
// Runs threshold and ocl::threshold (thresh 50.0, maxval 0.0) on size x size images
// (size = 1000, 2000, 4000): CV_8U with THRESH_BINARY, then CV_32FC1 with THRESH_TRUNC.
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this chunk;
// presumably they drive the TestSystem timers - confirm.
TEST(threshold)
{
    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int size = 1000; size <= 4000; size *= 2)
    {
        SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY";

        gen(src, size, size, CV_8U, 0, 100);

        threshold(src, dst, 50.0, 0.0, THRESH_BINARY);

        CPU_ON; threshold(src, dst, 50.0, 0.0, THRESH_BINARY); CPU_OFF;

#ifdef USE_OPENCL
        d_src.upload(src);

        WARMUP_ON; ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); WARMUP_OFF;

        GPU_ON; ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); GPU_OFF;

        GPU_FULL_ON;
        d_src.upload(src);
        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
        d_dst.download(dst);
        GPU_FULL_OFF;
#endif
    }

    for (int size = 1000; size <= 4000; size *= 2)
    {
        // NOTE(review): the "[NPP]" suffix in this label is not explained by anything
        // in this test, which only calls threshold and ocl::threshold - confirm intent.
        SUBTEST << size << 'x' << size << "; 32FC1; THRESH_TRUNC [NPP]";

        gen(src, size, size, CV_32FC1, 0, 100);

        threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);

        CPU_ON; threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); CPU_OFF;

#ifdef USE_OPENCL
        d_src.upload(src);

        WARMUP_ON; ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); WARMUP_OFF;

        GPU_ON; ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); GPU_OFF;

        GPU_FULL_ON;
        d_src.upload(src);
        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
d_dst.download(dst); GPU_FULL_OFF; #endif } } ///////////// meanShiftFiltering//////////////////////// TEST(meanShiftFiltering) { int sp = 10, sr = 10; Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif for (int size = 1000; size <= 4000; size *= 2) { SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4"; gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256)); pyrMeanShiftFiltering(src, dst, sp, sr); CPU_ON; pyrMeanShiftFiltering(src, dst, sp, sr); CPU_OFF; #ifdef USE_OPENCL gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); d_src.upload(src); WARMUP_ON; ocl::meanShiftFiltering(d_src, d_dst, sp, sr); WARMUP_OFF; GPU_ON; ocl::meanShiftFiltering(d_src, d_dst, sp, sr); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::meanShiftFiltering(d_src, d_dst, sp, sr); d_dst.download(dst); GPU_FULL_OFF; #endif } } ///////////// meanShiftProc//////////////////////// COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab) { int isr2 = sr * sr; int c0, c1, c2, c3; int iter; uchar *ptr = NULL; uchar *pstart = NULL; int revx = 0, revy = 0; c0 = sptr[0]; c1 = sptr[1]; c2 = sptr[2]; c3 = sptr[3]; // iterate meanshift procedure for (iter = 0; iter < maxIter; iter++) { int count = 0; int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) int minx = x0 - sp; int miny = y0 - sp; int maxx = x0 + sp; int maxy = y0 + sp; //deal with the image boundary if (minx < 0) { minx = 0; } if (miny < 0) { miny = 0; } if (maxx >= size.width) { maxx = size.width - 1; } if (maxy >= size.height) { maxy = size.height - 1; } if (iter == 0) { pstart = sptr; } else { pstart = pstart + revy * sstep + (revx << 2); //point to the new position } ptr = pstart; ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) { int rowCount = 0; int x = minx; #if 
CV_ENABLE_UNROLLED for (; x + 4 <= maxx; x += 4, ptr += 16) { int t0, t1, t2; t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; s2 += t2; sx += x; rowCount++; } t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; s2 += t2; sx += x + 1; rowCount++; } t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; s2 += t2; sx += x + 2; rowCount++; } t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; s2 += t2; sx += x + 3; rowCount++; } } #endif for (; x <= maxx; x++, ptr += 4) { int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; s2 += t2; sx += x; rowCount++; } } if (rowCount == 0) { continue; } count += rowCount; sy += y * rowCount; } if (count == 0) { break; } int x1 = sx / count; int y1 = sy / count; s0 = s0 / count; s1 = s1 / count; s2 = s2 / count; bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); //revise the pointer corresponding to the new (y0,x0) revx = x1 - x0; revy = y1 - y0; x0 = x1; y0 = y1; c0 = s0; c1 = s1; c2 = s2; if (stopFlag) { break; } } //for iter dptr[0] = (uchar)c0; dptr[1] = (uchar)c1; dptr[2] = (uchar)c2; dptr[3] = (uchar)c3; COOR coor; coor.x = static_cast(x0); coor.y = static_cast(y0); return coor; } void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit) { if (src_roi.empty()) { CV_Error(CV_StsBadArg, "The input image is empty"); } if (src_roi.depth() != CV_8U || src_roi.channels() != 4) { CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported"); } 
CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) && (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows)); CV_Assert(!(dstCoor_roi.step & 0x3)); if (!(crit.type & cv::TermCriteria::MAX_ITER)) { crit.maxCount = 5; } int maxIter = std::min(std::max(crit.maxCount, 1), 100); float eps; if (!(crit.type & cv::TermCriteria::EPS)) { eps = 1.f; } eps = (float)std::max(crit.epsilon, 0.0); int tab[512]; for (int i = 0; i < 512; i++) { tab[i] = (i - 255) * (i - 255); } uchar *sptr = src_roi.data; uchar *dptr = dst_roi.data; short *dCoorptr = (short *)dstCoor_roi.data; int sstep = (int)src_roi.step; int dstep = (int)dst_roi.step; int dCoorstep = (int)dstCoor_roi.step >> 1; cv::Size size = src_roi.size(); for (int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1)) { for (int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2) { *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); } } } TEST(meanShiftProc) { Mat src, dst, dstCoor_roi; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst, d_dstCoor_roi; #endif TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1); for (int size = 1000; size <= 4000; size *= 2) { SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 "; gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256)); meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); CPU_ON; meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); WARMUP_OFF; GPU_ON; ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); d_dst.download(dst); 
d_dstCoor_roi.download(dstCoor_roi); GPU_FULL_OFF; #endif } } ///////////// ConvertTo//////////////////////// TEST(ConvertTo) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] << " to 32FC1"; gen(src, size, size, all_type[j], 0, 256); //gen(dst, size, size, all_type[j], 0, 256); //d_dst.upload(dst); src.convertTo(dst, CV_32FC1); CPU_ON; src.convertTo(dst, CV_32FC1); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; d_src.convertTo(d_dst, CV_32FC1); WARMUP_OFF; GPU_ON; d_src.convertTo(d_dst, CV_32FC1); GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_src.convertTo(d_dst, CV_32FC1); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// copyTo//////////////////////// TEST(copyTo) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); //gen(dst, size, size, all_type[j], 0, 256); //d_dst.upload(dst); src.copyTo(dst); CPU_ON; src.copyTo(dst); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; d_src.copyTo(d_dst); WARMUP_OFF; GPU_ON; d_src.copyTo(d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_src.copyTo(d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// setTo//////////////////////// TEST(setTo) { Mat src, dst; Scalar val(1, 2, 3, 4); #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / 
sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); src.setTo(val); CPU_ON; src.setTo(val); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; d_src.setTo(val); WARMUP_OFF; GPU_ON; d_src.setTo(val); GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_src.setTo(val); GPU_FULL_OFF; #endif } } } ///////////// Merge//////////////////////// TEST(Merge) { Mat dst; #ifdef USE_OPENCL ocl::oclMat d_dst; #endif int channels = 4; int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; Size size1 = Size(size, size); std::vector src(channels); for (int i = 0; i < channels; ++i) { src[i] = Mat(size1, all_type[j], cv::Scalar::all(i)); } merge(src, dst); CPU_ON; merge(src, dst); CPU_OFF; #ifdef USE_OPENCL std::vector d_src(channels); for (int i = 0; i < channels; ++i) { d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i)); } WARMUP_ON; ocl::merge(d_src, d_dst); WARMUP_OFF; GPU_ON; ocl::merge(d_src, d_dst); GPU_OFF; GPU_FULL_ON; for (int i = 0; i < channels; ++i) { d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i)); } ocl::merge(d_src, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// Split//////////////////////// TEST(Split) { //int channels = 4; int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j]; Size size1 = Size(size, size); Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); std::vector dst; split(src, dst); CPU_ON; split(src, dst); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); std::vector d_dst; 
WARMUP_ON; ocl::split(d_src, d_dst); WARMUP_OFF; GPU_ON; ocl::split(d_src, d_dst); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::split(d_src, d_dst); GPU_FULL_OFF; #endif } } } ///////////// norm//////////////////////// TEST(norm) { Mat src, buf; #ifdef USE_OPENCL ocl::oclMat d_src, d_buf; #endif for (int size = 1000; size <= 4000; size *= 2) { SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF"; gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); norm(src, NORM_INF); CPU_ON; norm(src, NORM_INF); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); d_buf.upload(buf); WARMUP_ON; ocl::norm(d_src, d_buf, NORM_INF); WARMUP_OFF; GPU_ON; ocl::norm(d_src, d_buf, NORM_INF); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::norm(d_src, d_buf, NORM_INF); GPU_FULL_OFF; #endif } } ///////////// remap//////////////////////// TEST(remap) { Mat src, dst, xmap, ymap; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst, d_xmap, d_ymap; #endif int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; int interpolation = INTER_LINEAR; int borderMode = BORDER_CONSTANT; for (int size = 1000; size <= 4000; size *= 2) { for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++) { SUBTEST << size << 'x' << size << "; src " << type_name[t] << "; map CV_32FC1"; gen(src, size, size, all_type[t], 0, 256); xmap.create(size, size, CV_32FC1); dst.create(size, size, CV_32FC1); ymap.create(size, size, CV_32FC1); for (int i = 0; i < size; ++i) { float *xmap_row = xmap.ptr(i); float *ymap_row = ymap.ptr(i); for (int j = 0; j < size; ++j) { xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f; ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f; } } remap(src, dst, xmap, ymap, interpolation, borderMode); CPU_ON; remap(src, dst, xmap, ymap, interpolation, borderMode); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); d_dst.upload(dst); d_xmap.upload(xmap); d_ymap.upload(ymap); WARMUP_ON; ocl::remap(d_src, d_dst, 
d_xmap, d_ymap, interpolation, borderMode); WARMUP_OFF; GPU_ON; ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// cvtColor//////////////////////// TEST(cvtColor) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int all_type[] = {CV_8UC4}; std::string type_name[] = {"CV_8UC4"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { gen(src, size, size, all_type[j], 0, 256); SUBTEST << size << "x" << size << "; " << type_name[j] << " ; CV_RGBA2GRAY"; cvtColor(src, dst, CV_RGBA2GRAY, 4); CPU_ON; cvtColor(src, dst, CV_RGBA2GRAY, 4); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); WARMUP_OFF; GPU_ON; ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// filter2D//////////////////////// TEST(filter2D) { Mat src; for (int size = 1000; size <= 4000; size *= 2) { int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { gen(src, size, size, all_type[j], 0, 256); for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1) { SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ; Mat kernel; gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0); Mat dst; cv::filter2D(src, dst, -1, kernel); CPU_ON; cv::filter2D(src, dst, -1, kernel); CPU_OFF; #ifdef USE_OPENCL ocl::oclMat d_src(src); ocl::oclMat d_dst; WARMUP_ON; ocl::filter2D(d_src, d_dst, -1, kernel); WARMUP_OFF; GPU_ON; ocl::filter2D(d_src, d_dst, -1, kernel); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::filter2D(d_src, d_dst, -1, kernel); d_dst.download(dst); 
GPU_FULL_OFF; #endif } } } } ///////////// dft //////////////////////// TEST(dft) { Mat src, dst; #ifdef USE_OPENCL ocl::oclMat d_src, d_dst; #endif int all_type[] = {CV_32FC1, CV_32FC2}; std::string type_name[] = {"CV_32FC1", "CV_32FC2"}; for (int size = 1000; size <= 4000; size *= 2) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; complex-to-complex"; gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(1)); dft(src, dst); CPU_ON; dft(src, dst); CPU_OFF; #ifdef USE_OPENCL d_src.upload(src); WARMUP_ON; ocl::dft(d_src, d_dst, Size(size, size)); WARMUP_OFF; GPU_ON; ocl::dft(d_src, d_dst, Size(size, size)); GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::dft(d_src, d_dst, Size(size, size)); d_dst.download(dst); GPU_FULL_OFF; #endif } } } ///////////// gemm //////////////////////// TEST(gemm) { Mat src1, src2, src3, dst; #ifdef USE_OPENCL ocl::oclMat d_src1, d_src2, d_src3, d_dst; #endif for (int size = 1000; size <= 4000; size *= 2) { SUBTEST << size << 'x' << size; gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); gemm(src1, src2, 1.0, src3, 1.0, dst); CPU_ON; gemm(src1, src2, 1.0, src3, 1.0, dst); CPU_OFF; #ifdef USE_OPENCL d_src1.upload(src1); d_src2.upload(src2); d_src3.upload(src3); WARMUP_ON; ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); WARMUP_OFF; GPU_ON; ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); d_src2.upload(src2); d_src3.upload(src3); ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); d_dst.download(dst); GPU_FULL_OFF; #endif } } int main(int argc, const char *argv[]) { #ifdef USE_OPENCL vector oclinfo; int num_devices = getDevice(oclinfo); if (num_devices < 1) { cerr << "no device found\n"; return -1; } int devidx = 0; for (size_t i = 0; i < oclinfo.size(); i++) 
{ for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++) { printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str()); } } #endif redirectError(cvErrorCallback); const char *keys = "{ h help | false | print help message }" "{ f filter | | filter for test }" "{ w workdir | | set working directory }" "{ l list | false | show all tests }" "{ d device | 0 | device id }" "{ i iters | 10 | iteration count }" "{ m warmup | 1 | gpu warm up iteration count}" "{ t xtop | 1.1 | xfactor top boundary}" "{ b xbottom | 0.9 | xfactor bottom boundary}" "{ v verify | false | only run gpu once to verify if problems occur}"; CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) { cout << "Avaible options:" << endl; cmd.printMessage(); return 0; } #ifdef USE_OPENCL int device = cmd.get("device"); if (device < 0 || device >= num_devices) { cerr << "Invalid device ID" << endl; return -1; } if (cmd.get("verify")) { TestSystem::instance().setNumIters(1); TestSystem::instance().setGPUWarmupIters(0); TestSystem::instance().setCPUIters(0); } devidx = 0; for (size_t i = 0; i < oclinfo.size(); i++) { for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++, devidx++) { if (device == devidx) { ocl::setDevice(oclinfo[i], (int)j); TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]); printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str()); goto END_DEV; } } } END_DEV: #endif string filter = cmd.get("filter"); string workdir = cmd.get("workdir"); bool list = cmd.get("list"); int iters = cmd.get("iters"); int wu_iters = cmd.get("warmup"); double x_top = cmd.get("xtop"); double x_bottom = cmd.get("xbottom"); TestSystem::instance().setTopThreshold(x_top); TestSystem::instance().setBottomThreshold(x_bottom); if (!filter.empty()) { TestSystem::instance().setTestFilter(filter); } if (!workdir.empty()) { if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\') { workdir += '/'; } TestSystem::instance().setWorkingDir(workdir); } if (list) { 
TestSystem::instance().setListMode(true); } TestSystem::instance().setNumIters(iters); TestSystem::instance().setGPUWarmupIters(wu_iters); TestSystem::instance().run(); return 0; }