opencv/samples/ocl/performance.cpp
2013-02-27 11:16:46 +08:00

4397 lines
109 KiB
C++

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/video/video.hpp"
#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/features2d/features2d.hpp"
#define USE_OPENCL
#ifdef USE_OPENCL
#include "opencv2/ocl/ocl.hpp"
#endif
#define TAB " "
using namespace std;
using namespace cv;
// This program tests most of the functions in the ocl module and generates a data matrix of speedup factors (x-factor) in .csv files.
// All images needed by this test are in the samples/gpu folder.
// For the Haar template, please rename it to facedetect.xml
// (Re)allocates `mat` as rows x cols of `type` and fills it with uniformly
// distributed values drawn between `low` and `high` (fixed RNG seed).
void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high);
// Returns the test system's working directory concatenated with `relpath`.
string abspath(const string &relpath);
// Error callback: forwards the error message to TestSystem::printError and returns 0.
int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *);
// Pair of 16-bit coordinates; used as the return type of do_meanShift.
struct COOR
{
    short x;
    short y;
};
// Mean-shift helpers. Their definitions are not in this part of the file.
// NOTE(review): presumably do_meanShift processes the single pixel at (x0, y0)
// and returns the coordinate it converged to - confirm against the definition.
COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep,
cv::Size size, int sp, int sr, int maxIter, float eps, int *tab);
// NOTE(review): presumably the whole-image CPU reference that fills dst_roi and
// dstCoor_roi from src_roi using do_meanShift - confirm against the definition.
void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi,
int sp, int sr, cv::TermCriteria crit);
// Abstract base for a named unit of work (a test or a test initializer).
class Runnable
{
public:
    // Stores a copy of `runname` as the name reported by name().
    explicit Runnable(const std::string &runname)
        : name_(runname)
    {
    }

    virtual ~Runnable()
    {
    }

    // Body of the unit of work; supplied by the derived class.
    virtual void run() = 0;

    // Name given at construction time.
    const std::string &name() const
    {
        return name_;
    }

private:
    std::string name_;
};
class TestSystem
{
public:
static TestSystem &instance()
{
static TestSystem me;
return me;
}
void setWorkingDir(const std::string &val)
{
working_dir_ = val;
}
const std::string &workingDir() const
{
return working_dir_;
}
void setTestFilter(const std::string &val)
{
test_filter_ = val;
}
const std::string &testFilter() const
{
return test_filter_;
}
void setNumIters(int num_iters)
{
num_iters_ = num_iters;
}
void setGPUWarmupIters(int num_iters)
{
gpu_warmup_iters_ = num_iters;
}
void setCPUIters(int num_iters)
{
cpu_num_iters_ = num_iters;
}
void setTopThreshold(double top)
{
top_ = top;
}
void setBottomThreshold(double bottom)
{
bottom_ = bottom;
}
void addInit(Runnable *init)
{
inits_.push_back(init);
}
void addTest(Runnable *test)
{
tests_.push_back(test);
}
void run();
// It's public because OpenCV callback uses it
void printError(const std::string &msg);
std::stringstream &startNewSubtest()
{
finishCurrentSubtest();
return cur_subtest_description_;
}
bool stop() const
{
return cur_iter_idx_ >= num_iters_;
}
bool cpu_stop() const
{
return cur_iter_idx_ >= cpu_num_iters_;
}
bool warmupStop()
{
return cur_warmup_idx_++ >= gpu_warmup_iters_;
}
void warmupComplete()
{
cur_warmup_idx_ = 0;
}
void cpuOn()
{
cpu_started_ = cv::getTickCount();
}
void cpuOff()
{
int64 delta = cv::getTickCount() - cpu_started_;
cpu_times_.push_back(delta);
++cur_iter_idx_;
}
void cpuComplete()
{
cpu_elapsed_ += meanTime(cpu_times_);
cur_subtest_is_empty_ = false;
cur_iter_idx_ = 0;
}
void gpuOn()
{
gpu_started_ = cv::getTickCount();
}
void gpuOff()
{
int64 delta = cv::getTickCount() - gpu_started_;
gpu_times_.push_back(delta);
++cur_iter_idx_;
}
void gpuComplete()
{
gpu_elapsed_ += meanTime(gpu_times_);
cur_subtest_is_empty_ = false;
cur_iter_idx_ = 0;
}
void gpufullOn()
{
gpu_full_started_ = cv::getTickCount();
}
void gpufullOff()
{
int64 delta = cv::getTickCount() - gpu_full_started_;
gpu_full_times_.push_back(delta);
++cur_iter_idx_;
}
void gpufullComplete()
{
gpu_full_elapsed_ += meanTime(gpu_full_times_);
cur_subtest_is_empty_ = false;
cur_iter_idx_ = 0;
}
bool isListMode() const
{
return is_list_mode_;
}
void setListMode(bool value)
{
is_list_mode_ = value;
}
void setRecordName(const std::string &name)
{
recordname_ = name;
}
void setCurrentTest(const std::string &name)
{
itname_ = name;
itname_changed_ = true;
}
private:
TestSystem():
cur_subtest_is_empty_(true), cpu_elapsed_(0),
gpu_elapsed_(0), gpu_full_elapsed_(0), speedup_total_(0.0),
num_subtests_called_(0),
speedup_faster_count_(0), speedup_slower_count_(0), speedup_equal_count_(0),
speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false),
num_iters_(10), cpu_num_iters_(2),
gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
record_(0), recordname_("performance"), itname_changed_(true)
{
cpu_times_.reserve(num_iters_);
gpu_times_.reserve(num_iters_);
gpu_full_times_.reserve(num_iters_);
}
void finishCurrentSubtest();
void resetCurrentSubtest()
{
cpu_elapsed_ = 0;
gpu_elapsed_ = 0;
gpu_full_elapsed_ = 0;
cur_subtest_description_.str("");
cur_subtest_is_empty_ = true;
cur_iter_idx_ = 0;
cpu_times_.clear();
gpu_times_.clear();
gpu_full_times_.clear();
}
double meanTime(const std::vector<int64> &samples);
void printHeading();
void printSummary();
void printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup);
void writeHeading();
void writeSummary();
void writeMetrics(double cpu_time, double gpu_time, double gpu_full_time,
double speedup, double fullspeedup,
double gpu_min, double gpu_max, double std_dev);
std::string working_dir_;
std::string test_filter_;
std::vector<Runnable *> inits_;
std::vector<Runnable *> tests_;
std::stringstream cur_subtest_description_;
bool cur_subtest_is_empty_;
int64 cpu_started_;
int64 gpu_started_;
int64 gpu_full_started_;
double cpu_elapsed_;
double gpu_elapsed_;
double gpu_full_elapsed_;
double speedup_total_;
double speedup_full_total_;
int num_subtests_called_;
int speedup_faster_count_;
int speedup_slower_count_;
int speedup_equal_count_;
int speedup_full_faster_count_;
int speedup_full_slower_count_;
int speedup_full_equal_count_;
bool is_list_mode_;
double top_;
double bottom_;
int num_iters_;
int cpu_num_iters_; //there's no need to set cpu running same times with gpu
int gpu_warmup_iters_; //gpu warm up times, default is 1
int cur_iter_idx_;
int cur_warmup_idx_; //current gpu warm up times
std::vector<int64> cpu_times_;
std::vector<int64> gpu_times_;
std::vector<int64> gpu_full_times_;
FILE *record_;
std::string recordname_;
std::string itname_;
bool itname_changed_;
};
// GLOBAL_INIT(name) { ... }: defines a Runnable subclass whose constructor
// registers the object through TestSystem::addInit, creates one global
// instance of it, and uses the braces that follow as the body of run().
#define GLOBAL_INIT(name) \
struct name##_init: Runnable { \
name##_init(): Runnable(#name) { \
TestSystem::instance().addInit(this); \
} \
void run(); \
} name##_init_instance; \
void name##_init::run()
// TEST(name) { ... }: same pattern as GLOBAL_INIT, but the object is
// registered through TestSystem::addTest.
#define TEST(name) \
struct name##_test: Runnable { \
name##_test(): Runnable(#name) { \
TestSystem::instance().addTest(this); \
} \
void run(); \
} name##_test_instance; \
void name##_test::run()
// SUBTEST << "description": finishes the subtest in progress and yields the
// stream that receives the description of the next one.
#define SUBTEST TestSystem::instance().startNewSubtest()
// CPU_ON; ...; CPU_OFF; wraps the enclosed statements in a while loop that runs
// until cpu_stop(), timing each pass with cpuOn/cpuOff, then calls cpuComplete().
#define CPU_ON \
while (!TestSystem::instance().cpu_stop()) { \
TestSystem::instance().cpuOn()
#define CPU_OFF \
TestSystem::instance().cpuOff(); \
} TestSystem::instance().cpuComplete()
// GPU_ON; ...; GPU_OFF; same loop shape, bounded by stop() and timed with
// gpuOn/gpuOff, followed by gpuComplete().
#define GPU_ON \
while (!TestSystem::instance().stop()) { \
TestSystem::instance().gpuOn()
#define GPU_OFF \
TestSystem::instance().gpuOff(); \
} TestSystem::instance().gpuComplete()
// GPU_FULL_ON; ...; GPU_FULL_OFF; same loop shape, bounded by stop() and timed
// with gpufullOn/gpufullOff, followed by gpufullComplete().
#define GPU_FULL_ON \
while (!TestSystem::instance().stop()) { \
TestSystem::instance().gpufullOn()
#define GPU_FULL_OFF \
TestSystem::instance().gpufullOff(); \
} TestSystem::instance().gpufullComplete()
// WARMUP_ON; ...; WARMUP_OFF; repeats the enclosed statements until
// warmupStop() returns true (no timing), then resets the warm-up counter.
#define WARMUP_ON \
while (!TestSystem::instance().warmupStop()) {
#define WARMUP_OFF \
} TestSystem::instance().warmupComplete()
void TestSystem::run()
{
if (is_list_mode_)
{
for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it)
{
cout << (*it)->name() << endl;
}
return;
}
// Run test initializers
for (vector<Runnable *>::iterator it = inits_.begin(); it != inits_.end(); ++it)
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
{
(*it)->run();
}
}
printHeading();
writeHeading();
// Run tests
for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it)
{
try
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
{
cout << endl << (*it)->name() << ":\n";
setCurrentTest((*it)->name());
//fprintf(record_,"%s\n",(*it)->name().c_str());
(*it)->run();
finishCurrentSubtest();
}
}
catch (const Exception &)
{
// Message is printed via callback
resetCurrentSubtest();
}
catch (const runtime_error &e)
{
printError(e.what());
resetCurrentSubtest();
}
}
#ifdef USE_OPENCL
printSummary();
writeSummary();
#endif
}
// Converts the timings accumulated for the current subtest into metrics,
// updates the running totals, prints/records one result line and resets the
// per-subtest state. Does nothing when no timing was collected.
void TestSystem::finishCurrentSubtest()
{
    if (cur_subtest_is_empty_)
    {
        // There is no need to print subtest statistics
        return;
    }

    const double freq = getTickFrequency();

    double cpu_time = cpu_elapsed_ / freq * 1000.0;
    double gpu_time = gpu_elapsed_ / freq * 1000.0;
    double gpu_full_time = gpu_full_elapsed_ / freq * 1000.0;

    // max(1.0, ...) keeps the ratio finite when no GPU time was measured
    double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
    speedup_total_ += speedup;

    double fullspeedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_full_elapsed_);
    speedup_full_total_ += fullspeedup;

    if (speedup > top_)
    {
        speedup_faster_count_++;
    }
    else if (speedup < bottom_)
    {
        speedup_slower_count_++;
    }
    else
    {
        speedup_equal_count_++;
    }

    if (fullspeedup > top_)
    {
        speedup_full_faster_count_++;
    }
    else if (fullspeedup < bottom_)
    {
        speedup_full_slower_count_++;
    }
    else
    {
        speedup_full_equal_count_++;
    }

    // compute min, max and standard deviation of the GPU samples; a subtest
    // that only timed the CPU has no GPU samples, so front()/back() must not
    // be touched in that case
    double gpu_min = 0;
    double gpu_max = 0;
    double deviation = 0;

    if (!gpu_times_.empty())
    {
        std::sort(gpu_times_.begin(), gpu_times_.end());
        gpu_min = gpu_times_.front() / freq * 1000.0;
        gpu_max = gpu_times_.back() / freq * 1000.0;

        if (gpu_times_.size() > 1)
        {
            double sum = 0;

            for (size_t i = 0; i < gpu_times_.size(); i++)
            {
                // difference to the mean is taken in floating point so the
                // mean is not truncated to whole ticks
                double diff_time = (static_cast<double>(gpu_times_[i]) - gpu_elapsed_) * 1000.0 / freq;
                sum += diff_time * diff_time;
            }

            deviation = std::sqrt(sum / gpu_times_.size());
        }
    }

    printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
    writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);

    num_subtests_called_++;
    resetCurrentSubtest();
}
// Arithmetic mean of the tick samples; 0 when there are no samples (for
// example when the iteration count was set to 0), instead of dividing by zero.
double TestSystem::meanTime(const vector<int64> &samples)
{
    if (samples.empty())
    {
        return 0.0;
    }

    const double sum = accumulate(samples.begin(), samples.end(), 0.);
    return sum / static_cast<double>(samples.size());
}
// Prints the column titles of the console report, left aligned.
void TestSystem::printHeading()
{
    cout << endl;
    cout.setf(ios_base::left);

#ifdef USE_OPENCL
    cout << TAB;
    cout << setw(10) << "CPU, ms";
    cout << setw(10) << "GPU, ms";
    cout << setw(14) << "SPEEDUP";
    cout << setw(14) << "GPUTOTAL, ms";
    cout << setw(14) << "TOTALSPEEDUP";
    cout << "DESCRIPTION\n";
#else
    cout << TAB;
    cout << setw(10) << "CPU, ms\n";
#endif

    cout.unsetf(ios_base::left);
}
void TestSystem::writeHeading()
{
if (!record_)
{
#ifdef USE_OPENCL
recordname_ += "_OCL.csv";
#else
recordname_ += "_CPU.csv";
#endif
record_ = fopen(recordname_.c_str(), "w");
}
#ifdef USE_OPENCL
fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
#else
fprintf(record_, "NAME,DESCRIPTION,CPU (ms)\n");
#endif
fflush(record_);
}
// Prints the average speedups and the exceeded / passed / failed counts and
// rates, for the kernel-only and the total GPU timings.
void TestSystem::printSummary()
{
    // Never divide by zero when no subtest has been run
    const int subtests = std::max(1, num_subtests_called_);

    const float faster_rate = (float)speedup_faster_count_ / subtests * 100;
    const float equal_rate = (float)speedup_equal_count_ / subtests * 100;
    const float slower_rate = (float)speedup_slower_count_ / subtests * 100;

    const float full_faster_rate = (float)speedup_full_faster_count_ / subtests * 100;
    const float full_equal_rate = (float)speedup_full_equal_count_ / subtests * 100;
    const float full_slower_rate = (float)speedup_full_slower_count_ / subtests * 100;

    cout.setf(ios_base::fixed);
    cout << setprecision(3);

    // Kernel-only timings
    cout << "\naverage GPU speedup: x" << speedup_total_ / subtests << endl;

    cout << "\nGPU exceeded: " << speedup_faster_count_
         << "\nGPU passed: " << speedup_equal_count_
         << "\nGPU failed: " << speedup_slower_count_
         << endl;

    cout << "\nGPU exceeded rate: " << faster_rate << "%"
         << "\nGPU passed rate: " << equal_rate << "%"
         << "\nGPU failed rate: " << slower_rate << "%"
         << endl;

    // Timings of the "full" GPU region
    cout << "\naverage GPUTOTAL speedup: x" << speedup_full_total_ / subtests << endl;

    cout << "\nGPUTOTAL exceeded: " << speedup_full_faster_count_
         << "\nGPUTOTAL passed: " << speedup_full_equal_count_
         << "\nGPUTOTAL failed: " << speedup_full_slower_count_
         << endl;

    cout << "\nGPUTOTAL exceeded rate: " << full_faster_rate << "%"
         << "\nGPUTOTAL passed rate: " << full_equal_rate << "%"
         << "\nGPUTOTAL failed rate: " << full_slower_rate << "%"
         << endl;

    cout.unsetf(ios_base::fixed);
}
void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
{
cout << TAB << setiosflags(ios_base::left);
stringstream stream;
stream << cpu_time;
cout << setw(10) << stream.str();
#ifdef USE_OPENCL
stream.str("");
stream << gpu_time;
cout << setw(10) << stream.str();
stream.str("");
stream << "x" << setprecision(3) << speedup;
cout << setw(14) << stream.str();
stream.str("");
stream << gpu_full_time;
cout << setw(14) << stream.str();
stream.str("");
stream << "x" << setprecision(3) << fullspeedup;
cout << setw(14) << stream.str();
#endif
cout << cur_subtest_description_.str();
cout << resetiosflags(ios_base::left) << endl;
}
// Appends one CSV line with the metrics of the current subtest to the record
// file. The test name is written only on the first line of each test. The
// line is skipped when the record file cannot be opened.
void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
{
    if (!record_)
    {
        // Add the extension only once, even if the name already carries it
        // (for instance after an earlier attempt to open the file).
        const std::string ext(".csv");
        const bool has_ext = recordname_.size() >= ext.size() &&
                             recordname_.compare(recordname_.size() - ext.size(), ext.size(), ext) == 0;

        if (!has_ext)
        {
            recordname_ += ext;
        }

        record_ = fopen(recordname_.c_str(), "w");

        if (!record_)
        {
            // No record file available: the console report is still printed
            // by printMetrics, so just skip the CSV line.
            return;
        }
    }

#ifdef USE_OPENCL
    fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
            cur_subtest_description_.str().c_str(),
            cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
            gpu_min, gpu_max, std_dev);
#else
    fprintf(record_, "%s,%s,%.3f\n",
            itname_changed_ ? itname_.c_str() : "", cur_subtest_description_.str().c_str(), cpu_time);
#endif

    // The test name has been written; following lines of this test leave it blank
    itname_changed_ = false;

    fflush(record_);
}
void TestSystem::writeSummary()
{
if (!record_)
{
recordname_ += ".csv";
record_ = fopen(recordname_.c_str(), "w");
}
fprintf(record_, "\nAverage GPU speedup: %.3f\n"
"exceeded: %d (%.3f%%)\n"
"passed: %d (%.3f%%)\n"
"failed: %d (%.3f%%)\n"
"\nAverage GPUTOTAL speedup: %.3f\n"
"exceeded: %d (%.3f%%)\n"
"passed: %d (%.3f%%)\n"
"failed: %d (%.3f%%)\n",
speedup_total_ / std::max(1, num_subtests_called_),
speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
speedup_full_total_ / std::max(1, num_subtests_called_),
speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
);
fflush(record_);
}
void TestSystem::printError(const std::string &msg)
{
cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
}
// (Re)allocates `mat` as rows x cols of `type` and fills it with uniformly
// distributed values between `low` and `high`. The generator is seeded with 0
// on every call, so equal arguments always produce equal data.
void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
{
    RNG generator(0);

    mat.create(rows, cols, type);
    generator.fill(mat, RNG::UNIFORM, low, high);
}
// Prefixes `relpath` with the working directory of the test system.
string abspath(const string &relpath)
{
    string full_path(TestSystem::instance().workingDir());
    full_path += relpath;
    return full_path;
}
// Error callback: only the error message is used; it is forwarded to the
// test system for printing. Always returns 0.
int CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/,
                             const char *err_msg, const char * /*file_name*/,
                             int /*line*/, void * /*userdata*/)
{
    TestSystem &test_system = TestSystem::instance();
    test_system.printError(err_msg);

    return 0;
}
/////////// matchTemplate ////////////////////////
//void InitMatchTemplate()
//{
// Mat src; gen(src, 500, 500, CV_32F, 0, 1);
// Mat templ; gen(templ, 500, 500, CV_32F, 0, 1);
//#ifdef USE_OPENCL
// ocl::oclMat d_src(src), d_templ(templ), d_dst;
// ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
//#endif
//}
// Times matchTemplate on square images of side 1000..4000 with square
// templates of side 5 and 25: CCORR for the float types, CCORR_NORMED for 8U.
TEST(matchTemplate)
{
    //InitMatchTemplate();
    Mat src, templ, dst;

    const int float_types[] = {CV_32FC1, CV_32FC4};
    const char *const float_names[] = {"CV_32FC1", "CV_32FC4"};
    const size_t float_count = sizeof(float_types) / sizeof(float_types[0]);

    const int byte_types[] = {CV_8UC1};
    const char *const byte_names[] = {"CV_8UC1"};
    const size_t byte_count = sizeof(byte_types) / sizeof(byte_types[0]);

    for (int size = 1000; size <= 4000; size *= 2)
    {
        // Floating point inputs, plain cross correlation
        for (size_t t = 0; t < float_count; ++t)
        {
            for (int side = 5; side < 200; side *= 5)
            {
                gen(src, size, size, float_types[t], 0, 1);

                SUBTEST << src.cols << 'x' << src.rows << "; " << float_names[t] << "; templ " << side << 'x' << side << "; CCORR";

                gen(templ, side, side, float_types[t], 0, 1);

                matchTemplate(src, templ, dst, CV_TM_CCORR);

                CPU_ON;
                matchTemplate(src, templ, dst, CV_TM_CCORR);
                CPU_OFF;

#ifdef USE_OPENCL
                ocl::oclMat ocl_src(src), ocl_templ, ocl_dst;
                ocl_templ.upload(templ);

                WARMUP_ON;
                ocl::matchTemplate(ocl_src, ocl_templ, ocl_dst, CV_TM_CCORR);
                WARMUP_OFF;

                GPU_ON;
                ocl::matchTemplate(ocl_src, ocl_templ, ocl_dst, CV_TM_CCORR);
                GPU_OFF;

                GPU_FULL_ON;
                ocl_src.upload(src);
                ocl_templ.upload(templ);
                ocl::matchTemplate(ocl_src, ocl_templ, ocl_dst, CV_TM_CCORR);
                ocl_dst.download(dst);
                GPU_FULL_OFF;
#endif
            }
        }

        // 8-bit inputs, normalized cross correlation
        for (size_t t = 0; t < byte_count; ++t)
        {
            for (int side = 5; side < 200; side *= 5)
            {
                // src still has the size x size shape of the previous case here
                SUBTEST << src.cols << 'x' << src.rows << "; " << byte_names[t] << "; templ " << side << 'x' << side << "; CCORR_NORMED";

                gen(src, size, size, byte_types[t], 0, 255);
                gen(templ, side, side, byte_types[t], 0, 255);

                matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED);

                CPU_ON;
                matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED);
                CPU_OFF;

#ifdef USE_OPENCL
                ocl::oclMat ocl_src(src);
                ocl::oclMat ocl_templ(templ), ocl_dst;

                WARMUP_ON;
                ocl::matchTemplate(ocl_src, ocl_templ, ocl_dst, CV_TM_CCORR_NORMED);
                WARMUP_OFF;

                GPU_ON;
                ocl::matchTemplate(ocl_src, ocl_templ, ocl_dst, CV_TM_CCORR_NORMED);
                GPU_OFF;

                GPU_FULL_ON;
                ocl_src.upload(src);
                ocl_templ.upload(templ);
                ocl::matchTemplate(ocl_src, ocl_templ, ocl_dst, CV_TM_CCORR_NORMED);
                ocl_dst.download(dst);
                GPU_FULL_OFF;
#endif
            }
        }
    }
}
///////////// PyrLKOpticalFlow ////////////////////////
// Times sparse pyramidal Lucas-Kanade optical flow on two image pairs (the
// first read in color, the second in grayscale) for 1000, 2000 and 4000
// requested feature points.
TEST(PyrLKOpticalFlow)
{
    const char *const first_names[] = {"rubberwhale1.png", "aloeL.jpg"};
    const char *const second_names[] = {"rubberwhale2.png", "aloeR.jpg"};
    const size_t pair_count = sizeof(first_names) / sizeof(first_names[0]);

    for (size_t i = 0; i < pair_count; i++)
    {
        // Only the first pair is processed in color
        const bool in_color = (i == 0);

        Mat frame0 = imread(abspath(first_names[i]), in_color ? IMREAD_COLOR : IMREAD_GRAYSCALE);

        if (frame0.empty())
        {
            throw runtime_error(std::string("can't open ") + first_names[i]);
        }

        Mat frame1 = imread(abspath(second_names[i]), in_color ? IMREAD_COLOR : IMREAD_GRAYSCALE);

        if (frame1.empty())
        {
            throw runtime_error(std::string("can't open ") + second_names[i]);
        }

        // The feature detector is fed a single channel image
        Mat gray_frame;

        if (in_color)
        {
            cvtColor(frame0, gray_frame, COLOR_BGR2GRAY);
        }

        for (int points = 1000; points <= 4000; points *= 2)
        {
            SUBTEST << frame0.cols << "x" << frame0.rows << (in_color ? "; color; " : "; gray; ") << points << " points";

            Mat nextPts_cpu;
            Mat status_cpu;

            vector<Point2f> pts;
            goodFeaturesToTrack(in_color ? gray_frame : frame0, pts, points, 0.01, 0.0);

            vector<Point2f> nextPts;
            vector<unsigned char> status;
            vector<float> err;

            calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);

            CPU_ON;
            calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
            CPU_OFF;

#ifdef USE_OPENCL
            ocl::PyrLKOpticalFlow ocl_flow;

            ocl::oclMat ocl_frame0(frame0);
            ocl::oclMat ocl_frame1(frame1);

            // Header over the point vector's storage, no copy
            ocl::oclMat ocl_pts;
            Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]);
            ocl_pts.upload(pts_mat);

            ocl::oclMat ocl_nextPts;
            ocl::oclMat ocl_status;
            ocl::oclMat ocl_err;

            WARMUP_ON;
            ocl_flow.sparse(ocl_frame0, ocl_frame1, ocl_pts, ocl_nextPts, ocl_status, &ocl_err);
            WARMUP_OFF;

            GPU_ON;
            ocl_flow.sparse(ocl_frame0, ocl_frame1, ocl_pts, ocl_nextPts, ocl_status, &ocl_err);
            GPU_OFF;

            GPU_FULL_ON;
            ocl_frame0.upload(frame0);
            ocl_frame1.upload(frame1);
            ocl_pts.upload(pts_mat);
            ocl_flow.sparse(ocl_frame0, ocl_frame1, ocl_pts, ocl_nextPts, ocl_status, &ocl_err);

            if (!ocl_nextPts.empty())
            {
                ocl_nextPts.download(nextPts_cpu);
            }

            if (!ocl_status.empty())
            {
                ocl_status.download(status_cpu);
            }

            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// pyrDown //////////////////////
// Times pyrDown on square 8-bit images (1 and 4 channels) of side 1000..4000.
TEST(pyrDown)
{
    Mat src, dst;

    const int types[] = {CV_8UC1, CV_8UC4};
    const char *const names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t type_count = sizeof(types) / sizeof(types[0]);

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t t = 0; t < type_count; ++t)
        {
            SUBTEST << size << 'x' << size << "; " << names[t] ;

            gen(src, size, size, types[t], 0, 256);

            pyrDown(src, dst);

            CPU_ON;
            pyrDown(src, dst);
            CPU_OFF;

#ifdef USE_OPENCL
            ocl::oclMat ocl_src(src);
            ocl::oclMat ocl_dst;

            WARMUP_ON;
            ocl::pyrDown(ocl_src, ocl_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::pyrDown(ocl_src, ocl_dst);
            GPU_OFF;

            GPU_FULL_ON;
            ocl_src.upload(src);
            ocl::pyrDown(ocl_src, ocl_dst);
            ocl_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// pyrUp ////////////////////////
// Times pyrUp on square 8-bit images (1 and 4 channels) of side 500..2000.
TEST(pyrUp)
{
    Mat src, dst;

    const int types[] = {CV_8UC1, CV_8UC4};
    const char *const names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t type_count = sizeof(types) / sizeof(types[0]);

    for (int size = 500; size <= 2000; size *= 2)
    {
        for (size_t t = 0; t < type_count; ++t)
        {
            SUBTEST << size << 'x' << size << "; " << names[t] ;

            gen(src, size, size, types[t], 0, 256);

            pyrUp(src, dst);

            CPU_ON;
            pyrUp(src, dst);
            CPU_OFF;

#ifdef USE_OPENCL
            ocl::oclMat ocl_src(src);
            ocl::oclMat ocl_dst;

            WARMUP_ON;
            ocl::pyrUp(ocl_src, ocl_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::pyrUp(ocl_src, ocl_dst);
            GPU_OFF;

            GPU_FULL_ON;
            ocl_src.upload(src);
            ocl::pyrUp(ocl_src, ocl_dst);
            ocl_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Canny ////////////////////////
// Times Canny edge detection (thresholds 50 and 100) on aloeL.jpg read as
// grayscale.
TEST(Canny)
{
    const double low_threshold = 50.0;
    const double high_threshold = 100.0;

    Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);

    if (img.empty())
    {
        throw runtime_error("can't open aloeL.jpg");
    }

    SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1";

    Mat edges(img.size(), CV_8UC1);

    CPU_ON;
    Canny(img, edges, low_threshold, high_threshold);
    CPU_OFF;

#ifdef USE_OPENCL
    ocl::oclMat ocl_img(img);
    ocl::oclMat ocl_edges;
    ocl::CannyBuf ocl_buf;

    WARMUP_ON;
    ocl::Canny(ocl_img, ocl_buf, ocl_edges, low_threshold, high_threshold);
    WARMUP_OFF;

    GPU_ON;
    ocl::Canny(ocl_img, ocl_buf, ocl_edges, low_threshold, high_threshold);
    GPU_OFF;

    GPU_FULL_ON;
    ocl_img.upload(img);
    ocl::Canny(ocl_img, ocl_buf, ocl_edges, low_threshold, high_threshold);
    ocl_edges.download(edges);
    GPU_FULL_OFF;
#endif
}
///////////// Haar ////////////////////////
#ifdef USE_OPENCL
namespace cv
{
namespace ocl
{

// Functor that extracts the rectangle of a detection result.
struct getRect
{
    Rect operator()(const CvAvgComp &e) const
    {
        return e.rect;
    }
};

// Wraps oclHaarDetectObjects behind a detectMultiScale call that fills a
// vector of rectangles. The maxSize argument is accepted but not used.
class CascadeClassifier_GPU : public OclCascadeClassifier
{
public:
    void detectMultiScale(oclMat &image,
                          CV_OUT std::vector<cv::Rect>& faces,
                          double scaleFactor = 1.1,
                          int minNeighbors = 3, int flags = 0,
                          Size minSize = Size(),
                          Size maxSize = Size())
    {
        (void)maxSize;

        // Storage backing the sequence returned by the detector
        MemStorage seq_storage(cvCreateMemStorage(0));
        //CvMat img=image;
        CvSeq *detections = oclHaarDetectObjects(image, seq_storage, scaleFactor, minNeighbors, flags, minSize);

        vector<CvAvgComp> components;
        Seq<CvAvgComp>(detections).copyTo(components);

        // Keep only the rectangle of every detection
        faces.resize(components.size());

        for (size_t k = 0; k < components.size(); ++k)
        {
            faces[k] = getRect()(components[k]);
        }
    }
};

}
}
#endif
// Times Haar cascade detection on basketball1.png (read as grayscale) with
// the cascade stored in facedetect.xml.
TEST(Haar)
{
    const double scale_factor = 1.1;
    const int min_neighbors = 2;
    const int detect_flags = 0 | CV_HAAR_SCALE_IMAGE;
    const Size min_size(30, 30);

    Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE);

    if (img.empty())
    {
        throw runtime_error("can't open basketball1.png");
    }

    CascadeClassifier cpu_cascade;

    if (!cpu_cascade.load(abspath("facedetect.xml")))
    {
        throw runtime_error("can't load facedetect.xml");
    }

    vector<Rect> faces;

    SUBTEST << img.cols << "x" << img.rows << "; scale image";

    CPU_ON;
    cpu_cascade.detectMultiScale(img, faces,
                                 scale_factor, min_neighbors, detect_flags, min_size);
    CPU_OFF;

#ifdef USE_OPENCL
    ocl::CascadeClassifier_GPU ocl_cascade;

    if (!ocl_cascade.load(abspath("facedetect.xml")))
    {
        throw runtime_error("can't load facedetect.xml");
    }

    ocl::oclMat ocl_img(img);

    faces.clear();

    WARMUP_ON;
    ocl_cascade.detectMultiScale(ocl_img, faces,
                                 scale_factor, min_neighbors, detect_flags, min_size);
    WARMUP_OFF;

    faces.clear();

    GPU_ON;
    ocl_cascade.detectMultiScale(ocl_img, faces,
                                 scale_factor, min_neighbors, detect_flags, min_size);
    GPU_OFF;

    GPU_FULL_ON;
    ocl_img.upload(img);
    ocl_cascade.detectMultiScale(ocl_img, faces,
                                 scale_factor, min_neighbors, detect_flags, min_size);
    GPU_FULL_OFF;
#endif
}
///////////// blend ////////////////////////
// CPU reference for linear blending: every channel value of the result is
// (img1 * w1 + img2 * w2) / (w1 + w2 + 1e-5), where w1 / w2 are the float
// weights of the pixel the channel belongs to. T is the element type of the
// images; result_gold is (re)allocated with the size and type of img1.
template <typename T>
void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold)
{
    result_gold.create(img1.size(), img1.type());

    const int channels = img1.channels();

    for (int row = 0; row < img1.rows; ++row)
    {
        const T *first = img1.ptr<T>(row);
        const T *second = img2.ptr<T>(row);
        const float *first_weights = weights1.ptr<float>(row);
        const float *second_weights = weights2.ptr<float>(row);
        T *blended = result_gold.ptr<T>(row);

        for (int col = 0; col < img1.cols; ++col)
        {
            // One weight per pixel, shared by all of its channels
            const float w1 = first_weights[col];
            const float w2 = second_weights[col];

            for (int c = 0; c < channels; ++c)
            {
                const int x = col * channels + c;
                blended[x] = static_cast<T>((first[x] * w1 + second[x] * w2) / (w1 + w2 + 1e-5f));
            }
        }
    }
}
// Times linear blending of two 8-bit images (1 and 4 channels) with float
// weight maps, on square inputs of side 1000..4000.
TEST(blend)
{
    Mat src1, src2, weights1, weights2, dst;
#ifdef USE_OPENCL
    ocl::oclMat ocl_src1, ocl_src2, ocl_weights1, ocl_weights2, ocl_dst;
#endif

    const int types[] = {CV_8UC1, CV_8UC4};
    const char *const names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t type_count = sizeof(types) / sizeof(types[0]);

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t t = 0; t < type_count; ++t)
        {
            SUBTEST << size << 'x' << size << "; " << names[t] << " and CV_32FC1";

            gen(src1, size, size, types[t], 0, 256);
            gen(src2, size, size, types[t], 0, 256);
            gen(weights1, size, size, CV_32FC1, 0, 1);
            gen(weights2, size, size, CV_32FC1, 0, 1);

            blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);

            CPU_ON;
            blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);
            CPU_OFF;

#ifdef USE_OPENCL
            ocl_src1.upload(src1);
            ocl_src2.upload(src2);
            ocl_weights1.upload(weights1);
            ocl_weights2.upload(weights2);

            WARMUP_ON;
            ocl::blendLinear(ocl_src1, ocl_src2, ocl_weights1, ocl_weights2, ocl_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::blendLinear(ocl_src1, ocl_src2, ocl_weights1, ocl_weights2, ocl_dst);
            GPU_OFF;

            GPU_FULL_ON;
            ocl_src1.upload(src1);
            ocl_src2.upload(src2);
            ocl_weights1.upload(weights1);
            ocl_weights2.upload(weights2);
            ocl::blendLinear(ocl_src1, ocl_src2, ocl_weights1, ocl_weights2, ocl_dst);
            ocl_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// columnSum////////////////////////
// Times a running per-column sum on square CV_32FC1 images of side
// 1000..4000 against ocl::columnSum.
TEST(columnSum)
{
    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int size = 1000; size <= 4000; size *= 2)
    {
        SUBTEST << size << 'x' << size << "; CV_32FC1";

        gen(src, size, size, CV_32FC1, 0, 256);

        CPU_ON;
        dst.create(src.size(), src.type());

        // The first row is its own running sum
        for (int j = 0; j < src.cols; ++j)
        {
            dst.at<float>(0, j) = src.at<float>(0, j);
        }

        // Accumulate into dst only: src must stay untouched, because this
        // region is executed several times and src is uploaded to the device
        // afterwards.
        for (int i = 1; i < src.rows; ++i)
        {
            for (int j = 0; j < src.cols; ++j)
            {
                dst.at<float>(i, j) = dst.at<float>(i - 1, j) + src.at<float>(i, j);
            }
        }

        CPU_OFF;

#ifdef USE_OPENCL
        d_src.upload(src);

        WARMUP_ON;
        ocl::columnSum(d_src, d_dst);
        WARMUP_OFF;

        GPU_ON;
        ocl::columnSum(d_src, d_dst);
        GPU_OFF;

        GPU_FULL_ON;
        d_src.upload(src);
        ocl::columnSum(d_src, d_dst);
        d_dst.download(dst);
        GPU_FULL_OFF;
#endif
    }
}
///////////// HOG////////////////////////
// Times HOG people detection (default people detector) on road.png read as
// grayscale.
TEST(HOG)
{
    Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE);

    if (src.empty())
    {
        throw runtime_error("can't open road.png");
    }

    cv::HOGDescriptor hog;
    hog.setSVMDetector(hog.getDefaultPeopleDetector());
    std::vector<cv::Rect> found_locations;

    // Report the size of the image that was actually loaded, as the other
    // image based tests do, rather than a hard-coded one
    SUBTEST << src.cols << 'x' << src.rows << "; road.png";

    hog.detectMultiScale(src, found_locations);

    CPU_ON;
    hog.detectMultiScale(src, found_locations);
    CPU_OFF;

#ifdef USE_OPENCL
    cv::ocl::HOGDescriptor ocl_hog;
    ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
    ocl::oclMat d_src;
    d_src.upload(src);

    WARMUP_ON;
    ocl_hog.detectMultiScale(d_src, found_locations);
    WARMUP_OFF;

    GPU_ON;
    ocl_hog.detectMultiScale(d_src, found_locations);
    GPU_OFF;

    GPU_FULL_ON;
    d_src.upload(src);
    ocl_hog.detectMultiScale(d_src, found_locations);
    GPU_FULL_OFF;
#endif
}
///////////// SURF ////////////////////////
// Times SURF keypoint detection and description on aloeL.jpg read as
// grayscale.
TEST(SURF)
{
    Mat keypoints_cpu;
    Mat descriptors_cpu;

    Mat src = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);

    if (src.empty())
    {
        throw runtime_error("can't open aloeL.jpg");
    }

    SUBTEST << src.cols << "x" << src.rows << "; aloeL.jpg";

    SURF cpu_surf;
    vector<KeyPoint> keypoints;
    Mat descriptors;

    cpu_surf(src, Mat(), keypoints, descriptors);

    CPU_ON;
    keypoints.clear();
    cpu_surf(src, Mat(), keypoints, descriptors);
    CPU_OFF;

#ifdef USE_OPENCL
    ocl::SURF_OCL ocl_surf;
    ocl::oclMat ocl_src(src);
    ocl::oclMat ocl_keypoints;
    ocl::oclMat ocl_descriptors;

    WARMUP_ON;
    ocl_surf(ocl_src, ocl::oclMat(), ocl_keypoints, ocl_descriptors);
    WARMUP_OFF;

    GPU_ON;
    ocl_surf(ocl_src, ocl::oclMat(), ocl_keypoints, ocl_descriptors);
    GPU_OFF;

    GPU_FULL_ON;
    ocl_src.upload(src);
    ocl_surf(ocl_src, ocl::oclMat(), ocl_keypoints, ocl_descriptors);

    // Results are downloaded only when something was found
    if (!ocl_keypoints.empty())
    {
        ocl_keypoints.download(keypoints_cpu);
    }

    if (!ocl_descriptors.empty())
    {
        ocl_descriptors.download(descriptors_cpu);
    }

    GPU_FULL_OFF;
#endif
}
//////////////////// BruteForceMatch /////////////////
// Times brute force L2 descriptor matching (match, knnMatch with k = 2 and
// radiusMatch) between two random sets of 64-element float descriptors, for
// 1000, 2000 and 4000 descriptors per set.
TEST(BruteForceMatcher)
{
    for (int size = 1000; size <= 4000; size *= 2)
    {
        // Init CPU matcher
        int desc_len = 64;

        BFMatcher matcher(NORM_L2);

        Mat query;
        gen(query, size, desc_len, CV_32F, 0, 1);

        Mat train;
        gen(train, size, desc_len, CV_32F, 0, 1);

        // Output
        vector< vector<DMatch> > matches(2);

#ifdef USE_OPENCL
        // Init GPU matcher
        ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist);

        ocl::oclMat d_query(query);
        ocl::oclMat d_train(train);

        ocl::oclMat d_trainIdx, d_distance, d_allDist, d_nMatches;
#endif

        SUBTEST << size << "; match";

        matcher.match(query, train, matches[0]);

        CPU_ON;
        matcher.match(query, train, matches[0]);
        CPU_OFF;

#ifdef USE_OPENCL
        WARMUP_ON;
        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
        WARMUP_OFF;

        GPU_ON;
        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
        GPU_OFF;

        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
        d_matcher.match(d_query, d_train, matches[0]);
        GPU_FULL_OFF;
#endif

        SUBTEST << size << "; knnMatch";

        matcher.knnMatch(query, train, matches, 2);

        CPU_ON;
        matcher.knnMatch(query, train, matches, 2);
        CPU_OFF;

#ifdef USE_OPENCL
        WARMUP_ON;
        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
        WARMUP_OFF;

        GPU_ON;
        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
        GPU_OFF;

        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
        d_matcher.knnMatch(d_query, d_train, matches, 2);
        GPU_FULL_OFF;
#endif

        SUBTEST << size << "; radiusMatch";

        float max_distance = 2.0f;

        matcher.radiusMatch(query, train, matches, max_distance);

        CPU_ON;
        matcher.radiusMatch(query, train, matches, max_distance);
        CPU_OFF;

#ifdef USE_OPENCL
        // Drop the index buffer left over from the knnMatch subtest
        d_trainIdx.release();

        WARMUP_ON;
        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
        WARMUP_OFF;

        GPU_ON;
        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
        GPU_OFF;

        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
        d_matcher.radiusMatch(d_query, d_train, matches, max_distance);
        GPU_FULL_OFF;
#endif
    }
}
///////////// Lut ////////////////////////
// Times LUT with a 1x256 8-bit table on square 8-bit images (1 and 3
// channels) of side 1000..4000.
TEST(lut)
{
    Mat src, table, dst;
#ifdef USE_OPENCL
    ocl::oclMat ocl_src, ocl_table, ocl_dst;
#endif

    const int types[] = {CV_8UC1, CV_8UC3};
    const char *const names[] = {"CV_8UC1", "CV_8UC3"};
    const size_t type_count = sizeof(types) / sizeof(types[0]);

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t t = 0; t < type_count; ++t)
        {
            SUBTEST << size << 'x' << size << "; " << names[t];

            gen(src, size, size, types[t], 0, 256);
            gen(table, 1, 256, CV_8UC1, 0, 1);
            gen(dst, size, size, types[t], 0, 256);

            LUT(src, table, dst);

            CPU_ON;
            LUT(src, table, dst);
            CPU_OFF;

#ifdef USE_OPENCL
            ocl_src.upload(src);
            ocl_table.upload(table);

            WARMUP_ON;
            ocl::LUT(ocl_src, ocl_table, ocl_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::LUT(ocl_src, ocl_table, ocl_dst);
            GPU_OFF;

            GPU_FULL_ON;
            ocl_src.upload(src);
            ocl_table.upload(table);
            ocl::LUT(ocl_src, ocl_table, ocl_dst);
            ocl_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Exp ////////////////////////
// Times the element-wise exponential on square CV_32FC1 images of side
// 1000..4000.
TEST(Exp)
{
    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat ocl_src, ocl_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        SUBTEST << side << 'x' << side << "; CV_32FC1";

        gen(src, side, side, CV_32FC1, 0, 256);
        gen(dst, side, side, CV_32FC1, 0, 256);

        exp(src, dst);

        CPU_ON;
        exp(src, dst);
        CPU_OFF;

#ifdef USE_OPENCL
        ocl_src.upload(src);

        WARMUP_ON;
        ocl::exp(ocl_src, ocl_dst);
        WARMUP_OFF;

        GPU_ON;
        ocl::exp(ocl_src, ocl_dst);
        GPU_OFF;

        GPU_FULL_ON;
        ocl_src.upload(src);
        ocl::exp(ocl_src, ocl_dst);
        ocl_dst.download(dst);
        GPU_FULL_OFF;
#endif
    }
}
///////////// LOG ////////////////////////
TEST(Log)
{
    // cv::log versus ocl::log on square CV_32F matrices (sides 1000, 2000, 4000).
    // The input is generated with bounds 1 and 10.
    const int type = CV_32F;

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        SUBTEST << side << 'x' << side << "; 32F";

        gen(src, side, side, type, 1, 10);

        // One call outside the CPU bracket, then the bracketed call.
        cv::log(src, dst);
        CPU_ON;
        cv::log(src, dst);
        CPU_OFF;
#ifdef USE_OPENCL
        d_src.upload(src);

        WARMUP_ON;
        ocl::log(d_src, d_dst);
        WARMUP_OFF;

        GPU_ON;
        ocl::log(d_src, d_dst);
        GPU_OFF;

        // Upload, operation and download inside the GPU_FULL bracket.
        GPU_FULL_ON;
        d_src.upload(src);
        ocl::log(d_src, d_dst);
        d_dst.download(dst);
        GPU_FULL_OFF;
#endif
    }
}
///////////// Add ////////////////////////
TEST(Add)
{
    // cv::add versus ocl::add for CV_8UC1 and CV_32FC1 square matrices
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_32FC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat lhs, rhs, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_lhs, d_rhs, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(lhs, side, side, type, 0, 1);
            gen(rhs, side, side, type, 0, 1);

            // One call outside the CPU bracket, then the bracketed call.
            cv::add(lhs, rhs, dst);
            CPU_ON;
            cv::add(lhs, rhs, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);

            WARMUP_ON;
            ocl::add(d_lhs, d_rhs, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::add(d_lhs, d_rhs, d_dst);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);
            ocl::add(d_lhs, d_rhs, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Mul ////////////////////////
TEST(Mul)
{
    // cv::multiply versus ocl::multiply for CV_8UC1 and CV_8UC4 square matrices
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat lhs, rhs, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_lhs, d_rhs, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(lhs, side, side, type, 0, 256);
            gen(rhs, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::multiply(lhs, rhs, dst);
            CPU_ON;
            cv::multiply(lhs, rhs, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);

            WARMUP_ON;
            ocl::multiply(d_lhs, d_rhs, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::multiply(d_lhs, d_rhs, d_dst);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);
            ocl::multiply(d_lhs, d_rhs, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Div ////////////////////////
TEST(Div)
{
    // cv::divide versus ocl::divide for CV_8UC1 and CV_8UC4 square matrices
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat numer, denom, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_numer, d_denom, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(numer, side, side, type, 0, 256);
            gen(denom, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::divide(numer, denom, dst);
            CPU_ON;
            cv::divide(numer, denom, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            d_numer.upload(numer);
            d_denom.upload(denom);

            WARMUP_ON;
            ocl::divide(d_numer, d_denom, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::divide(d_numer, d_denom, d_dst);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_numer.upload(numer);
            d_denom.upload(denom);
            ocl::divide(d_numer, d_denom, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Absdiff ////////////////////////
TEST(Absdiff)
{
    // cv::absdiff versus ocl::absdiff for CV_8UC1 and CV_8UC4 square matrices
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat lhs, rhs, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_lhs, d_rhs, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(lhs, side, side, type, 0, 256);
            gen(rhs, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::absdiff(lhs, rhs, dst);
            CPU_ON;
            cv::absdiff(lhs, rhs, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);

            WARMUP_ON;
            ocl::absdiff(d_lhs, d_rhs, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::absdiff(d_lhs, d_rhs, d_dst);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);
            ocl::absdiff(d_lhs, d_rhs, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// CartToPolar ////////////////////////
TEST(CartToPolar)
{
    // cv::cartToPolar versus ocl::cartToPolar on CV_32FC1 square matrices
    // (sides 1000, 2000, 4000). The last argument of every call is 1.
    const int formats[] = {CV_32FC1};
    const std::string format_names[] = {"CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat in_a, in_b, out_a, out_b;
#ifdef USE_OPENCL
    ocl::oclMat d_in_a, d_in_b, d_out_a, d_out_b;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(in_a, side, side, type, 0, 256);
            gen(in_b, side, side, type, 0, 256);
            gen(out_a, side, side, type, 0, 256);
            gen(out_b, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::cartToPolar(in_a, in_b, out_a, out_b, 1);
            CPU_ON;
            cv::cartToPolar(in_a, in_b, out_a, out_b, 1);
            CPU_OFF;
#ifdef USE_OPENCL
            d_in_a.upload(in_a);
            d_in_b.upload(in_b);

            WARMUP_ON;
            ocl::cartToPolar(d_in_a, d_in_b, d_out_a, d_out_b, 1);
            WARMUP_OFF;

            GPU_ON;
            ocl::cartToPolar(d_in_a, d_in_b, d_out_a, d_out_b, 1);
            GPU_OFF;

            // Uploads, operation and both downloads inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_in_a.upload(in_a);
            d_in_b.upload(in_b);
            ocl::cartToPolar(d_in_a, d_in_b, d_out_a, d_out_b, 1);
            d_out_a.download(out_a);
            d_out_b.download(out_b);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// PolarToCart ////////////////////////
TEST(PolarToCart)
{
    // cv::polarToCart versus ocl::polarToCart on CV_32FC1 square matrices
    // (sides 1000, 2000, 4000). The last argument of every call is 1.
    const int formats[] = {CV_32FC1};
    const std::string format_names[] = {"CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat in_a, in_b, out_a, out_b;
#ifdef USE_OPENCL
    ocl::oclMat d_in_a, d_in_b, d_out_a, d_out_b;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(in_a, side, side, type, 0, 256);
            gen(in_b, side, side, type, 0, 256);
            gen(out_a, side, side, type, 0, 256);
            gen(out_b, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::polarToCart(in_a, in_b, out_a, out_b, 1);
            CPU_ON;
            cv::polarToCart(in_a, in_b, out_a, out_b, 1);
            CPU_OFF;
#ifdef USE_OPENCL
            d_in_a.upload(in_a);
            d_in_b.upload(in_b);

            WARMUP_ON;
            ocl::polarToCart(d_in_a, d_in_b, d_out_a, d_out_b, 1);
            WARMUP_OFF;

            GPU_ON;
            ocl::polarToCart(d_in_a, d_in_b, d_out_a, d_out_b, 1);
            GPU_OFF;

            // Uploads, operation and both downloads inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_in_a.upload(in_a);
            d_in_b.upload(in_b);
            ocl::polarToCart(d_in_a, d_in_b, d_out_a, d_out_b, 1);
            d_out_a.download(out_a);
            d_out_b.download(out_b);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Magnitude ////////////////////////
TEST(magnitude)
{
    // cv::magnitude versus ocl::magnitude on CV_32FC1 square matrices
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_32FC1};
    const std::string format_names[] = {"CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat x, y, mag;
#ifdef USE_OPENCL
    ocl::oclMat d_x, d_y, d_mag;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(x, side, side, type, 0, 1);
            gen(y, side, side, type, 0, 1);

            // One call outside the CPU bracket, then the bracketed call.
            cv::magnitude(x, y, mag);
            CPU_ON;
            cv::magnitude(x, y, mag);
            CPU_OFF;
#ifdef USE_OPENCL
            d_x.upload(x);
            d_y.upload(y);

            WARMUP_ON;
            ocl::magnitude(d_x, d_y, d_mag);
            WARMUP_OFF;

            GPU_ON;
            ocl::magnitude(d_x, d_y, d_mag);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_x.upload(x);
            d_y.upload(y);
            ocl::magnitude(d_x, d_y, d_mag);
            d_mag.download(mag);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Transpose ////////////////////////
TEST(Transpose)
{
    // cv::transpose versus ocl::transpose for CV_8UC1 and CV_8UC4 square matrices
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::transpose(src, dst);
            CPU_ON;
            cv::transpose(src, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::transpose(d_src, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::transpose(d_src, d_dst);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::transpose(d_src, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Flip ////////////////////////
TEST(Flip)
{
    // cv::flip versus ocl::flip for CV_8UC1 and CV_8UC4 square matrices
    // (sides 1000, 2000, 4000).
    //
    // The subtest label advertises "FLIP_BOTH", so a negative flip code (flip
    // around both axes) is used. The code previously passed 0, which flips
    // around the x-axis only and therefore did not measure what the label said.
    const int flip_both = -1;

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif
    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; FLIP_BOTH";

            gen(src, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            flip(src, dst, flip_both);
            CPU_ON;
            flip(src, dst, flip_both);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::flip(d_src, d_dst, flip_both);
            WARMUP_OFF;

            GPU_ON;
            ocl::flip(d_src, d_dst, flip_both);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::flip(d_src, d_dst, flip_both);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// minMax ////////////////////////
TEST(minMax)
{
    // Minimum/maximum value search: cv::minMaxLoc versus ocl::minMax for
    // CV_8UC1 and CV_32FC1 square matrices (sides 1000, 2000, 4000).
    //
    // ocl::minMax is only asked for the two values, so the CPU reference is
    // called without location outputs as well; requesting locations on the
    // CPU side only would make the two sides do different work.
    Mat src;
#ifdef USE_OPENCL
    ocl::oclMat d_src;
#endif
    double min_val, max_val;
    int all_type[] = {CV_8UC1, CV_32FC1};
    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j];

            gen(src, size, size, all_type[j], 0, 256);

            CPU_ON;
            minMaxLoc(src, &min_val, &max_val);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::minMax(d_src, &min_val, &max_val);
            WARMUP_OFF;

            GPU_ON;
            ocl::minMax(d_src, &min_val, &max_val);
            GPU_OFF;

            // Upload and operation inside the GPU_FULL bracket (results are scalars,
            // so there is no matrix download).
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::minMax(d_src, &min_val, &max_val);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// minMaxLoc ////////////////////////
TEST(minMaxLoc)
{
    // cv::minMaxLoc versus ocl::minMaxLoc (values and positions) for CV_8UC1
    // and CV_32FC1 square matrices (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_32FC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat src;
#ifdef USE_OPENCL
    ocl::oclMat d_src;
#endif
    double lo, hi;
    Point lo_at, hi_at;

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, formats[f], 0, 1);

            // Unlike most tests here, there is no CPU call before the bracket.
            CPU_ON;
            cv::minMaxLoc(src, &lo, &hi, &lo_at, &hi_at);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::minMaxLoc(d_src, &lo, &hi, &lo_at, &hi_at);
            WARMUP_OFF;

            GPU_ON;
            ocl::minMaxLoc(d_src, &lo, &hi, &lo_at, &hi_at);
            GPU_OFF;

            // Upload and operation inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::minMaxLoc(d_src, &lo, &hi, &lo_at, &hi_at);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Sum ////////////////////////
TEST(Sum)
{
    // cv::sum versus ocl::sum for CV_8UC1 and CV_32SC1 square matrices
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_32SC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32SC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat src;
    Scalar host_total, device_total;
#ifdef USE_OPENCL
    ocl::oclMat d_src;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, formats[f], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            host_total = cv::sum(src);
            CPU_ON;
            host_total = cv::sum(src);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            device_total = ocl::sum(d_src);
            WARMUP_OFF;

            GPU_ON;
            device_total = ocl::sum(d_src);
            GPU_OFF;

            // Upload and operation inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            device_total = ocl::sum(d_src);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// countNonZero ////////////////////////
TEST(countNonZero)
{
    // cv::countNonZero versus ocl::countNonZero for CV_8UC1 and CV_32FC1 square
    // matrices (sides 1000, 2000, 4000). The returned counts are discarded.
    const int formats[] = {CV_8UC1, CV_32FC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat src;
#ifdef USE_OPENCL
    ocl::oclMat d_src;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, formats[f], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::countNonZero(src);
            CPU_ON;
            cv::countNonZero(src);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::countNonZero(d_src);
            WARMUP_OFF;

            GPU_ON;
            ocl::countNonZero(d_src);
            GPU_OFF;

            // Upload and operation inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::countNonZero(d_src);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Phase ////////////////////////
TEST(Phase)
{
    // cv::phase versus ocl::phase on CV_32FC1 square matrices
    // (sides 1000, 2000, 4000). The last argument of every call is 1.
    const int formats[] = {CV_32FC1};
    const std::string format_names[] = {"CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat in_a, in_b, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_in_a, d_in_b, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(in_a, side, side, type, 0, 256);
            gen(in_b, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::phase(in_a, in_b, dst, 1);
            CPU_ON;
            cv::phase(in_a, in_b, dst, 1);
            CPU_OFF;
#ifdef USE_OPENCL
            d_in_a.upload(in_a);
            d_in_b.upload(in_b);

            WARMUP_ON;
            ocl::phase(d_in_a, d_in_b, d_dst, 1);
            WARMUP_OFF;

            GPU_ON;
            ocl::phase(d_in_a, d_in_b, d_dst, 1);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_in_a.upload(in_a);
            d_in_b.upload(in_b);
            ocl::phase(d_in_a, d_in_b, d_dst, 1);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// bitwise_and////////////////////////
TEST(bitwise_and)
{
    // cv::bitwise_and versus ocl::bitwise_and for CV_8UC1 and CV_32SC1 square
    // matrices (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_32SC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32SC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat lhs, rhs, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_lhs, d_rhs, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(lhs, side, side, type, 0, 256);
            gen(rhs, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::bitwise_and(lhs, rhs, dst);
            CPU_ON;
            cv::bitwise_and(lhs, rhs, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);

            WARMUP_ON;
            ocl::bitwise_and(d_lhs, d_rhs, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::bitwise_and(d_lhs, d_rhs, d_dst);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);
            ocl::bitwise_and(d_lhs, d_rhs, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// bitwise_or////////////////////////
TEST(bitwise_or)
{
    // cv::bitwise_or versus ocl::bitwise_or for CV_8UC1 and CV_32SC1 square
    // matrices (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_32SC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32SC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat lhs, rhs, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_lhs, d_rhs, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(lhs, side, side, type, 0, 256);
            gen(rhs, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::bitwise_or(lhs, rhs, dst);
            CPU_ON;
            cv::bitwise_or(lhs, rhs, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);

            WARMUP_ON;
            ocl::bitwise_or(d_lhs, d_rhs, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::bitwise_or(d_lhs, d_rhs, d_dst);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);
            ocl::bitwise_or(d_lhs, d_rhs, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// bitwise_xor////////////////////////
TEST(bitwise_xor)
{
    // cv::bitwise_xor versus ocl::bitwise_xor for CV_8UC1 and CV_32SC1 square
    // matrices (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_32SC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32SC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat lhs, rhs, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_lhs, d_rhs, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(lhs, side, side, type, 0, 256);
            gen(rhs, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::bitwise_xor(lhs, rhs, dst);
            CPU_ON;
            cv::bitwise_xor(lhs, rhs, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);

            WARMUP_ON;
            ocl::bitwise_xor(d_lhs, d_rhs, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::bitwise_xor(d_lhs, d_rhs, d_dst);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_lhs.upload(lhs);
            d_rhs.upload(rhs);
            ocl::bitwise_xor(d_lhs, d_rhs, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// bitwise_not////////////////////////
TEST(bitwise_not)
{
    // cv::bitwise_not versus ocl::bitwise_not for CV_8UC1 and CV_32SC1 square
    // matrices (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_32SC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32SC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::bitwise_not(src, dst);
            CPU_ON;
            cv::bitwise_not(src, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::bitwise_not(d_src, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::bitwise_not(d_src, d_dst);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::bitwise_not(d_src, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// compare////////////////////////
TEST(compare)
{
    // cv::compare versus ocl::compare (equality comparison) for CV_8UC1 and
    // CV_32FC1 square matrices (sides 1000, 2000, 4000).
    //
    // The comparison operator is the library constant cv::CMP_EQ. A local
    // `int CMP_EQ = 0` used to shadow it here; the shadow is removed so the
    // value can never drift from the library's definition.
    Mat src1, src2, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src1, d_src2, d_dst;
#endif
    int all_type[] = {CV_8UC1, CV_32FC1};
    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j] ;

            gen(src1, size, size, all_type[j], 0, 256);
            gen(src2, size, size, all_type[j], 0, 256);
            gen(dst, size, size, all_type[j], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            compare(src1, src2, dst, cv::CMP_EQ);
            CPU_ON;
            compare(src1, src2, dst, cv::CMP_EQ);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src1.upload(src1);
            d_src2.upload(src2);

            WARMUP_ON;
            ocl::compare(d_src1, d_src2, d_dst, cv::CMP_EQ);
            WARMUP_OFF;

            GPU_ON;
            ocl::compare(d_src1, d_src2, d_dst, cv::CMP_EQ);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src1.upload(src1);
            d_src2.upload(src2);
            ocl::compare(d_src1, d_src2, d_dst, cv::CMP_EQ);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// pow ////////////////////////
TEST(pow)
{
    // cv::pow versus ocl::pow with exponent -2.0 on CV_32FC1 square matrices
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_32FC1};
    const std::string format_names[] = {"CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);
    const double exponent = -2.0;

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, type, 0, 100);
            gen(dst, side, side, type, 0, 100);

            // One call outside the CPU bracket, then the bracketed call.
            cv::pow(src, exponent, dst);
            CPU_ON;
            cv::pow(src, exponent, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            // Both the source and the destination are uploaded before the warm-up.
            d_src.upload(src);
            d_dst.upload(dst);

            WARMUP_ON;
            ocl::pow(d_src, exponent, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::pow(d_src, exponent, d_dst);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::pow(d_src, exponent, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// MagnitudeSqr////////////////////////
TEST(MagnitudeSqr)
{
    // Squared magnitude (a*a + b*b per element) on CV_32FC1 square matrices
    // (sides 1000, 2000, 4000). The CPU reference is a hand-written double loop
    // in this function; the device side calls ocl::magnitudeSqr.
    const int formats[] = {CV_32FC1};
    const std::string format_names[] = {"CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat in_a, in_b, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_in_a, d_in_b, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(in_a, side, side, type, 0, 256);
            gen(in_b, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // Reference loop, first pass: outside the CPU bracket.
            for (int row = 0; row < in_a.rows; ++row)
            {
                for (int col = 0; col < in_a.cols; ++col)
                {
                    float a = in_a.at<float>(row, col);
                    float b = in_b.at<float>(row, col);
                    // dst.step is in bytes; dividing by 4 turns it into a float index.
                    ((float *)(dst.data))[row * dst.step / 4 + col] = a * a + b * b;
                }
            }

            // Same loop again, this time inside the CPU bracket.
            CPU_ON;
            for (int row = 0; row < in_a.rows; ++row)
            {
                for (int col = 0; col < in_a.cols; ++col)
                {
                    float a = in_a.at<float>(row, col);
                    float b = in_b.at<float>(row, col);
                    ((float *)(dst.data))[row * dst.step / 4 + col] = a * a + b * b;
                }
            }
            CPU_OFF;
#ifdef USE_OPENCL
            d_in_a.upload(in_a);
            d_in_b.upload(in_b);

            WARMUP_ON;
            ocl::magnitudeSqr(d_in_a, d_in_b, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::magnitudeSqr(d_in_a, d_in_b, d_dst);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_in_a.upload(in_a);
            d_in_b.upload(in_b);
            ocl::magnitudeSqr(d_in_a, d_in_b, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// AddWeighted////////////////////////
TEST(AddWeighted)
{
    // cv::addWeighted versus ocl::addWeighted for CV_8UC1 and CV_32FC1 square
    // matrices (sides 1000, 2000, 4000), using weights 2.0 and 1.0 and an
    // added scalar of 3.0.
    const int formats[] = {CV_8UC1, CV_32FC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);
    const double weight_a = 2.0;
    const double weight_b = 1.0;
    const double offset = 3.0;

    Mat in_a, in_b, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_in_a, d_in_b, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(in_a, side, side, type, 0, 256);
            gen(in_b, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::addWeighted(in_a, weight_a, in_b, weight_b, offset, dst);
            CPU_ON;
            cv::addWeighted(in_a, weight_a, in_b, weight_b, offset, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            d_in_a.upload(in_a);
            d_in_b.upload(in_b);

            WARMUP_ON;
            ocl::addWeighted(d_in_a, weight_a, d_in_b, weight_b, offset, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::addWeighted(d_in_a, weight_a, d_in_b, weight_b, offset, d_dst);
            GPU_OFF;

            // Uploads, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_in_a.upload(in_a);
            d_in_b.upload(in_b);
            ocl::addWeighted(d_in_a, weight_a, d_in_b, weight_b, offset, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Blur////////////////////////
TEST(Blur)
{
    // cv::blur versus ocl::blur with a 3x3 kernel, default anchor (-1, -1) and
    // BORDER_CONSTANT, for CV_8UC1 and CV_8UC4 square images
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);
    const Size kernel = Size(3, 3);
    const Point anchor(-1, -1);
    const int border = BORDER_CONSTANT;

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::blur(src, dst, kernel, anchor, border);
            CPU_ON;
            cv::blur(src, dst, kernel, anchor, border);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::blur(d_src, d_dst, kernel, anchor, border);
            WARMUP_OFF;

            GPU_ON;
            ocl::blur(d_src, d_dst, kernel, anchor, border);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::blur(d_src, d_dst, kernel, anchor, border);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Laplacian////////////////////////
TEST(Laplacian)
{
    // cv::Laplacian versus ocl::Laplacian (ddepth -1, aperture 3, scale 1) for
    // CV_8UC1 and CV_8UC4 square images (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);
    const int aperture = 3;

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::Laplacian(src, dst, -1, aperture, 1);
            CPU_ON;
            cv::Laplacian(src, dst, -1, aperture, 1);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::Laplacian(d_src, d_dst, -1, aperture, 1);
            WARMUP_OFF;

            GPU_ON;
            ocl::Laplacian(d_src, d_dst, -1, aperture, 1);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::Laplacian(d_src, d_dst, -1, aperture, 1);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Erode ////////////////////
TEST(Erode)
{
    // cv::erode versus ocl::erode with a 3x3 rectangular structuring element for
    // CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 square images
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat src, dst, element;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, formats[f], Scalar::all(0), Scalar::all(256));
            // The structuring element is rebuilt for every subtest.
            element = cv::getStructuringElement(MORPH_RECT, Size(3, 3));

            // One call outside the CPU bracket, then the bracketed call.
            cv::erode(src, dst, element);
            CPU_ON;
            cv::erode(src, dst, element);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::erode(d_src, d_dst, element);
            WARMUP_OFF;

            GPU_ON;
            ocl::erode(d_src, d_dst, element);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::erode(d_src, d_dst, element);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Sobel ////////////////////////
TEST(Sobel)
{
    // cv::Sobel versus ocl::Sobel (ddepth -1, derivative order 1 in both x and y)
    // for CV_8UC1 and CV_8UC4 square images (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);
    const int order_x = 1;
    const int order_y = 1;

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, formats[f], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::Sobel(src, dst, -1, order_x, order_y);
            CPU_ON;
            cv::Sobel(src, dst, -1, order_x, order_y);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::Sobel(d_src, d_dst, -1, order_x, order_y);
            WARMUP_OFF;

            GPU_ON;
            ocl::Sobel(d_src, d_dst, -1, order_x, order_y);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::Sobel(d_src, d_dst, -1, order_x, order_y);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Scharr ////////////////////////
TEST(Scharr)
{
    // cv::Scharr versus ocl::Scharr (ddepth -1, dx = 1, dy = 0) for CV_8UC1 and
    // CV_8UC4 square images (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);
    const int order_x = 1;
    const int order_y = 0;

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, formats[f], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::Scharr(src, dst, -1, order_x, order_y);
            CPU_ON;
            cv::Scharr(src, dst, -1, order_x, order_y);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::Scharr(d_src, d_dst, -1, order_x, order_y);
            WARMUP_OFF;

            GPU_ON;
            ocl::Scharr(d_src, d_dst, -1, order_x, order_y);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::Scharr(d_src, d_dst, -1, order_x, order_y);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// GaussianBlur ////////////////////////
TEST(GaussianBlur)
{
    // cv::GaussianBlur versus ocl::GaussianBlur with a 9x9 kernel and sigma 0
    // for CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 square images
    // (sides 1000, 2000, 4000).
    Mat src, dst;
    int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j] ;

            gen(src, size, size, all_type[j], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            GaussianBlur(src, dst, Size(9, 9), 0);
            CPU_ON;
            GaussianBlur(src, dst, Size(9, 9), 0);
            CPU_OFF;
#ifdef USE_OPENCL
            // Device matrices are created per subtest: the source is constructed
            // from the host image and the destination with matching size and type.
            // (An unused scratch oclMat that was declared here has been dropped.)
            ocl::oclMat d_src(src);
            ocl::oclMat d_dst(src.size(), src.type());

            WARMUP_ON;
            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
            WARMUP_OFF;

            GPU_ON;
            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// equalizeHist ////////////////////////
TEST(equalizeHist)
{
    // cv::equalizeHist versus ocl::equalizeHist on CV_8UC1 square images
    // (sides 1000, 2000, 4000).
    Mat src, dst;
    int all_type[] = {CV_8UC1};
    std::string type_name[] = {"CV_8UC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j] ;

            gen(src, size, size, all_type[j], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            equalizeHist(src, dst);
            CPU_ON;
            equalizeHist(src, dst);
            CPU_OFF;
#ifdef USE_OPENCL
            // Device matrices are created per subtest. Two oclMat locals that were
            // declared here but never used (histogram / scratch) have been dropped.
            ocl::oclMat d_src(src);
            ocl::oclMat d_dst;

            WARMUP_ON;
            ocl::equalizeHist(d_src, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::equalizeHist(d_src, d_dst);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::equalizeHist(d_src, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
/////////// CopyMakeBorder //////////////////////
TEST(CopyMakeBorder)
{
    // cv::copyMakeBorder versus ocl::copyMakeBorder with margins
    // top 7, bottom 5, left 5, right 7, BORDER_CONSTANT and fill value
    // Scalar(1.0), for CV_8UC1 and CV_8UC4 square images
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);
    const int border = BORDER_CONSTANT;
    const int top = 7, bottom = 5, left = 5, right = 7;

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, formats[f], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::copyMakeBorder(src, dst, top, bottom, left, right, border, cv::Scalar(1.0));
            CPU_ON;
            cv::copyMakeBorder(src, dst, top, bottom, left, right, border, cv::Scalar(1.0));
            CPU_OFF;
#ifdef USE_OPENCL
            // The device source is constructed from the host image for each subtest.
            ocl::oclMat d_src(src);

            WARMUP_ON;
            ocl::copyMakeBorder(d_src, d_dst, top, bottom, left, right, border, cv::Scalar(1.0));
            WARMUP_OFF;

            GPU_ON;
            ocl::copyMakeBorder(d_src, d_dst, top, bottom, left, right, border, cv::Scalar(1.0));
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::copyMakeBorder(d_src, d_dst, top, bottom, left, right, border, cv::Scalar(1.0));
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// cornerMinEigenVal ////////////////////////
TEST(cornerMinEigenVal)
{
    // cv::cornerMinEigenVal versus ocl::cornerMinEigenVal with block size 7 and
    // BORDER_REFLECT, for CV_8UC1 and CV_32FC1 square images
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_32FC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_dst;
#endif
    const int block = 7;
    // The aperture is drawn once per test run from {1, 3, 5, 7} via rand().
    // NOTE(review): this makes the measured workload depend on the RNG state.
    const int aperture = 1 + 2 * (rand() % 4);
    const int border = BORDER_REFLECT;

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, formats[f], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            cv::cornerMinEigenVal(src, dst, block, aperture, border);
            CPU_ON;
            cv::cornerMinEigenVal(src, dst, block, aperture, border);
            CPU_OFF;
#ifdef USE_OPENCL
            // The device source is constructed from the host image for each subtest.
            ocl::oclMat d_src(src);

            WARMUP_ON;
            ocl::cornerMinEigenVal(d_src, d_dst, block, aperture, border);
            WARMUP_OFF;

            GPU_ON;
            ocl::cornerMinEigenVal(d_src, d_dst, block, aperture, border);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::cornerMinEigenVal(d_src, d_dst, block, aperture, border);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// cornerHarris ////////////////////////
TEST(cornerHarris)
{
    // cv::cornerHarris versus ocl::cornerHarris (block size 5, aperture 7,
    // k = 0.1, BORDER_REFLECT) for CV_8UC1 and CV_32FC1 square images
    // (sides 1000, 2000, 4000).
    const int formats[] = {CV_8UC1, CV_32FC1};
    const std::string format_names[] = {"CV_8UC1", "CV_32FC1"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);
    const int block = 5;
    const int aperture = 7;
    const double k = 0.1;

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            SUBTEST << side << 'x' << side << "; " << format_names[f] << " ; BORDER_REFLECT";

            gen(src, side, side, formats[f], 0, 1);

            // One call outside the CPU bracket, then the bracketed call.
            cv::cornerHarris(src, dst, block, aperture, k, BORDER_REFLECT);
            CPU_ON;
            cv::cornerHarris(src, dst, block, aperture, k, BORDER_REFLECT);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::cornerHarris(d_src, d_dst, block, aperture, k, BORDER_REFLECT);
            WARMUP_OFF;

            GPU_ON;
            ocl::cornerHarris(d_src, d_dst, block, aperture, k, BORDER_REFLECT);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::cornerHarris(d_src, d_dst, block, aperture, k, BORDER_REFLECT);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// integral ////////////////////////
TEST(integral)
{
    // cv::integral versus ocl::integral on CV_8UC1 square images
    // (sides 1000, 2000, 4000).
    // Note: the local `sum` below is a matrix and hides cv::sum inside this test.
    Mat src, sum;
#ifdef USE_OPENCL
    // An additional oclMat that was declared here but never used has been dropped.
    ocl::oclMat d_src, d_sum;
#endif
    int all_type[] = {CV_8UC1};
    std::string type_name[] = {"CV_8UC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j] ;

            gen(src, size, size, all_type[j], 0, 256);

            // One call outside the CPU bracket, then the bracketed call.
            integral(src, sum);
            CPU_ON;
            integral(src, sum);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::integral(d_src, d_sum);
            WARMUP_OFF;

            GPU_ON;
            ocl::integral(d_src, d_sum);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::integral(d_src, d_sum);
            d_sum.download(sum);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// WarpAffine ////////////////////////
TEST(WarpAffine)
{
    // cv::warpAffine versus ocl::warpAffine with INTER_NEAREST for CV_8UC1 and
    // CV_8UC4 square images (sides 1000, 2000, 4000). The 2x3 transform is a
    // rotation by 3.14 / 6 radians combined with a (100, -100) translation.
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif
    static const double affine[2][3] =
    {
        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
        {sin(3.14 / 6), cos(3.14 / 6), -100.0}
    };
    // The Mat header wraps the static table without copying it.
    Mat M(2, 3, CV_64F, (void *)affine);
    const int mode = INTER_NEAREST;

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);
            const Size out_size = Size(side, side);

            // One call outside the CPU bracket, then the bracketed call.
            cv::warpAffine(src, dst, M, out_size, mode);
            CPU_ON;
            cv::warpAffine(src, dst, M, out_size, mode);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::warpAffine(d_src, d_dst, M, out_size, mode);
            WARMUP_OFF;

            GPU_ON;
            ocl::warpAffine(d_src, d_dst, M, out_size, mode);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::warpAffine(d_src, d_dst, M, out_size, mode);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// WarpPerspective ////////////////////////
TEST(WarpPerspective)
{
    // cv::warpPerspective versus ocl::warpPerspective with INTER_NEAREST for
    // CV_8UC1 and CV_8UC4 square images (sides 1000, 2000, 4000). The 3x3
    // transform is a rotation by 3.14 / 6 radians plus a (100, -100)
    // translation, with a last row of (0, 0, 1).
    const int formats[] = {CV_8UC1, CV_8UC4};
    const std::string format_names[] = {"CV_8UC1", "CV_8UC4"};
    const size_t format_count = sizeof(formats) / sizeof(formats[0]);

    Mat src, dst;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_dst;
#endif
    static const double homography[3][3] =
    {
        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
        {sin(3.14 / 6), cos(3.14 / 6), -100.0},
        {0.0, 0.0, 1.0}
    };
    // The Mat header wraps the static table without copying it.
    Mat M(3, 3, CV_64F, (void *)homography);
    const int mode = INTER_NEAREST;

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t f = 0; f < format_count; ++f)
        {
            const int type = formats[f];
            SUBTEST << side << 'x' << side << "; " << format_names[f];

            gen(src, side, side, type, 0, 256);
            gen(dst, side, side, type, 0, 256);
            const Size out_size = Size(side, side);

            // One call outside the CPU bracket, then the bracketed call.
            cv::warpPerspective(src, dst, M, out_size, mode);
            CPU_ON;
            cv::warpPerspective(src, dst, M, out_size, mode);
            CPU_OFF;
#ifdef USE_OPENCL
            d_src.upload(src);

            WARMUP_ON;
            ocl::warpPerspective(d_src, d_dst, M, out_size, mode);
            WARMUP_OFF;

            GPU_ON;
            ocl::warpPerspective(d_src, d_dst, M, out_size, mode);
            GPU_OFF;

            // Upload, operation and download inside the GPU_FULL bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::warpPerspective(d_src, d_dst, M, out_size, mode);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// resize ////////////////////////
TEST(resize)
{
    // Compares cv::resize with ocl::resize for a 2x upscale and a 0.5x
    // downscale of square 8UC1 / 8UC4 images (side 1000, 2000, 4000).
    // All "up" subtests run first, followed by all "down" subtests.
    Mat hostSrc, hostDst;
#ifdef USE_OPENCL
    ocl::oclMat devSrc, devDst;
#endif

    struct TypeCase
    {
        int cvType;
        const char *label;
    };
    static const TypeCase typeCases[] =
    {
        {CV_8UC1, "CV_8UC1"},
        {CV_8UC4, "CV_8UC4"}
    };
    const size_t typeCaseCount = sizeof(typeCases) / sizeof(typeCases[0]);

    struct ScaleCase
    {
        double factor;
        const char *suffix;
    };
    static const ScaleCase scaleCases[] =
    {
        {2.0, " ; up"},
        {0.5, " ; down"}
    };
    const size_t scaleCaseCount = sizeof(scaleCases) / sizeof(scaleCases[0]);

    for (size_t s = 0; s < scaleCaseCount; ++s)
    {
        const double factor = scaleCases[s].factor;

        for (int side = 1000; side <= 4000; side *= 2)
        {
            for (size_t k = 0; k < typeCaseCount; ++k)
            {
                SUBTEST << side << 'x' << side << "; " << typeCases[k].label << scaleCases[s].suffix;

                gen(hostSrc, side, side, typeCases[k].cvType, 0, 256);

                // Untimed first call before the measured CPU run.
                resize(hostSrc, hostDst, Size(), factor, factor);

                CPU_ON;
                resize(hostSrc, hostDst, Size(), factor, factor);
                CPU_OFF;

#ifdef USE_OPENCL
                devSrc.upload(hostSrc);

                WARMUP_ON;
                ocl::resize(devSrc, devDst, Size(), factor, factor);
                WARMUP_OFF;

                GPU_ON;
                ocl::resize(devSrc, devDst, Size(), factor, factor);
                GPU_OFF;

                // Upload + kernel + download.
                GPU_FULL_ON;
                devSrc.upload(hostSrc);
                ocl::resize(devSrc, devDst, Size(), factor, factor);
                devDst.download(hostDst);
                GPU_FULL_OFF;
#endif
            }
        }
    }
}
///////////// threshold////////////////////////
TEST(threshold)
{
    // Compares cv::threshold with ocl::threshold for two configurations:
    // 8-bit single channel with THRESH_BINARY, then 32-bit float single
    // channel with THRESH_TRUNC. Each runs on sides 1000, 2000 and 4000.
    Mat hostSrc, hostDst;
#ifdef USE_OPENCL
    ocl::oclMat devSrc, devDst;
#endif

    struct ThreshCase
    {
        int matType;
        int threshType;
        const char *label;
    };
    static const ThreshCase cases[] =
    {
        {CV_8U,    THRESH_BINARY, "; 8UC1; THRESH_BINARY"},
        {CV_32FC1, THRESH_TRUNC,  "; 32FC1; THRESH_TRUNC [NPP]"}
    };
    const size_t caseCount = sizeof(cases) / sizeof(cases[0]);

    const double level = 50.0;
    const double maxValue = 0.0;

    for (size_t c = 0; c < caseCount; ++c)
    {
        const int mode = cases[c].threshType;

        for (int side = 1000; side <= 4000; side *= 2)
        {
            SUBTEST << side << 'x' << side << cases[c].label;

            gen(hostSrc, side, side, cases[c].matType, 0, 100);

            // Untimed first call before the measured CPU run.
            threshold(hostSrc, hostDst, level, maxValue, mode);

            CPU_ON;
            threshold(hostSrc, hostDst, level, maxValue, mode);
            CPU_OFF;

#ifdef USE_OPENCL
            devSrc.upload(hostSrc);

            WARMUP_ON;
            ocl::threshold(devSrc, devDst, level, maxValue, mode);
            WARMUP_OFF;

            GPU_ON;
            ocl::threshold(devSrc, devDst, level, maxValue, mode);
            GPU_OFF;

            // Upload + kernel + download.
            GPU_FULL_ON;
            devSrc.upload(hostSrc);
            ocl::threshold(devSrc, devDst, level, maxValue, mode);
            devDst.download(hostDst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// meanShiftFiltering////////////////////////
TEST(meanShiftFiltering)
{
    // Times cv::pyrMeanShiftFiltering on an 8UC3 image against
    // ocl::meanShiftFiltering on a freshly generated 8UC4 image of the
    // same side (1000, 2000, 4000), as the subtest label states.
    const int spatialRadius = 10;
    const int colorRadius = 10;

    Mat hostImg, hostOut;
#ifdef USE_OPENCL
    ocl::oclMat devImg, devOut;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        SUBTEST << side << 'x' << side << "; 8UC3 vs 8UC4";

        gen(hostImg, side, side, CV_8UC3, Scalar::all(0), Scalar::all(256));

        // Untimed first call before the measured CPU run.
        pyrMeanShiftFiltering(hostImg, hostOut, spatialRadius, colorRadius);

        CPU_ON;
        pyrMeanShiftFiltering(hostImg, hostOut, spatialRadius, colorRadius);
        CPU_OFF;

#ifdef USE_OPENCL
        // The OCL path is fed a 4-channel image instead of the 3-channel one.
        gen(hostImg, side, side, CV_8UC4, Scalar::all(0), Scalar::all(256));
        devImg.upload(hostImg);

        WARMUP_ON;
        ocl::meanShiftFiltering(devImg, devOut, spatialRadius, colorRadius);
        WARMUP_OFF;

        GPU_ON;
        ocl::meanShiftFiltering(devImg, devOut, spatialRadius, colorRadius);
        GPU_OFF;

        // Upload + kernel + download.
        GPU_FULL_ON;
        devImg.upload(hostImg);
        ocl::meanShiftFiltering(devImg, devOut, spatialRadius, colorRadius);
        devOut.download(hostOut);
        GPU_FULL_OFF;
#endif
    }
}
///////////// meanShiftProc////////////////////////
// Runs the mean-shift iteration for a single pixel of a 4-bytes-per-pixel image.
//
// x0, y0  - starting pixel coordinates (column, row).
// sptr    - pointer to the source pixel at (x0, y0); neighbours are reached
//           relative to it using sstep and a 4-byte pixel stride.
// dptr    - destination pixel; receives the final 3 averaged channel values
//           plus the untouched 4th byte of the source pixel.
// sstep   - source row stride in bytes.
// size    - image size, used to clip the search window.
// sp      - half-width of the square spatial window.
// sr      - colour radius; a neighbour is accepted when the sum of the three
//           tab[] lookups is <= sr * sr.
// maxIter - upper bound on the number of iterations.
// eps     - convergence threshold for the combined position/colour shift.
// tab     - lookup table indexed by (difference + 255); the caller in this
//           file (meanShiftProc_) fills it with squared differences.
//
// Returns the final window centre, narrowed to short.
COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab)
{
int isr2 = sr * sr;
int c0, c1, c2, c3;
int iter;
uchar *ptr = NULL;
uchar *pstart = NULL;
// Displacement of the window centre produced by the previous iteration.
int revx = 0, revy = 0;
// Current centre colour; c3 is only carried through to the output.
c0 = sptr[0];
c1 = sptr[1];
c2 = sptr[2];
c3 = sptr[3];
// iterate meanshift procedure
for (iter = 0; iter < maxIter; iter++)
{
int count = 0;
// Running sums of accepted colours (s0..s2) and coordinates (sx, sy).
int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
//mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
int minx = x0 - sp;
int miny = y0 - sp;
int maxx = x0 + sp;
int maxy = y0 + sp;
//deal with the image boundary
if (minx < 0)
{
minx = 0;
}
if (miny < 0)
{
miny = 0;
}
if (maxx >= size.width)
{
maxx = size.width - 1;
}
if (maxy >= size.height)
{
maxy = size.height - 1;
}
if (iter == 0)
{
pstart = sptr;
}
else
{
pstart = pstart + revy * sstep + (revx << 2); //point to the new position
}
ptr = pstart;
ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row
// The loop increment rewinds ptr by the window width and advances one row.
for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
{
int rowCount = 0;
int x = minx;
#if CV_ENABLE_UNROLLED
// Unrolled variant: four pixels (16 bytes) per iteration.
for (; x + 4 <= maxx; x += 4, ptr += 16)
{
int t0, t1, t2;
t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x;
rowCount++;
}
t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 1;
rowCount++;
}
t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 2;
rowCount++;
}
t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 3;
rowCount++;
}
}
#endif
// Remaining pixels of the row (or the whole row when not unrolled).
for (; x <= maxx; x++, ptr += 4)
{
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x;
rowCount++;
}
}
if (rowCount == 0)
{
continue;
}
count += rowCount;
sy += y * rowCount;
}
// No neighbour fell inside the colour radius: keep the current centre.
if (count == 0)
{
break;
}
// Integer means of the accepted positions and colours.
int x1 = sx / count;
int y1 = sy / count;
s0 = s0 / count;
s1 = s1 / count;
s2 = s2 / count;
// Stop when the centre did not move, or when the position shift plus the
// tab[] colour-shift terms is within eps.
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
//revise the pointer corresponding to the new (y0,x0)
revx = x1 - x0;
revy = y1 - y0;
x0 = x1;
y0 = y1;
c0 = s0;
c1 = s1;
c2 = s2;
if (stopFlag)
{
break;
}
} //for iter
// Write the final colour; the 4th byte is the original source value.
dptr[0] = (uchar)c0;
dptr[1] = (uchar)c1;
dptr[2] = (uchar)c2;
dptr[3] = (uchar)c3;
COOR coor;
coor.x = static_cast<short>(x0);
coor.y = static_cast<short>(y0);
return coor;
}
void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
{
if (src_roi.empty())
{
CV_Error(CV_StsBadArg, "The input image is empty");
}
if (src_roi.depth() != CV_8U || src_roi.channels() != 4)
{
CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported");
}
CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
(src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
CV_Assert(!(dstCoor_roi.step & 0x3));
if (!(crit.type & cv::TermCriteria::MAX_ITER))
{
crit.maxCount = 5;
}
int maxIter = std::min(std::max(crit.maxCount, 1), 100);
float eps;
if (!(crit.type & cv::TermCriteria::EPS))
{
eps = 1.f;
}
eps = (float)std::max(crit.epsilon, 0.0);
int tab[512];
for (int i = 0; i < 512; i++)
{
tab[i] = (i - 255) * (i - 255);
}
uchar *sptr = src_roi.data;
uchar *dptr = dst_roi.data;
short *dCoorptr = (short *)dstCoor_roi.data;
int sstep = (int)src_roi.step;
int dstep = (int)dst_roi.step;
int dCoorstep = (int)dstCoor_roi.step >> 1;
cv::Size size = src_roi.size();
for (int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1))
{
for (int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2)
{
*((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
}
}
}
TEST(meanShiftProc)
{
    // Times the CPU reference meanShiftProc_() against ocl::meanShiftProc on
    // 8UC4 images with CV_16SC2 coordinate output (side 1000, 2000, 4000).
    const int spatialRadius = 5;
    const int colorRadius = 6;
    TermCriteria criteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1);

    Mat hostSrc, hostDst, hostCoords;
#ifdef USE_OPENCL
    ocl::oclMat devSrc, devDst, devCoords;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        SUBTEST << side << 'x' << side << "; 8UC4 and CV_16SC2 ";

        gen(hostSrc, side, side, CV_8UC4, Scalar::all(0), Scalar::all(256));
        gen(hostDst, side, side, CV_8UC4, Scalar::all(0), Scalar::all(256));
        gen(hostCoords, side, side, CV_16SC2, Scalar::all(0), Scalar::all(256));

        // Untimed first call before the measured CPU run.
        meanShiftProc_(hostSrc, hostDst, hostCoords, spatialRadius, colorRadius, criteria);

        CPU_ON;
        meanShiftProc_(hostSrc, hostDst, hostCoords, spatialRadius, colorRadius, criteria);
        CPU_OFF;

#ifdef USE_OPENCL
        devSrc.upload(hostSrc);

        WARMUP_ON;
        ocl::meanShiftProc(devSrc, devDst, devCoords, spatialRadius, colorRadius, criteria);
        WARMUP_OFF;

        GPU_ON;
        ocl::meanShiftProc(devSrc, devDst, devCoords, spatialRadius, colorRadius, criteria);
        GPU_OFF;

        // Upload + kernel + download of both outputs.
        GPU_FULL_ON;
        devSrc.upload(hostSrc);
        ocl::meanShiftProc(devSrc, devDst, devCoords, spatialRadius, colorRadius, criteria);
        devDst.download(hostDst);
        devCoords.download(hostCoords);
        GPU_FULL_OFF;
#endif
    }
}
///////////// ConvertTo////////////////////////
TEST(ConvertTo)
{
    // Compares Mat::convertTo with oclMat::convertTo, converting 8UC1 and
    // 8UC4 images (side 1000, 2000, 4000) with a CV_32FC1 target type.
    Mat hostSrc, hostDst;
#ifdef USE_OPENCL
    ocl::oclMat devSrc, devDst;
#endif

    struct TypeCase
    {
        int cvType;
        const char *label;
    };
    static const TypeCase typeCases[] =
    {
        {CV_8UC1, "CV_8UC1"},
        {CV_8UC4, "CV_8UC4"}
    };
    const size_t typeCaseCount = sizeof(typeCases) / sizeof(typeCases[0]);
    const int targetType = CV_32FC1;

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t k = 0; k < typeCaseCount; ++k)
        {
            SUBTEST << side << 'x' << side << "; " << typeCases[k].label << " to 32FC1";

            gen(hostSrc, side, side, typeCases[k].cvType, 0, 256);

            // Untimed first call before the measured CPU run.
            hostSrc.convertTo(hostDst, targetType);

            CPU_ON;
            hostSrc.convertTo(hostDst, targetType);
            CPU_OFF;

#ifdef USE_OPENCL
            devSrc.upload(hostSrc);

            WARMUP_ON;
            devSrc.convertTo(devDst, targetType);
            WARMUP_OFF;

            GPU_ON;
            devSrc.convertTo(devDst, targetType);
            GPU_OFF;

            // Upload + conversion + download.
            GPU_FULL_ON;
            devSrc.upload(hostSrc);
            devSrc.convertTo(devDst, targetType);
            devDst.download(hostDst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// copyTo////////////////////////
TEST(copyTo)
{
    // Compares Mat::copyTo with oclMat::copyTo for 8UC1 and 8UC4 images
    // of side 1000, 2000 and 4000.
    Mat hostSrc, hostDst;
#ifdef USE_OPENCL
    ocl::oclMat devSrc, devDst;
#endif

    struct TypeCase
    {
        int cvType;
        const char *label;
    };
    static const TypeCase typeCases[] =
    {
        {CV_8UC1, "CV_8UC1"},
        {CV_8UC4, "CV_8UC4"}
    };
    const size_t typeCaseCount = sizeof(typeCases) / sizeof(typeCases[0]);

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t k = 0; k < typeCaseCount; ++k)
        {
            SUBTEST << side << 'x' << side << "; " << typeCases[k].label;

            gen(hostSrc, side, side, typeCases[k].cvType, 0, 256);

            // Untimed first call before the measured CPU run.
            hostSrc.copyTo(hostDst);

            CPU_ON;
            hostSrc.copyTo(hostDst);
            CPU_OFF;

#ifdef USE_OPENCL
            devSrc.upload(hostSrc);

            WARMUP_ON;
            devSrc.copyTo(devDst);
            WARMUP_OFF;

            GPU_ON;
            devSrc.copyTo(devDst);
            GPU_OFF;

            // Upload + copy + download.
            GPU_FULL_ON;
            devSrc.upload(hostSrc);
            devSrc.copyTo(devDst);
            devDst.download(hostDst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// setTo////////////////////////
TEST(setTo)
{
    // Compares Mat::setTo with oclMat::setTo, filling 8UC1 and 8UC4 images
    // (side 1000, 2000, 4000) with the scalar (1, 2, 3, 4).
    Mat hostImg, hostUnused;
    const Scalar fill(1, 2, 3, 4);
#ifdef USE_OPENCL
    ocl::oclMat devImg, devUnused;
#endif

    struct TypeCase
    {
        int cvType;
        const char *label;
    };
    static const TypeCase typeCases[] =
    {
        {CV_8UC1, "CV_8UC1"},
        {CV_8UC4, "CV_8UC4"}
    };
    const size_t typeCaseCount = sizeof(typeCases) / sizeof(typeCases[0]);

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t k = 0; k < typeCaseCount; ++k)
        {
            SUBTEST << side << 'x' << side << "; " << typeCases[k].label;

            gen(hostImg, side, side, typeCases[k].cvType, 0, 256);

            // Untimed first call before the measured CPU run.
            hostImg.setTo(fill);

            CPU_ON;
            hostImg.setTo(fill);
            CPU_OFF;

#ifdef USE_OPENCL
            devImg.upload(hostImg);

            WARMUP_ON;
            devImg.setTo(fill);
            WARMUP_OFF;

            GPU_ON;
            devImg.setTo(fill);
            GPU_OFF;

            // Upload + fill; nothing is downloaded in this test.
            GPU_FULL_ON;
            devImg.upload(hostImg);
            devImg.setTo(fill);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Merge////////////////////////
TEST(Merge)
{
    // Compares cv::merge with ocl::merge when combining four constant
    // single-channel planes (CV_8UC1 or CV_32FC1) of side 1000, 2000, 4000.
    Mat dst;
#ifdef USE_OPENCL
    ocl::oclMat d_dst;
#endif
    int channels = 4;
    int all_type[] = {CV_8UC1, CV_32FC1};
    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};

    for (int size = 1000; size <= 4000; size *= 2)
    {
        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
        {
            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
            Size size1 = Size(size, size);
            std::vector<Mat> src(channels);

            // Plane i is filled with the constant value i.
            for (int i = 0; i < channels; ++i)
            {
                src[i] = Mat(size1, all_type[j], cv::Scalar::all(i));
            }

            // Untimed first call before the measured CPU run.
            merge(src, dst);

            CPU_ON;
            merge(src, dst);
            CPU_OFF;

#ifdef USE_OPENCL
            std::vector<ocl::oclMat> d_src(channels);

            for (int i = 0; i < channels; ++i)
            {
                d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i));
            }

            WARMUP_ON;
            ocl::merge(d_src, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::merge(d_src, d_dst);
            GPU_OFF;

            GPU_FULL_ON;

            // Rebuild the inputs with the type under test. This used to be a
            // hard-coded CV_8U, which made the CV_32FC1 subtest time an
            // 8-bit merge in the full-GPU measurement.
            for (int i = 0; i < channels; ++i)
            {
                d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i));
            }

            ocl::merge(d_src, d_dst);
            d_dst.download(dst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// Split////////////////////////
TEST(Split)
{
    // Compares cv::split with ocl::split on 4-channel images whose depth is
    // taken from CV_8UC1 / CV_32FC1 (side 1000, 2000, 4000). Every pixel is
    // initialised to (1, 2, 3, 4).
    struct TypeCase
    {
        int cvType;
        const char *label;
    };
    static const TypeCase typeCases[] =
    {
        {CV_8UC1,  "CV_8UC1"},
        {CV_32FC1, "CV_32FC1"}
    };
    const size_t typeCaseCount = sizeof(typeCases) / sizeof(typeCases[0]);

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t k = 0; k < typeCaseCount; ++k)
        {
            SUBTEST << side << 'x' << side << "; " << typeCases[k].label;

            const Size imgSize(side, side);
            const int fourChannelType = CV_MAKE_TYPE(typeCases[k].cvType, 4);

            Mat hostSrc(imgSize, fourChannelType, cv::Scalar(1, 2, 3, 4));
            std::vector<cv::Mat> hostPlanes;

            // Untimed first call before the measured CPU run.
            split(hostSrc, hostPlanes);

            CPU_ON;
            split(hostSrc, hostPlanes);
            CPU_OFF;

#ifdef USE_OPENCL
            ocl::oclMat devSrc(imgSize, fourChannelType, cv::Scalar(1, 2, 3, 4));
            std::vector<cv::ocl::oclMat> devPlanes;

            WARMUP_ON;
            ocl::split(devSrc, devPlanes);
            WARMUP_OFF;

            GPU_ON;
            ocl::split(devSrc, devPlanes);
            GPU_OFF;

            // Upload + split; the planes are not downloaded in this test.
            GPU_FULL_ON;
            devSrc.upload(hostSrc);
            ocl::split(devSrc, devPlanes);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// norm////////////////////////
TEST(norm)
{
    // Compares the two-matrix NORM_INF norm on CPU (cv::norm) and on the
    // OpenCL device (ocl::norm) for CV_8UC1 images of side 1000, 2000, 4000.
    Mat src, buf;
#ifdef USE_OPENCL
    ocl::oclMat d_src, d_buf;
#endif

    for (int size = 1000; size <= 4000; size *= 2)
    {
        SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF";
        gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
        gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));

        // The OCL side below computes the norm of the difference of two
        // matrices. The CPU side used to time the single-matrix norm(src),
        // which is a different operation, so it now uses the two-matrix
        // overload as well.
        norm(src, buf, NORM_INF);

        CPU_ON;
        norm(src, buf, NORM_INF);
        CPU_OFF;

#ifdef USE_OPENCL
        d_src.upload(src);
        d_buf.upload(buf);

        WARMUP_ON;
        ocl::norm(d_src, d_buf, NORM_INF);
        WARMUP_OFF;

        GPU_ON;
        ocl::norm(d_src, d_buf, NORM_INF);
        GPU_OFF;

        // Full measurement: upload both operands, then compute.
        GPU_FULL_ON;
        d_src.upload(src);
        d_buf.upload(buf);
        ocl::norm(d_src, d_buf, NORM_INF);
        GPU_FULL_OFF;
#endif
    }
}
///////////// remap////////////////////////
TEST(remap)
{
    // Compares cv::remap with ocl::remap using CV_32FC1 x/y maps, bilinear
    // interpolation and a constant border, on 8UC1 / 8UC4 images of side
    // 1000, 2000 and 4000.
    Mat hostSrc, hostDst, mapX, mapY;
#ifdef USE_OPENCL
    ocl::oclMat devSrc, devDst, devMapX, devMapY;
#endif

    struct TypeCase
    {
        int cvType;
        const char *label;
    };
    static const TypeCase typeCases[] =
    {
        {CV_8UC1, "CV_8UC1"},
        {CV_8UC4, "CV_8UC4"}
    };
    const size_t typeCaseCount = sizeof(typeCases) / sizeof(typeCases[0]);

    const int interp = INTER_LINEAR;
    const int border = BORDER_CONSTANT;

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t k = 0; k < typeCaseCount; ++k)
        {
            SUBTEST << side << 'x' << side << "; src " << typeCases[k].label << "; map CV_32FC1";

            gen(hostSrc, side, side, typeCases[k].cvType, 0, 256);
            mapX.create(side, side, CV_32FC1);
            hostDst.create(side, side, CV_32FC1);
            mapY.create(side, side, CV_32FC1);

            // Each map entry is the pixel's own coordinate pulled towards the
            // image centre by a factor of 0.75.
            for (int row = 0; row < side; ++row)
            {
                float *xRow = mapX.ptr<float>(row);
                float *yRow = mapY.ptr<float>(row);

                for (int col = 0; col < side; ++col)
                {
                    xRow[col] = (col - side * 0.5f) * 0.75f + side * 0.5f;
                    yRow[col] = (row - side * 0.5f) * 0.75f + side * 0.5f;
                }
            }

            // Untimed first call before the measured CPU run.
            remap(hostSrc, hostDst, mapX, mapY, interp, border);

            CPU_ON;
            remap(hostSrc, hostDst, mapX, mapY, interp, border);
            CPU_OFF;

#ifdef USE_OPENCL
            devSrc.upload(hostSrc);
            devDst.upload(hostDst);
            devMapX.upload(mapX);
            devMapY.upload(mapY);

            WARMUP_ON;
            ocl::remap(devSrc, devDst, devMapX, devMapY, interp, border);
            WARMUP_OFF;

            GPU_ON;
            ocl::remap(devSrc, devDst, devMapX, devMapY, interp, border);
            GPU_OFF;

            // Upload of the source + kernel + download; maps stay on the device.
            GPU_FULL_ON;
            devSrc.upload(hostSrc);
            ocl::remap(devSrc, devDst, devMapX, devMapY, interp, border);
            devDst.download(hostDst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// cvtColor////////////////////////
TEST(cvtColor)
{
    // Compares cv::cvtColor with ocl::cvtColor for the CV_RGBA2GRAY
    // conversion of 8UC4 images (side 1000, 2000, 4000).
    Mat hostSrc, hostDst;
#ifdef USE_OPENCL
    ocl::oclMat devSrc, devDst;
#endif

    struct TypeCase
    {
        int cvType;
        const char *label;
    };
    static const TypeCase typeCases[] =
    {
        {CV_8UC4, "CV_8UC4"}
    };
    const size_t typeCaseCount = sizeof(typeCases) / sizeof(typeCases[0]);

    const int conversion = CV_RGBA2GRAY;
    const int dstChannels = 4;

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t k = 0; k < typeCaseCount; ++k)
        {
            // Input is generated before the subtest is announced.
            gen(hostSrc, side, side, typeCases[k].cvType, 0, 256);
            SUBTEST << side << "x" << side << "; " << typeCases[k].label << " ; CV_RGBA2GRAY";

            // Untimed first call before the measured CPU run.
            cvtColor(hostSrc, hostDst, conversion, dstChannels);

            CPU_ON;
            cvtColor(hostSrc, hostDst, conversion, dstChannels);
            CPU_OFF;

#ifdef USE_OPENCL
            devSrc.upload(hostSrc);

            WARMUP_ON;
            ocl::cvtColor(devSrc, devDst, conversion, dstChannels);
            WARMUP_OFF;

            GPU_ON;
            ocl::cvtColor(devSrc, devDst, conversion, dstChannels);
            GPU_OFF;

            // Upload + conversion + download.
            GPU_FULL_ON;
            devSrc.upload(hostSrc);
            ocl::cvtColor(devSrc, devDst, conversion, dstChannels);
            devDst.download(hostDst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// filter2D////////////////////////
TEST(filter2D)
{
    // Compares cv::filter2D with ocl::filter2D using random CV_32FC1 kernels
    // of size 3, 7 and 15 on 8UC1 / 8UC4 images of side 1000, 2000, 4000.
    Mat hostSrc;

    struct TypeCase
    {
        int cvType;
        const char *label;
    };
    static const TypeCase typeCases[] =
    {
        {CV_8UC1, "CV_8UC1"},
        {CV_8UC4, "CV_8UC4"}
    };
    const size_t typeCaseCount = sizeof(typeCases) / sizeof(typeCases[0]);

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t k = 0; k < typeCaseCount; ++k)
        {
            // One source image is shared by all kernel sizes of this type.
            gen(hostSrc, side, side, typeCases[k].cvType, 0, 256);

            // Kernel size sequence: 3 -> 7 -> 15.
            for (int kernelSide = 3; kernelSide <= 15; kernelSide = 2 * kernelSide + 1)
            {
                SUBTEST << "ksize = " << kernelSide << "; " << side << 'x' << side << "; " << typeCases[k].label;

                Mat coeffs;
                gen(coeffs, kernelSide, kernelSide, CV_32FC1, 0.0, 1.0);

                Mat hostDst;

                // Untimed first call before the measured CPU run.
                cv::filter2D(hostSrc, hostDst, -1, coeffs);

                CPU_ON;
                cv::filter2D(hostSrc, hostDst, -1, coeffs);
                CPU_OFF;

#ifdef USE_OPENCL
                ocl::oclMat devSrc(hostSrc);
                ocl::oclMat devDst;

                WARMUP_ON;
                ocl::filter2D(devSrc, devDst, -1, coeffs);
                WARMUP_OFF;

                GPU_ON;
                ocl::filter2D(devSrc, devDst, -1, coeffs);
                GPU_OFF;

                // Upload + filter + download.
                GPU_FULL_ON;
                devSrc.upload(hostSrc);
                ocl::filter2D(devSrc, devDst, -1, coeffs);
                devDst.download(hostDst);
                GPU_FULL_OFF;
#endif
            }
        }
    }
}
///////////// dft ////////////////////////
TEST(dft)
{
    // Compares cv::dft with ocl::dft on CV_32FC1 and CV_32FC2 inputs of
    // side 1000, 2000 and 4000. The OCL call is given the explicit
    // transform size (side x side).
    Mat hostSrc, hostDst;
#ifdef USE_OPENCL
    ocl::oclMat devSrc, devDst;
#endif

    struct TypeCase
    {
        int cvType;
        const char *label;
    };
    static const TypeCase typeCases[] =
    {
        {CV_32FC1, "CV_32FC1"},
        {CV_32FC2, "CV_32FC2"}
    };
    const size_t typeCaseCount = sizeof(typeCases) / sizeof(typeCases[0]);

    for (int side = 1000; side <= 4000; side *= 2)
    {
        for (size_t k = 0; k < typeCaseCount; ++k)
        {
            SUBTEST << side << 'x' << side << "; " << typeCases[k].label << " ; complex-to-complex";

            gen(hostSrc, side, side, typeCases[k].cvType, Scalar::all(0), Scalar::all(1));

            // Untimed first call before the measured CPU run.
            dft(hostSrc, hostDst);

            CPU_ON;
            dft(hostSrc, hostDst);
            CPU_OFF;

#ifdef USE_OPENCL
            devSrc.upload(hostSrc);

            WARMUP_ON;
            ocl::dft(devSrc, devDst, Size(side, side));
            WARMUP_OFF;

            GPU_ON;
            ocl::dft(devSrc, devDst, Size(side, side));
            GPU_OFF;

            // Upload + transform + download.
            GPU_FULL_ON;
            devSrc.upload(hostSrc);
            ocl::dft(devSrc, devDst, Size(side, side));
            devDst.download(hostDst);
            GPU_FULL_OFF;
#endif
        }
    }
}
///////////// gemm ////////////////////////
TEST(gemm)
{
    // Compares cv::gemm with ocl::gemm computing 1.0 * A * B + 1.0 * C on
    // square CV_32FC1 matrices of side 1000, 2000 and 4000.
    const double alpha = 1.0;
    const double beta = 1.0;

    Mat hostA, hostB, hostC, hostOut;
#ifdef USE_OPENCL
    ocl::oclMat devA, devB, devC, devOut;
#endif

    for (int side = 1000; side <= 4000; side *= 2)
    {
        SUBTEST << side << 'x' << side;

        gen(hostA, side, side, CV_32FC1, Scalar::all(-10), Scalar::all(10));
        gen(hostB, side, side, CV_32FC1, Scalar::all(-10), Scalar::all(10));
        gen(hostC, side, side, CV_32FC1, Scalar::all(-10), Scalar::all(10));

        // Untimed first call before the measured CPU run.
        gemm(hostA, hostB, alpha, hostC, beta, hostOut);

        CPU_ON;
        gemm(hostA, hostB, alpha, hostC, beta, hostOut);
        CPU_OFF;

#ifdef USE_OPENCL
        devA.upload(hostA);
        devB.upload(hostB);
        devC.upload(hostC);

        WARMUP_ON;
        ocl::gemm(devA, devB, alpha, devC, beta, devOut);
        WARMUP_OFF;

        GPU_ON;
        ocl::gemm(devA, devB, alpha, devC, beta, devOut);
        GPU_OFF;

        // Upload of all three operands + multiply + download.
        GPU_FULL_ON;
        devA.upload(hostA);
        devB.upload(hostB);
        devC.upload(hostC);
        ocl::gemm(devA, devB, alpha, devC, beta, devOut);
        devOut.download(hostOut);
        GPU_FULL_OFF;
#endif
    }
}
// Entry point: lists the available OpenCL devices, parses the command line,
// selects the requested device, configures the TestSystem singleton and runs
// the registered tests.
//
// Returns 0 on success (or after printing help) and -1 when no OpenCL device
// is found or the requested device id is out of range.
int main(int argc, const char *argv[])
{
#ifdef USE_OPENCL
    vector<ocl::Info> oclinfo;
    int num_devices = getDevice(oclinfo);

    if (num_devices < 1)
    {
        cerr << "no device found\n";
        return -1;
    }

    // Print a flat, zero-based index over every device of every Info entry;
    // the same numbering is used by the --device option below.
    int devidx = 0;

    for (size_t i = 0; i < oclinfo.size(); i++)
    {
        for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++)
        {
            printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str());
        }
    }

#endif
    redirectError(cvErrorCallback);

    const char *keys =
        "{ h | help | false | print help message }"
        "{ f | filter | | filter for test }"
        "{ w | workdir | | set working directory }"
        "{ l | list | false | show all tests }"
        "{ d | device | 0 | device id }"
        "{ i | iters | 10 | iteration count }"
        "{ m | warmup | 1 | gpu warm up iteration count}"
        "{ t | xtop | 1.1 | xfactor top boundary}"
        "{ b | xbottom | 0.9 | xfactor bottom boundary}"
        "{ v | verify | false | only run gpu once to verify if problems occur}";

    CommandLineParser cmd(argc, argv, keys);

    if (cmd.get<bool>("help"))
    {
        cout << "Avaible options:" << endl;
        cmd.printParams();
        return 0;
    }

#ifdef USE_OPENCL
    int device = cmd.get<int>("device");

    if (device < 0 || device >= num_devices)
    {
        cerr << "Invalid device ID" << endl;
        return -1;
    }

    const bool verify = cmd.get<bool>("verify");

    // Walk the same flat numbering as the listing above and activate the
    // matching device; the flag replaces the former goto out of both loops.
    devidx = 0;
    bool device_selected = false;

    for (size_t i = 0; i < oclinfo.size() && !device_selected; i++)
    {
        for (size_t j = 0; j < oclinfo[i].DeviceName.size() && !device_selected; j++, devidx++)
        {
            if (device == devidx)
            {
                ocl::setDevice(oclinfo[i], j);
                TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]);
                printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str());
                device_selected = true;
            }
        }
    }

#endif
    string filter = cmd.get<string>("filter");
    string workdir = cmd.get<string>("workdir");
    bool list = cmd.get<bool>("list");
    int iters = cmd.get<int>("iters");
    int wu_iters = cmd.get<int>("warmup");
    double x_top = cmd.get<double>("xtop");
    double x_bottom = cmd.get<double>("xbottom");

    TestSystem::instance().setTopThreshold(x_top);
    TestSystem::instance().setBottomThreshold(x_bottom);

    if (!filter.empty())
    {
        TestSystem::instance().setTestFilter(filter);
    }

    if (!workdir.empty())
    {
        // Make sure the working directory ends with a path separator.
        if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
        {
            workdir += '/';
        }

        TestSystem::instance().setWorkingDir(workdir);
    }

    if (list)
    {
        TestSystem::instance().setListMode(true);
    }

    TestSystem::instance().setNumIters(iters);
    TestSystem::instance().setGPUWarmupIters(wu_iters);

#ifdef USE_OPENCL

    // Applied last on purpose: these used to be set before the two calls
    // above, which then overwrote the verify-mode iteration counts with the
    // --iters / --warmup values.
    if (verify)
    {
        TestSystem::instance().setNumIters(1);
        TestSystem::instance().setGPUWarmupIters(0);
        TestSystem::instance().setCPUIters(0);
    }

#endif
    TestSystem::instance().run();

    return 0;
}