fixed ocl::integral and enabled perf test for it

This commit is contained in:
Ilya Lavrenov 2013-10-03 19:17:54 +04:00
parent 8224f9843e
commit 10d60f99dc
4 changed files with 84 additions and 80 deletions

View File

@ -198,7 +198,7 @@ PERF_TEST_P(cornerHarrisFixture, cornerHarris,
typedef TestBaseWithParam<Size> integralFixture; typedef TestBaseWithParam<Size> integralFixture;
PERF_TEST_P(integralFixture, DISABLED_integral, OCL_TYPICAL_MAT_SIZES) // TODO does not work properly PERF_TEST_P(integralFixture, integral, OCL_TYPICAL_MAT_SIZES)
{ {
const Size srcSize = GetParam(); const Size srcSize = GetParam();

View File

@ -1141,7 +1141,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
CvSize sz; CvSize sz;
cv::Rect roi, roi2; cv::Rect roi, roi2;
cv::Mat imgroi, imgroisq;
cv::ocl::oclMat resizeroi, gimgroi, gimgroisq; cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
for( int i = 0; i < m_loopcount; i++ ) for( int i = 0; i < m_loopcount; i++ )

View File

@ -975,10 +975,12 @@ namespace cv
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum) void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
{ {
CV_Assert(src.type() == CV_8UC1); CV_Assert(src.type() == CV_8UC1);
if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) if(!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "select device don't support double"); CV_Error(CV_GpuNotSupported, "select device don't support double");
return;
} }
int vlen = 4; int vlen = 4;
int offset = src.offset / vlen; int offset = src.offset / vlen;
int pre_invalid = src.offset % vlen; int pre_invalid = src.offset % vlen;
@ -986,50 +988,45 @@ namespace cv
oclMat t_sum , t_sqsum; oclMat t_sum , t_sqsum;
int w = src.cols + 1, h = src.rows + 1; int w = src.cols + 1, h = src.rows + 1;
int depth; int depth = src.depth() == CV_8U ? CV_32S : CV_64F;
if( src.cols * src.rows <= 2901 * 2901 ) //2901 is the maximum size for int when all values are 255 int type = CV_MAKE_TYPE(depth, 1);
{
t_sum.create(src.cols, src.rows, CV_32SC1);
sum.create(h, w, CV_32SC1);
}
else
{
//Use float to prevent overflow
t_sum.create(src.cols, src.rows, CV_32FC1);
sum.create(h, w, CV_32FC1);
}
t_sqsum.create(src.cols, src.rows, CV_32FC1);
sqsum.create(h, w, CV_32FC1);
depth = sum.depth();
int sum_offset = sum.offset / vlen;
int sqsum_offset = sqsum.offset / vlen;
vector<pair<size_t , const void *> > args; t_sum.create(src.cols, src.rows, type);
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); sum.create(h, w, type);
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); t_sqsum.create(src.cols, src.rows, CV_32FC1);
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); sqsum.create(h, w, CV_32FC1);
args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); int sum_offset = sum.offset / vlen;
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); int sqsum_offset = sqsum.offset / vlen;
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); vector<pair<size_t , const void *> > args;
size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth); args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
args.clear(); args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth);
args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step));
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); args.clear();
args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth); args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step));
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth);
} }
void integral(const oclMat &src, oclMat &sum) void integral(const oclMat &src, oclMat &sum)
@ -1042,39 +1039,35 @@ namespace cv
oclMat t_sum; oclMat t_sum;
int w = src.cols + 1, h = src.rows + 1; int w = src.cols + 1, h = src.rows + 1;
int depth; int depth = src.depth() == CV_8U ? CV_32S : CV_32F;
if(src.cols * src.rows <= 2901 * 2901) int type = CV_MAKE_TYPE(depth, 1);
{
t_sum.create(src.cols, src.rows, CV_32SC1); t_sum.create(src.cols, src.rows, type);
sum.create(h, w, CV_32SC1); sum.create(h, w, type);
}else
{ int sum_offset = sum.offset / vlen;
t_sum.create(src.cols, src.rows, CV_32FC1); vector<pair<size_t , const void *> > args;
sum.create(h, w, CV_32FC1); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
} args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
depth = sum.depth(); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
int sum_offset = sum.offset / vlen; args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
vector<pair<size_t , const void *> > args; args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth);
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); args.clear();
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth); args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
args.clear(); args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);
} }
/////////////////////// corner ////////////////////////////// /////////////////////// corner //////////////////////////////

View File

@ -579,7 +579,19 @@ TEST_P(cornerHarris, Mat)
struct integral : ImgprocTestBase {}; struct integral : ImgprocTestBase {};
TEST_P(integral, Mat) TEST_P(integral, Mat1)
{
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
cv::ocl::integral(clmat1_roi, cldst_roi);
cv::integral(mat1_roi, dst_roi);
Near(0);
}
}
TEST_P(integral, Mat2)
{ {
for(int j = 0; j < LOOP_TIMES; j++) for(int j = 0; j < LOOP_TIMES; j++)
{ {