update some of the functions in ocl module to the latest version
This commit is contained in:
parent
3fb3851c7a
commit
0fdb55a54d
@ -901,7 +901,7 @@ namespace cv
|
|||||||
oclMat dx_buf, dy_buf;
|
oclMat dx_buf, dy_buf;
|
||||||
oclMat edgeBuf;
|
oclMat edgeBuf;
|
||||||
oclMat trackBuf1, trackBuf2;
|
oclMat trackBuf1, trackBuf2;
|
||||||
oclMat counter;
|
void * counter;
|
||||||
Ptr<FilterEngine_GPU> filterDX, filterDY;
|
Ptr<FilterEngine_GPU> filterDX, filterDY;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -981,6 +981,9 @@ namespace cv
|
|||||||
int nlevels;
|
int nlevels;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
// initialize buffers; only need to do once in case of multiscale detection
|
||||||
|
void init_buffer(const oclMat& img, Size win_stride);
|
||||||
|
|
||||||
void computeBlockHistograms(const oclMat& img);
|
void computeBlockHistograms(const oclMat& img);
|
||||||
void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle);
|
void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle);
|
||||||
|
|
||||||
@ -1004,7 +1007,11 @@ namespace cv
|
|||||||
// Gradients conputation results
|
// Gradients conputation results
|
||||||
oclMat grad, qangle;
|
oclMat grad, qangle;
|
||||||
|
|
||||||
std::vector<oclMat> image_scales;
|
// scaled image
|
||||||
|
oclMat image_scale;
|
||||||
|
|
||||||
|
// effect size of input image (might be different from original size after scaling)
|
||||||
|
Size effect_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
//! Speeded up robust features, port from GPU module.
|
//! Speeded up robust features, port from GPU module.
|
||||||
|
122
modules/ocl/perf/test_blend.cpp
Normal file
122
modules/ocl/perf/test_blend.cpp
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::ocl;
|
||||||
|
using namespace cvtest;
|
||||||
|
using namespace testing;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(Blend, MatType, int)
|
||||||
|
{
|
||||||
|
int type;
|
||||||
|
int channels;
|
||||||
|
std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
|
||||||
|
type = GET_PARAM(0);
|
||||||
|
channels = GET_PARAM(1);
|
||||||
|
//int devnums = getDevice(oclinfo);
|
||||||
|
//CV_Assert(devnums > 0);
|
||||||
|
//cv::ocl::setBinpath(CLBINPATH);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(Blend, Performance)
|
||||||
|
{
|
||||||
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
|
cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
|
||||||
|
cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
|
||||||
|
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
|
||||||
|
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
|
||||||
|
cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
|
||||||
|
cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
|
||||||
|
|
||||||
|
|
||||||
|
double totalgputick_all = 0;
|
||||||
|
double totalgputick_kernel = 0;
|
||||||
|
double t1 = 0;
|
||||||
|
double t2 = 0;
|
||||||
|
|
||||||
|
for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
|
||||||
|
{
|
||||||
|
t1 = (double)cvGetTickCount();
|
||||||
|
cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
|
||||||
|
cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
|
||||||
|
cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
|
||||||
|
cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
|
||||||
|
|
||||||
|
t2 = (double)cvGetTickCount();
|
||||||
|
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;
|
||||||
|
|
||||||
|
cv::Mat m;
|
||||||
|
gdst.download(m);
|
||||||
|
t1 = (double)cvGetTickCount() - t1;
|
||||||
|
|
||||||
|
if (j == 0)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
totalgputick_all = t1 + totalgputick_all;
|
||||||
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
};
|
||||||
|
|
||||||
|
cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
|
||||||
|
Values(CV_8U, CV_32F), Values(1, 4)));
|
||||||
|
#endif
|
155
modules/ocl/perf/test_canny.cpp
Normal file
155
modules/ocl/perf/test_canny.cpp
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include <iomanip>
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::ocl;
|
||||||
|
using namespace cvtest;
|
||||||
|
using namespace testing;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#define FILTER_IMAGE "../../../samples/gpu/road.png"
|
||||||
|
|
||||||
|
#ifndef MWC_TEST_UTILITY
|
||||||
|
#define MWC_TEST_UTILITY
|
||||||
|
|
||||||
|
// Param class
|
||||||
|
#ifndef IMPLEMENT_PARAM_CLASS
|
||||||
|
#define IMPLEMENT_PARAM_CLASS(name, type) \
|
||||||
|
class name \
|
||||||
|
{ \
|
||||||
|
public: \
|
||||||
|
name ( type arg = type ()) : val_(arg) {} \
|
||||||
|
operator type () const {return val_;} \
|
||||||
|
private: \
|
||||||
|
type val_; \
|
||||||
|
}; \
|
||||||
|
inline void PrintTo( name param, std::ostream* os) \
|
||||||
|
{ \
|
||||||
|
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
|
||||||
|
}
|
||||||
|
|
||||||
|
IMPLEMENT_PARAM_CLASS(Channels, int)
|
||||||
|
#endif // IMPLEMENT_PARAM_CLASS
|
||||||
|
#endif // MWC_TEST_UTILITY
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// Canny1
|
||||||
|
|
||||||
|
IMPLEMENT_PARAM_CLASS(AppertureSize, int);
|
||||||
|
IMPLEMENT_PARAM_CLASS(L2gradient, bool);
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
|
||||||
|
{
|
||||||
|
int apperture_size;
|
||||||
|
bool useL2gradient;
|
||||||
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
apperture_size = GET_PARAM(0);
|
||||||
|
useL2gradient = GET_PARAM(1);
|
||||||
|
|
||||||
|
//int devnums = getDevice(oclinfo);
|
||||||
|
//CV_Assert(devnums > 0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(Canny1, Performance)
|
||||||
|
{
|
||||||
|
cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE);
|
||||||
|
ASSERT_FALSE(img.empty());
|
||||||
|
|
||||||
|
double low_thresh = 100.0;
|
||||||
|
double high_thresh = 150.0;
|
||||||
|
|
||||||
|
cv::Mat edges_gold;
|
||||||
|
cv::ocl::oclMat edges;
|
||||||
|
|
||||||
|
double totalgputick=0;
|
||||||
|
double totalgputick_kernel=0;
|
||||||
|
|
||||||
|
double t1=0;
|
||||||
|
double t2=0;
|
||||||
|
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
||||||
|
{
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
|
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
|
||||||
|
|
||||||
|
t2=(double)cvGetTickCount();//kernel
|
||||||
|
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
|
cv::Mat cpu_dst;
|
||||||
|
edges.download (cpu_dst);//download
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
|
if(j == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
totalgputick=t1+totalgputick;
|
||||||
|
|
||||||
|
totalgputick_kernel=t2+totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine(
|
||||||
|
testing::Values(AppertureSize(3), AppertureSize(5)),
|
||||||
|
testing::Values(L2gradient(false), L2gradient(true))));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif //Have opencl
|
120
modules/ocl/perf/test_columnsum.cpp
Normal file
120
modules/ocl/perf/test_columnsum.cpp
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Fangfang Bai fangfang@multicorewareinc.com
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::ocl;
|
||||||
|
using namespace cvtest;
|
||||||
|
using namespace testing;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// ColumnSum
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// ColumnSum
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(ColumnSum)
|
||||||
|
{
|
||||||
|
cv::Mat src;
|
||||||
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
//int devnums = getDevice(oclinfo);
|
||||||
|
//CV_Assert(devnums > 0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_F(ColumnSum, Performance)
|
||||||
|
{
|
||||||
|
cv::Size size(MWIDTH,MHEIGHT);
|
||||||
|
cv::Mat src = randomMat(size, CV_32FC1);
|
||||||
|
cv::ocl::oclMat d_dst;
|
||||||
|
|
||||||
|
double totalgputick=0;
|
||||||
|
double totalgputick_kernel=0;
|
||||||
|
double t1=0;
|
||||||
|
double t2=0;
|
||||||
|
|
||||||
|
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
||||||
|
{
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
|
cv::ocl::oclMat d_src(src);
|
||||||
|
|
||||||
|
t2=(double)cvGetTickCount();//kernel
|
||||||
|
cv::ocl::columnSum(d_src,d_dst);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
|
cv::Mat cpu_dst;
|
||||||
|
d_dst.download (cpu_dst);//download
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
|
if(j == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
totalgputick=t1+totalgputick;
|
||||||
|
totalgputick_kernel=t2+totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
126
modules/ocl/perf/test_fft.cpp
Normal file
126
modules/ocl/perf/test_fft.cpp
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Fangfangbai, fangfang@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
using namespace std;
|
||||||
|
#ifdef HAVE_CLAMDFFT
|
||||||
|
////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Dft
|
||||||
|
PARAM_TEST_CASE(Dft, cv::Size, bool)
|
||||||
|
{
|
||||||
|
cv::Size dft_size;
|
||||||
|
bool dft_rows;
|
||||||
|
vector<cv::ocl::Info> info;
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
dft_size = GET_PARAM(0);
|
||||||
|
dft_rows = GET_PARAM(1);
|
||||||
|
cv::ocl::getDevice(info);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(Dft, C2C)
|
||||||
|
{
|
||||||
|
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
|
||||||
|
int flags = 0;
|
||||||
|
flags |= dft_rows ? cv::DFT_ROWS : 0;
|
||||||
|
|
||||||
|
cv::ocl::oclMat d_b;
|
||||||
|
|
||||||
|
double totalgputick=0;
|
||||||
|
double totalgputick_kernel=0;
|
||||||
|
double t1=0;
|
||||||
|
double t2=0;
|
||||||
|
|
||||||
|
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
||||||
|
{
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
|
cv::ocl::oclMat ga=cv::ocl::oclMat(a);//upload
|
||||||
|
|
||||||
|
t2=(double)cvGetTickCount();//kernel
|
||||||
|
cv::ocl::dft(ga, d_b, a.size(), flags);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
|
cv::Mat cpu_dst;
|
||||||
|
d_b.download (cpu_dst);//download
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
|
if(j == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
totalgputick=t1+totalgputick;
|
||||||
|
totalgputick_kernel=t2+totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
TEST_P(Dft, R2CthenC2R)
|
||||||
|
{
|
||||||
|
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
|
||||||
|
|
||||||
|
int flags = 0;
|
||||||
|
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
|
||||||
|
|
||||||
|
cv::ocl::oclMat d_b, d_c;
|
||||||
|
|
||||||
|
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
||||||
|
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
|
||||||
|
|
||||||
|
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
|
||||||
|
// testing::Values(cv::Size(1280, 1024), cv::Size(1920, 1080),cv::Size(1800, 1500)),
|
||||||
|
// testing::Values(false, true)));
|
||||||
|
|
||||||
|
#endif // HAVE_CLAMDFFT
|
113
modules/ocl/perf/test_gemm.cpp
Normal file
113
modules/ocl/perf/test_gemm.cpp
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
using namespace std;
|
||||||
|
#ifdef HAVE_CLAMDBLAS
|
||||||
|
////////////////////////////////////////////////////////////////////////////
|
||||||
|
// GEMM
|
||||||
|
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
|
||||||
|
{
|
||||||
|
int type;
|
||||||
|
cv::Size mat_size;
|
||||||
|
int flags;
|
||||||
|
vector<cv::ocl::Info> info;
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
type = GET_PARAM(0);
|
||||||
|
mat_size = GET_PARAM(1);
|
||||||
|
flags = GET_PARAM(2);
|
||||||
|
|
||||||
|
cv::ocl::getDevice(info);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(Gemm, Performance)
|
||||||
|
{
|
||||||
|
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
|
||||||
|
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
|
||||||
|
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
|
||||||
|
cv::ocl::oclMat ocl_dst;
|
||||||
|
|
||||||
|
double totalgputick=0;
|
||||||
|
double totalgputick_kernel=0;
|
||||||
|
double t1=0;
|
||||||
|
double t2=0;
|
||||||
|
|
||||||
|
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
||||||
|
{
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
|
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
|
||||||
|
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
|
||||||
|
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
|
||||||
|
|
||||||
|
t2=(double)cvGetTickCount();//kernel
|
||||||
|
cv::ocl::gemm(ga, gb, 1.0,gc, 1.0, ocl_dst, flags);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
|
cv::Mat cpu_dst;
|
||||||
|
ocl_dst.download (cpu_dst);//download
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount() - t1;//gpu end
|
||||||
|
|
||||||
|
if(j == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
totalgputick=t1+totalgputick;
|
||||||
|
totalgputick_kernel=t2+totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
|
||||||
|
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
|
||||||
|
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
|
||||||
|
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
|
||||||
|
#endif
|
218
modules/ocl/perf/test_hog.cpp
Normal file
218
modules/ocl/perf/test_hog.cpp
Normal file
@ -0,0 +1,218 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Intel License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Fangfang BAI, fangfang@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include "opencv2/core/core.hpp"
|
||||||
|
#include <iomanip>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(HOG,cv::Size,int)
|
||||||
|
{
|
||||||
|
cv::Size winSize;
|
||||||
|
int type;
|
||||||
|
std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
winSize = GET_PARAM(0);
|
||||||
|
type = GET_PARAM(1);
|
||||||
|
int devnums = getDevice(oclinfo);
|
||||||
|
CV_Assert(devnums > 0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(HOG, GetDescriptors)
|
||||||
|
{
|
||||||
|
// Load image
|
||||||
|
cv::Mat img_rgb = readImage("D:road.png");
|
||||||
|
ASSERT_FALSE(img_rgb.empty());
|
||||||
|
|
||||||
|
// Convert image
|
||||||
|
cv::Mat img;
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
case CV_8UC1:
|
||||||
|
cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
|
||||||
|
break;
|
||||||
|
case CV_8UC4:
|
||||||
|
default:
|
||||||
|
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// HOGs
|
||||||
|
cv::ocl::HOGDescriptor ocl_hog;
|
||||||
|
ocl_hog.gamma_correction = true;
|
||||||
|
|
||||||
|
|
||||||
|
// Compute descriptor
|
||||||
|
cv::ocl::oclMat d_descriptors;
|
||||||
|
//down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows);
|
||||||
|
|
||||||
|
double totalgputick=0;
|
||||||
|
double totalgputick_kernel=0;
|
||||||
|
double t1=0;
|
||||||
|
double t2=0;
|
||||||
|
|
||||||
|
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
||||||
|
{
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
|
cv::ocl::oclMat d_img=cv::ocl::oclMat(img);//upload
|
||||||
|
|
||||||
|
t2=(double)cvGetTickCount();//kernel
|
||||||
|
ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
|
cv::Mat down_descriptors;
|
||||||
|
d_descriptors.download(down_descriptors);
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
|
if(j == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
totalgputick=t1+totalgputick;
|
||||||
|
totalgputick_kernel=t2+totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
TEST_P(HOG, Detect)
|
||||||
|
{
|
||||||
|
// Load image
|
||||||
|
cv::Mat img_rgb = readImage("D:road.png");
|
||||||
|
ASSERT_FALSE(img_rgb.empty());
|
||||||
|
|
||||||
|
// Convert image
|
||||||
|
cv::Mat img;
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
case CV_8UC1:
|
||||||
|
cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
|
||||||
|
break;
|
||||||
|
case CV_8UC4:
|
||||||
|
default:
|
||||||
|
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// HOGs
|
||||||
|
if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128)))
|
||||||
|
winSize = cv::Size(64, 128);
|
||||||
|
cv::ocl::HOGDescriptor ocl_hog(winSize);
|
||||||
|
ocl_hog.gamma_correction = true;
|
||||||
|
|
||||||
|
cv::HOGDescriptor hog;
|
||||||
|
hog.winSize = winSize;
|
||||||
|
hog.gammaCorrection = true;
|
||||||
|
|
||||||
|
if (winSize.width == 48 && winSize.height == 96)
|
||||||
|
{
|
||||||
|
// daimler's base
|
||||||
|
ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector48x96());
|
||||||
|
hog.setSVMDetector(hog.getDaimlerPeopleDetector());
|
||||||
|
}
|
||||||
|
else if (winSize.width == 64 && winSize.height == 128)
|
||||||
|
{
|
||||||
|
ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector64x128());
|
||||||
|
hog.setSVMDetector(hog.getDefaultPeopleDetector());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
|
||||||
|
hog.setSVMDetector(hog.getDefaultPeopleDetector());
|
||||||
|
}
|
||||||
|
|
||||||
|
// OpenCL detection
|
||||||
|
std::vector<cv::Point> d_v_locations;
|
||||||
|
|
||||||
|
double totalgputick=0;
|
||||||
|
double totalgputick_kernel=0;
|
||||||
|
double t1=0;
|
||||||
|
double t2=0;
|
||||||
|
|
||||||
|
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
||||||
|
{
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
|
cv::ocl::oclMat d_img=cv::ocl::oclMat(img);//upload
|
||||||
|
|
||||||
|
t2=(double)cvGetTickCount();//kernel
|
||||||
|
ocl_hog.detect(d_img, d_v_locations, 0);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
if(j == 0)
|
||||||
|
continue;
|
||||||
|
totalgputick=t1+totalgputick;
|
||||||
|
totalgputick_kernel=t2+totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine(
|
||||||
|
testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
|
||||||
|
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
|
||||||
|
|
||||||
|
|
||||||
|
#endif //HAVE_OPENCL
|
232
modules/ocl/perf/test_match_template.cpp
Normal file
232
modules/ocl/perf/test_match_template.cpp
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include <iomanip>
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::ocl;
|
||||||
|
using namespace cvtest;
|
||||||
|
using namespace testing;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#ifndef MWC_TEST_UTILITY
|
||||||
|
#define MWC_TEST_UTILITY
|
||||||
|
//////// Utility
|
||||||
|
#ifndef DIFFERENT_SIZES
|
||||||
|
#else
|
||||||
|
#undef DIFFERENT_SIZES
|
||||||
|
#endif
|
||||||
|
#define DIFFERENT_SIZES testing::Values(cv::Size(256, 256), cv::Size(3000, 3000))
|
||||||
|
|
||||||
|
// Param class
|
||||||
|
#ifndef IMPLEMENT_PARAM_CLASS
|
||||||
|
#define IMPLEMENT_PARAM_CLASS(name, type) \
|
||||||
|
class name \
|
||||||
|
{ \
|
||||||
|
public: \
|
||||||
|
name ( type arg = type ()) : val_(arg) {} \
|
||||||
|
operator type () const {return val_;} \
|
||||||
|
private: \
|
||||||
|
type val_; \
|
||||||
|
}; \
|
||||||
|
inline void PrintTo( name param, std::ostream* os) \
|
||||||
|
{ \
|
||||||
|
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
|
||||||
|
}
|
||||||
|
|
||||||
|
IMPLEMENT_PARAM_CLASS(Channels, int)
|
||||||
|
#endif // IMPLEMENT_PARAM_CLASS
|
||||||
|
#endif // MWC_TEST_UTILITY
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// MatchTemplate
|
||||||
|
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED))
|
||||||
|
|
||||||
|
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
|
||||||
|
|
||||||
|
const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod)
|
||||||
|
{
|
||||||
|
cv::Size size;
|
||||||
|
cv::Size templ_size;
|
||||||
|
int cn;
|
||||||
|
int method;
|
||||||
|
//vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
size = GET_PARAM(0);
|
||||||
|
templ_size = GET_PARAM(1);
|
||||||
|
cn = GET_PARAM(2);
|
||||||
|
method = GET_PARAM(3);
|
||||||
|
//int devnums = getDevice(oclinfo);
|
||||||
|
//CV_Assert(devnums > 0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
struct MatchTemplate8U : MatchTemplate {};
|
||||||
|
|
||||||
|
TEST_P(MatchTemplate8U, Performance)
|
||||||
|
{
|
||||||
|
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
|
||||||
|
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
|
||||||
|
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
|
||||||
|
std::cout << "Channels: " << cn << std::endl;
|
||||||
|
|
||||||
|
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
|
||||||
|
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
|
||||||
|
cv::Mat dst_gold;
|
||||||
|
cv::ocl::oclMat dst;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
double totalgputick=0;
|
||||||
|
double totalgputick_kernel=0;
|
||||||
|
|
||||||
|
double t1=0;
|
||||||
|
double t2=0;
|
||||||
|
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
||||||
|
{
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
|
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
|
||||||
|
cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
|
||||||
|
|
||||||
|
t2=(double)cvGetTickCount();//kernel
|
||||||
|
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
|
cv::Mat cpu_dst;
|
||||||
|
dst.download (cpu_dst);//download
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
|
if(j == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
totalgputick=t1+totalgputick;
|
||||||
|
totalgputick_kernel=t2+totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct MatchTemplate32F : MatchTemplate {};
|
||||||
|
TEST_P(MatchTemplate32F, Performance)
|
||||||
|
{
|
||||||
|
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
|
||||||
|
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
|
||||||
|
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
|
||||||
|
std::cout << "Channels: " << cn << std::endl;
|
||||||
|
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
|
||||||
|
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
|
||||||
|
|
||||||
|
cv::Mat dst_gold;
|
||||||
|
cv::ocl::oclMat dst;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
double totalgputick=0;
|
||||||
|
double totalgputick_kernel=0;
|
||||||
|
|
||||||
|
double t1=0;
|
||||||
|
double t2=0;
|
||||||
|
for(int j = 0; j < LOOP_TIMES; j ++)
|
||||||
|
{
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
|
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
|
||||||
|
cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
|
||||||
|
|
||||||
|
t2=(double)cvGetTickCount();//kernel
|
||||||
|
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
|
cv::Mat cpu_dst;
|
||||||
|
dst.download (cpu_dst);//download
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
|
totalgputick=t1+totalgputick;
|
||||||
|
|
||||||
|
totalgputick_kernel=t2+totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
|
||||||
|
testing::Combine(
|
||||||
|
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)),
|
||||||
|
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
||||||
|
testing::Values(Channels(1), Channels(4)/*, Channels(3)*/),
|
||||||
|
ALL_TEMPLATE_METHODS
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
|
||||||
|
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)),
|
||||||
|
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
||||||
|
testing::Values(Channels(1), Channels(4) /*, Channels(3)*/),
|
||||||
|
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
|
||||||
|
|
||||||
|
#endif //HAVE_OPENCL
|
137
modules/ocl/perf/test_pyrdown.cpp
Normal file
137
modules/ocl/perf/test_pyrdown.cpp
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// fangfang bai, fangfang@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::ocl;
|
||||||
|
using namespace cvtest;
|
||||||
|
using namespace testing;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(PyrDown, MatType, int)
|
||||||
|
{
|
||||||
|
int type;
|
||||||
|
int channels;
|
||||||
|
//src mat
|
||||||
|
cv::Mat mat1;
|
||||||
|
cv::Mat dst;
|
||||||
|
|
||||||
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
//ocl dst mat for testing
|
||||||
|
|
||||||
|
cv::ocl::oclMat gmat1;
|
||||||
|
cv::ocl::oclMat gdst;
|
||||||
|
|
||||||
|
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
type = GET_PARAM(0);
|
||||||
|
channels = GET_PARAM(1);
|
||||||
|
//int devnums = getDevice(oclinfo);
|
||||||
|
//CV_Assert(devnums > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#define VARNAME(A) string(#A);
|
||||||
|
|
||||||
|
////////////////////////////////PyrDown/////////////////////////////////////////////////
|
||||||
|
TEST_P(PyrDown, Mat)
|
||||||
|
{
|
||||||
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
|
mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
|
||||||
|
|
||||||
|
|
||||||
|
cv::ocl::oclMat gdst;
|
||||||
|
double totalgputick = 0;
|
||||||
|
double totalgputick_kernel = 0;
|
||||||
|
|
||||||
|
double t1 = 0;
|
||||||
|
double t2 = 0;
|
||||||
|
|
||||||
|
for (int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
|
{
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
|
cv::ocl::oclMat gmat1(mat1);
|
||||||
|
|
||||||
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
|
cv::ocl::pyrDown(gmat1, gdst);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
|
cv::Mat cpu_dst;
|
||||||
|
gdst.download(cpu_dst);
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
|
if (j == 0)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
totalgputick = t1 + totalgputick;
|
||||||
|
|
||||||
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//********test****************
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine(
|
||||||
|
Values(CV_8U, CV_32F), Values(1, 4)));
|
||||||
|
|
||||||
|
|
||||||
|
#endif // HAVE_OPENCL
|
122
modules/ocl/perf/test_pyrup.cpp
Normal file
122
modules/ocl/perf/test_pyrup.cpp
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// fangfang bai fangfang@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "opencv2/core/core.hpp"
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include <iomanip>
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::ocl;
|
||||||
|
using namespace cvtest;
|
||||||
|
using namespace testing;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(PyrUp, MatType, int)
|
||||||
|
{
|
||||||
|
int type;
|
||||||
|
int channels;
|
||||||
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
type = GET_PARAM(0);
|
||||||
|
channels = GET_PARAM(1);
|
||||||
|
//int devnums = getDevice(oclinfo);
|
||||||
|
//CV_Assert(devnums > 0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(PyrUp, Performance)
|
||||||
|
{
|
||||||
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
|
cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels));
|
||||||
|
cv::Mat dst_gold;
|
||||||
|
cv::ocl::oclMat dst;
|
||||||
|
|
||||||
|
|
||||||
|
double totalgputick = 0;
|
||||||
|
double totalgputick_kernel = 0;
|
||||||
|
|
||||||
|
double t1 = 0;
|
||||||
|
double t2 = 0;
|
||||||
|
|
||||||
|
for (int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
|
{
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
|
cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload
|
||||||
|
|
||||||
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
|
cv::ocl::pyrUp(srcMat, dst);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
|
cv::Mat cpu_dst;
|
||||||
|
dst.download(cpu_dst); //download
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
|
if (j == 0)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
totalgputick = t1 + totalgputick;
|
||||||
|
|
||||||
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, Combine(
|
||||||
|
Values(CV_8U, CV_32F), Values(1, 4)));
|
||||||
|
|
||||||
|
#endif // HAVE_OPENCL
|
@ -88,11 +88,11 @@ void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
|
|||||||
Mat kx, ky;
|
Mat kx, ky;
|
||||||
if (!filterDX)
|
if (!filterDX)
|
||||||
{
|
{
|
||||||
filterDX = createDerivFilter_GPU(CV_32F, CV_32F, 1, 0, apperture_size, BORDER_REPLICATE);
|
filterDX = createDerivFilter_GPU(CV_8U, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
|
||||||
}
|
}
|
||||||
if (!filterDY)
|
if (!filterDY)
|
||||||
{
|
{
|
||||||
filterDY = createDerivFilter_GPU(CV_32F, CV_32F, 0, 1, apperture_size, BORDER_REPLICATE);
|
filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1);
|
edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1);
|
||||||
@ -100,7 +100,10 @@ void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
|
|||||||
trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2);
|
trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2);
|
||||||
trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2);
|
trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2);
|
||||||
|
|
||||||
counter.create(1,1, CV_32SC1);
|
float counter_f [1] = { 0 };
|
||||||
|
int err = 0;
|
||||||
|
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err );
|
||||||
|
openCLSafeCall(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::CannyBuf::release()
|
void cv::ocl::CannyBuf::release()
|
||||||
@ -112,7 +115,7 @@ void cv::ocl::CannyBuf::release()
|
|||||||
edgeBuf.release();
|
edgeBuf.release();
|
||||||
trackBuf1.release();
|
trackBuf1.release();
|
||||||
trackBuf2.release();
|
trackBuf2.release();
|
||||||
counter.release();
|
openCLFree(counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cv { namespace ocl {
|
namespace cv { namespace ocl {
|
||||||
@ -125,9 +128,9 @@ namespace cv { namespace ocl {
|
|||||||
|
|
||||||
void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh);
|
void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh);
|
||||||
|
|
||||||
void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols);
|
void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols);
|
||||||
|
|
||||||
void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols);
|
void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols);
|
||||||
|
|
||||||
void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols);
|
void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols);
|
||||||
}
|
}
|
||||||
@ -164,11 +167,10 @@ void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_th
|
|||||||
std::swap( low_thresh, high_thresh );
|
std::swap( low_thresh, high_thresh );
|
||||||
|
|
||||||
dst.create(src.size(), CV_8U);
|
dst.create(src.size(), CV_8U);
|
||||||
dst.setTo(Scalar::all(0));
|
//dst.setTo(Scalar::all(0));
|
||||||
|
|
||||||
buf.create(src.size(), apperture_size);
|
buf.create(src.size(), apperture_size);
|
||||||
buf.edgeBuf.setTo(Scalar::all(0));
|
//buf.edgeBuf.setTo(Scalar::all(0));
|
||||||
buf.counter.setTo(Scalar::all(0));
|
|
||||||
|
|
||||||
if (apperture_size == 3)
|
if (apperture_size == 3)
|
||||||
{
|
{
|
||||||
@ -178,17 +180,8 @@ void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_th
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// FIXME:
|
buf.filterDX->apply(src, buf.dx);
|
||||||
// current ocl implementation requires the src and dst having same type
|
buf.filterDY->apply(src, buf.dy);
|
||||||
// convertTo is time consuming so this may be optimized later.
|
|
||||||
oclMat src_omat32f = src;
|
|
||||||
src.convertTo(src_omat32f, CV_32F); // FIXME
|
|
||||||
|
|
||||||
buf.filterDX->apply(src_omat32f, buf.dx);
|
|
||||||
buf.filterDY->apply(src_omat32f, buf.dy);
|
|
||||||
|
|
||||||
buf.dx.convertTo(buf.dx, CV_32S); // FIXME
|
|
||||||
buf.dy.convertTo(buf.dy, CV_32S); // FIXME
|
|
||||||
|
|
||||||
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
|
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
|
||||||
}
|
}
|
||||||
@ -210,12 +203,11 @@ void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& d
|
|||||||
std::swap( low_thresh, high_thresh);
|
std::swap( low_thresh, high_thresh);
|
||||||
|
|
||||||
dst.create(dx.size(), CV_8U);
|
dst.create(dx.size(), CV_8U);
|
||||||
dst.setTo(Scalar::all(0));
|
//dst.setTo(Scalar::all(0));
|
||||||
|
|
||||||
buf.dx = dx; buf.dy = dy;
|
buf.dx = dx; buf.dy = dy;
|
||||||
buf.create(dx.size(), -1);
|
buf.create(dx.size(), -1);
|
||||||
buf.edgeBuf.setTo(Scalar::all(0));
|
//buf.edgeBuf.setTo(Scalar::all(0));
|
||||||
buf.counter.setTo(Scalar::all(0));
|
|
||||||
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
|
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
|
||||||
|
|
||||||
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
|
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
|
||||||
@ -342,7 +334,7 @@ void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int ro
|
|||||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols)
|
void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols)
|
||||||
{
|
{
|
||||||
Context *clCxt = map.clCxt;
|
Context *clCxt = map.clCxt;
|
||||||
string kernelName = "edgesHysteresisLocal";
|
string kernelName = "edgesHysteresisLocal";
|
||||||
@ -350,7 +342,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter,
|
|||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||||
@ -362,10 +354,10 @@ void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter,
|
|||||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols)
|
void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols)
|
||||||
{
|
{
|
||||||
unsigned int count = Mat(counter).at<unsigned int>(0);
|
unsigned int count;
|
||||||
|
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL));
|
||||||
Context *clCxt = map.clCxt;
|
Context *clCxt = map.clCxt;
|
||||||
string kernelName = "edgesHysteresisGlobal";
|
string kernelName = "edgesHysteresisGlobal";
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
@ -375,13 +367,13 @@ void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, ocl
|
|||||||
|
|
||||||
while(count > 0)
|
while(count > 0)
|
||||||
{
|
{
|
||||||
counter.setTo(0);
|
//counter.setTo(0);
|
||||||
args.clear();
|
args.clear();
|
||||||
size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
|
size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&count));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&count));
|
||||||
@ -389,7 +381,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, ocl
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
count = Mat(counter).at<unsigned int>(0);
|
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL));
|
||||||
std::swap(st1, st2);
|
std::swap(st1, st2);
|
||||||
}
|
}
|
||||||
#undef DIVUP
|
#undef DIVUP
|
||||||
|
@ -67,7 +67,9 @@ namespace cv
|
|||||||
|
|
||||||
void cv::ocl::columnSum(const oclMat& src,oclMat& dst)
|
void cv::ocl::columnSum(const oclMat& src,oclMat& dst)
|
||||||
{
|
{
|
||||||
CV_Assert(src.type() == CV_32FC1 && dst.type() == CV_32FC1 && src.size() == dst.size());
|
CV_Assert(src.type() == CV_32FC1);
|
||||||
|
|
||||||
|
dst.create(src.size(), src.type());
|
||||||
|
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
|
|
||||||
|
@ -119,6 +119,8 @@ namespace cv { namespace ocl { namespace device
|
|||||||
float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
|
float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
|
||||||
void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img,
|
void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img,
|
||||||
float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
|
float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
|
||||||
|
|
||||||
|
void resize( const oclMat &src, oclMat &dst, const Size sz);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -150,6 +152,8 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
|
|||||||
|
|
||||||
cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
|
cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
|
||||||
hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
|
hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
|
||||||
|
|
||||||
|
effect_size = Size(0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
|
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
|
||||||
@ -199,22 +203,37 @@ void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float>& _detector)
|
|||||||
CV_Assert(checkDetectorSize());
|
CV_Assert(checkDetectorSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cv::ocl::HOGDescriptor::init_buffer(const oclMat& img, Size win_stride)
|
||||||
|
{
|
||||||
|
if (!image_scale.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (effect_size == Size(0, 0))
|
||||||
|
effect_size = img.size();
|
||||||
|
|
||||||
|
grad.create(img.size(), CV_32FC2);
|
||||||
|
qangle.create(img.size(), CV_8UC2);
|
||||||
|
|
||||||
|
const size_t block_hist_size = getBlockHistogramSize();
|
||||||
|
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
|
||||||
|
block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F);
|
||||||
|
|
||||||
|
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
|
||||||
|
labels.create(1, wins_per_img.area(), CV_8U);
|
||||||
|
}
|
||||||
|
|
||||||
void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle)
|
void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle)
|
||||||
{
|
{
|
||||||
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
|
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
|
||||||
|
|
||||||
grad.create(img.size(), CV_32FC2);
|
|
||||||
|
|
||||||
qangle.create(img.size(), CV_8UC2);
|
|
||||||
|
|
||||||
float angleScale = (float)(nbins / CV_PI);
|
float angleScale = (float)(nbins / CV_PI);
|
||||||
switch (img.type())
|
switch (img.type())
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
hog::compute_gradients_8UC1(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);
|
hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
|
||||||
break;
|
break;
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
hog::compute_gradients_8UC4(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);
|
hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -224,14 +243,11 @@ void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat& img)
|
|||||||
{
|
{
|
||||||
computeGradient(img, grad, qangle);
|
computeGradient(img, grad, qangle);
|
||||||
|
|
||||||
size_t block_hist_size = getBlockHistogramSize();
|
hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
|
||||||
Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
|
grad, qangle, (float)getWinSigma(), block_hists);
|
||||||
|
|
||||||
block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F);
|
hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
|
||||||
|
block_hists, (float)threshold_L2hys);
|
||||||
hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, grad, qangle, (float)getWinSigma(), block_hists);
|
|
||||||
|
|
||||||
hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, block_hists, (float)threshold_L2hys);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -239,11 +255,13 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride,
|
|||||||
{
|
{
|
||||||
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
|
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
|
||||||
|
|
||||||
|
init_buffer(img, win_stride);
|
||||||
|
|
||||||
computeBlockHistograms(img);
|
computeBlockHistograms(img);
|
||||||
|
|
||||||
const size_t block_hist_size = getBlockHistogramSize();
|
const size_t block_hist_size = getBlockHistogramSize();
|
||||||
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
|
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
|
||||||
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
|
Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
|
||||||
|
|
||||||
descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
|
descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
|
||||||
|
|
||||||
@ -251,11 +269,11 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride,
|
|||||||
{
|
{
|
||||||
case DESCR_FORMAT_ROW_BY_ROW:
|
case DESCR_FORMAT_ROW_BY_ROW:
|
||||||
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
|
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
|
||||||
win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors);
|
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
|
||||||
break;
|
break;
|
||||||
case DESCR_FORMAT_COL_BY_COL:
|
case DESCR_FORMAT_COL_BY_COL:
|
||||||
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
|
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
|
||||||
win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors);
|
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsBadArg, "Unknown descriptor format");
|
CV_Error(CV_StsBadArg, "Unknown descriptor format");
|
||||||
@ -272,22 +290,21 @@ void cv::ocl::HOGDescriptor::detect(const oclMat& img, vector<Point>& hits, doub
|
|||||||
if (detector.empty())
|
if (detector.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
computeBlockHistograms(img);
|
|
||||||
|
|
||||||
if (win_stride == Size())
|
if (win_stride == Size())
|
||||||
win_stride = block_stride;
|
win_stride = block_stride;
|
||||||
else
|
else
|
||||||
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
|
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
|
||||||
|
init_buffer(img, win_stride);
|
||||||
|
|
||||||
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
|
computeBlockHistograms(img);
|
||||||
labels.create(1, wins_per_img.area(), CV_8U);
|
|
||||||
|
|
||||||
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
|
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
|
||||||
win_stride.height, win_stride.width, img.rows, img.cols, block_hists,
|
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists,
|
||||||
detector, (float)free_coef, (float)hit_threshold, labels);
|
detector, (float)free_coef, (float)hit_threshold, labels);
|
||||||
|
|
||||||
labels.download(labels_host);
|
labels.download(labels_host);
|
||||||
unsigned char* vec = labels_host.ptr();
|
unsigned char* vec = labels_host.ptr();
|
||||||
|
Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
|
||||||
for (int i = 0; i < wins_per_img.area(); i++)
|
for (int i = 0; i < wins_per_img.area(); i++)
|
||||||
{
|
{
|
||||||
int y = i / wins_per_img.width;
|
int y = i / wins_per_img.width;
|
||||||
@ -303,6 +320,7 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector<Rect>& f
|
|||||||
Size win_stride, Size padding, double scale0, int group_threshold)
|
Size win_stride, Size padding, double scale0, int group_threshold)
|
||||||
{
|
{
|
||||||
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
|
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
|
||||||
|
CV_Assert(scale0 > 1);
|
||||||
|
|
||||||
vector<double> level_scale;
|
vector<double> level_scale;
|
||||||
double scale = 1.;
|
double scale = 1.;
|
||||||
@ -318,27 +336,30 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector<Rect>& f
|
|||||||
}
|
}
|
||||||
levels = std::max(levels, 1);
|
levels = std::max(levels, 1);
|
||||||
level_scale.resize(levels);
|
level_scale.resize(levels);
|
||||||
image_scales.resize(levels);
|
|
||||||
|
|
||||||
std::vector<Rect> all_candidates;
|
std::vector<Rect> all_candidates;
|
||||||
vector<Point> locations;
|
vector<Point> locations;
|
||||||
|
|
||||||
|
if (win_stride == Size())
|
||||||
|
win_stride = block_stride;
|
||||||
|
else
|
||||||
|
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
|
||||||
|
init_buffer(img, win_stride);
|
||||||
|
image_scale.create(img.size(), img.type());
|
||||||
|
|
||||||
for (size_t i = 0; i < level_scale.size(); i++)
|
for (size_t i = 0; i < level_scale.size(); i++)
|
||||||
{
|
{
|
||||||
scale = level_scale[i];
|
scale = level_scale[i];
|
||||||
Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
|
effect_size = Size(cvRound(img.cols / scale), cvRound(img.rows / scale));
|
||||||
oclMat smaller_img;
|
if (effect_size == img.size())
|
||||||
|
{
|
||||||
if (sz == img.size())
|
detect(img, locations, hit_threshold, win_stride, padding);
|
||||||
smaller_img = img;
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
image_scales[i].create(sz, img.type());
|
hog::resize( img, image_scale, effect_size);
|
||||||
resize(img, image_scales[i], image_scales[i].size(), 0, 0, INTER_LINEAR);
|
detect(image_scale, locations, hit_threshold, win_stride, padding);
|
||||||
smaller_img = image_scales[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
detect(smaller_img, locations, hit_threshold, win_stride, padding);
|
|
||||||
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
|
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
|
||||||
for (size_t j = 0; j < locations.size(); j++)
|
for (size_t j = 0; j < locations.size(); j++)
|
||||||
all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size));
|
all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size));
|
||||||
@ -1784,4 +1805,36 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c
|
|||||||
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz)
|
||||||
|
{
|
||||||
|
CV_Assert( (src.channels() == dst.channels()) );
|
||||||
|
Context *clCxt = Context::getContext();
|
||||||
|
|
||||||
|
string kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel";
|
||||||
|
size_t blkSizeX = 16, blkSizeY = 16;
|
||||||
|
size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX;
|
||||||
|
size_t glbSizeY = sz.height % blkSizeY == 0 ? sz.height : (sz.height / blkSizeY + 1) * blkSizeY;
|
||||||
|
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
|
||||||
|
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
|
||||||
|
|
||||||
|
float ifx = (float)src.cols / sz.width;
|
||||||
|
float ify = (float)src.rows / sz.height;
|
||||||
|
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||||
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width));
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&sz.height));
|
||||||
|
args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
|
||||||
|
args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
|
||||||
|
|
||||||
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -67,32 +67,6 @@ __kernel void BlendLinear_C1_D0(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void BlendLinear_C3_D0(
|
|
||||||
__global uchar *dst,
|
|
||||||
__global uchar *img1,
|
|
||||||
__global uchar *img2,
|
|
||||||
__global float *weight1,
|
|
||||||
__global float *weight2,
|
|
||||||
int rows,
|
|
||||||
int cols,
|
|
||||||
int istep,
|
|
||||||
int wstep
|
|
||||||
)
|
|
||||||
{
|
|
||||||
int idx = get_global_id(0);
|
|
||||||
int idy = get_global_id(1);
|
|
||||||
int x = idx / 3;
|
|
||||||
int y = idy;
|
|
||||||
if (x < cols && y < rows)
|
|
||||||
{
|
|
||||||
int pos = idy * istep + idx;
|
|
||||||
int wpos = idy * (wstep /sizeof(float)) + x;
|
|
||||||
float w1 = weight1[wpos];
|
|
||||||
float w2 = weight2[wpos];
|
|
||||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void BlendLinear_C4_D0(
|
__kernel void BlendLinear_C4_D0(
|
||||||
__global uchar *dst,
|
__global uchar *dst,
|
||||||
__global uchar *img1,
|
__global uchar *img1,
|
||||||
@ -143,32 +117,6 @@ __kernel void BlendLinear_C1_D5(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void BlendLinear_C3_D5(
|
|
||||||
__global float *dst,
|
|
||||||
__global float *img1,
|
|
||||||
__global float *img2,
|
|
||||||
__global float *weight1,
|
|
||||||
__global float *weight2,
|
|
||||||
int rows,
|
|
||||||
int cols,
|
|
||||||
int istep,
|
|
||||||
int wstep
|
|
||||||
)
|
|
||||||
{
|
|
||||||
int idx = get_global_id(0);
|
|
||||||
int idy = get_global_id(1);
|
|
||||||
int x = idx / 3;
|
|
||||||
int y = idy;
|
|
||||||
if (x < cols && y < rows)
|
|
||||||
{
|
|
||||||
int pos = idy * (istep / sizeof(float)) + idx;
|
|
||||||
int wpos = idy * (wstep /sizeof(float)) + x;
|
|
||||||
float w1 = weight1[wpos];
|
|
||||||
float w2 = weight2[wpos];
|
|
||||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void BlendLinear_C4_D5(
|
__kernel void BlendLinear_C4_D5(
|
||||||
__global float *dst,
|
__global float *dst,
|
||||||
__global float *img1,
|
__global float *img1,
|
||||||
@ -194,3 +142,4 @@ __kernel void BlendLinear_C4_D5(
|
|||||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,8 +70,8 @@ inline float calc(int x, int y)
|
|||||||
// dx_buf output dx buffer
|
// dx_buf output dx buffer
|
||||||
// dy_buf output dy buffer
|
// dy_buf output dy buffer
|
||||||
__kernel
|
__kernel
|
||||||
void calcSobelRowPass
|
void calcSobelRowPass
|
||||||
(
|
(
|
||||||
__global const uchar * src,
|
__global const uchar * src,
|
||||||
__global int * dx_buf,
|
__global int * dx_buf,
|
||||||
__global int * dy_buf,
|
__global int * dy_buf,
|
||||||
@ -83,7 +83,7 @@ void calcSobelRowPass
|
|||||||
int dx_buf_offset,
|
int dx_buf_offset,
|
||||||
int dy_buf_step,
|
int dy_buf_step,
|
||||||
int dy_buf_offset
|
int dy_buf_offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
//src_step /= sizeof(*src);
|
//src_step /= sizeof(*src);
|
||||||
//src_offset /= sizeof(*src);
|
//src_offset /= sizeof(*src);
|
||||||
@ -128,8 +128,8 @@ void calcSobelRowPass
|
|||||||
// dy direvitive in y direction output
|
// dy direvitive in y direction output
|
||||||
// mag magnitude direvitive of xy output
|
// mag magnitude direvitive of xy output
|
||||||
__kernel
|
__kernel
|
||||||
void calcMagnitude_buf
|
void calcMagnitude_buf
|
||||||
(
|
(
|
||||||
__global const int * dx_buf,
|
__global const int * dx_buf,
|
||||||
__global const int * dy_buf,
|
__global const int * dy_buf,
|
||||||
__global int * dx,
|
__global int * dx,
|
||||||
@ -147,7 +147,7 @@ void calcMagnitude_buf
|
|||||||
int dy_offset,
|
int dy_offset,
|
||||||
int mag_step,
|
int mag_step,
|
||||||
int mag_offset
|
int mag_offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
dx_buf_step /= sizeof(*dx_buf);
|
dx_buf_step /= sizeof(*dx_buf);
|
||||||
dx_buf_offset /= sizeof(*dx_buf);
|
dx_buf_offset /= sizeof(*dx_buf);
|
||||||
@ -205,8 +205,8 @@ void calcMagnitude_buf
|
|||||||
// dy direvitive in y direction output
|
// dy direvitive in y direction output
|
||||||
// mag magnitude direvitive of xy output
|
// mag magnitude direvitive of xy output
|
||||||
__kernel
|
__kernel
|
||||||
void calcMagnitude
|
void calcMagnitude
|
||||||
(
|
(
|
||||||
__global const int * dx,
|
__global const int * dx,
|
||||||
__global const int * dy,
|
__global const int * dy,
|
||||||
__global float * mag,
|
__global float * mag,
|
||||||
@ -218,7 +218,7 @@ void calcMagnitude
|
|||||||
int dy_offset,
|
int dy_offset,
|
||||||
int mag_step,
|
int mag_step,
|
||||||
int mag_offset
|
int mag_offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
dx_step /= sizeof(*dx);
|
dx_step /= sizeof(*dx);
|
||||||
dx_offset /= sizeof(*dx);
|
dx_offset /= sizeof(*dx);
|
||||||
@ -261,8 +261,8 @@ void calcMagnitude
|
|||||||
// mag magnitudes calculated from calcMagnitude function
|
// mag magnitudes calculated from calcMagnitude function
|
||||||
// map output containing raw edge types
|
// map output containing raw edge types
|
||||||
__kernel
|
__kernel
|
||||||
void calcMap
|
void calcMap
|
||||||
(
|
(
|
||||||
__global const int * dx,
|
__global const int * dx,
|
||||||
__global const int * dy,
|
__global const int * dy,
|
||||||
__global const float * mag,
|
__global const float * mag,
|
||||||
@ -279,7 +279,7 @@ void calcMap
|
|||||||
int mag_offset,
|
int mag_offset,
|
||||||
int map_step,
|
int map_step,
|
||||||
int map_offset
|
int map_offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
dx_step /= sizeof(*dx);
|
dx_step /= sizeof(*dx);
|
||||||
dx_offset /= sizeof(*dx);
|
dx_offset /= sizeof(*dx);
|
||||||
@ -361,8 +361,8 @@ void calcMap
|
|||||||
|
|
||||||
// non local memory version
|
// non local memory version
|
||||||
__kernel
|
__kernel
|
||||||
void calcMap_2
|
void calcMap_2
|
||||||
(
|
(
|
||||||
__global const int * dx,
|
__global const int * dx,
|
||||||
__global const int * dy,
|
__global const int * dy,
|
||||||
__global const float * mag,
|
__global const float * mag,
|
||||||
@ -379,7 +379,7 @@ void calcMap_2
|
|||||||
int mag_offset,
|
int mag_offset,
|
||||||
int map_step,
|
int map_step,
|
||||||
int map_offset
|
int map_offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
dx_step /= sizeof(*dx);
|
dx_step /= sizeof(*dx);
|
||||||
dx_offset /= sizeof(*dx);
|
dx_offset /= sizeof(*dx);
|
||||||
@ -440,8 +440,8 @@ void calcMap_2
|
|||||||
|
|
||||||
// [256, 1, 1] threaded, local memory version
|
// [256, 1, 1] threaded, local memory version
|
||||||
__kernel
|
__kernel
|
||||||
void calcMap_3
|
void calcMap_3
|
||||||
(
|
(
|
||||||
__global const int * dx,
|
__global const int * dx,
|
||||||
__global const int * dy,
|
__global const int * dy,
|
||||||
__global const float * mag,
|
__global const float * mag,
|
||||||
@ -458,7 +458,7 @@ void calcMap_3
|
|||||||
int mag_offset,
|
int mag_offset,
|
||||||
int map_step,
|
int map_step,
|
||||||
int map_offset
|
int map_offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
dx_step /= sizeof(*dx);
|
dx_step /= sizeof(*dx);
|
||||||
dx_offset /= sizeof(*dx);
|
dx_offset /= sizeof(*dx);
|
||||||
@ -556,8 +556,8 @@ void calcMap_3
|
|||||||
// st the potiential edge points found in this kernel call
|
// st the potiential edge points found in this kernel call
|
||||||
// counter the number of potiential edge points
|
// counter the number of potiential edge points
|
||||||
__kernel
|
__kernel
|
||||||
void edgesHysteresisLocal
|
void edgesHysteresisLocal
|
||||||
(
|
(
|
||||||
__global int * map,
|
__global int * map,
|
||||||
__global ushort2 * st,
|
__global ushort2 * st,
|
||||||
volatile __global unsigned int * counter,
|
volatile __global unsigned int * counter,
|
||||||
@ -565,7 +565,7 @@ void edgesHysteresisLocal
|
|||||||
int cols,
|
int cols,
|
||||||
int map_step,
|
int map_step,
|
||||||
int map_offset
|
int map_offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
map_step /= sizeof(*map);
|
map_step /= sizeof(*map);
|
||||||
map_offset /= sizeof(*map);
|
map_offset /= sizeof(*map);
|
||||||
@ -599,7 +599,7 @@ void edgesHysteresisLocal
|
|||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int k = 0; k < 16; ++k)
|
for (int k = 0; k < 16; ++k)
|
||||||
{
|
{
|
||||||
n = 0;
|
n = 0;
|
||||||
@ -653,8 +653,8 @@ __constant c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
|
|||||||
|
|
||||||
#define stack_size 512
|
#define stack_size 512
|
||||||
__kernel
|
__kernel
|
||||||
void edgesHysteresisGlobal
|
void edgesHysteresisGlobal
|
||||||
(
|
(
|
||||||
__global int * map,
|
__global int * map,
|
||||||
__global ushort2 * st1,
|
__global ushort2 * st1,
|
||||||
__global ushort2 * st2,
|
__global ushort2 * st2,
|
||||||
@ -664,7 +664,7 @@ void edgesHysteresisGlobal
|
|||||||
int count,
|
int count,
|
||||||
int map_step,
|
int map_step,
|
||||||
int map_offset
|
int map_offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
|
||||||
map_step /= sizeof(*map);
|
map_step /= sizeof(*map);
|
||||||
@ -684,6 +684,12 @@ void edgesHysteresisGlobal
|
|||||||
|
|
||||||
__local ushort2 s_st[stack_size];
|
__local ushort2 s_st[stack_size];
|
||||||
|
|
||||||
|
if(gidx + gidy == 0)
|
||||||
|
{
|
||||||
|
*counter = 0;
|
||||||
|
}
|
||||||
|
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||||
|
|
||||||
if(lidx == 0)
|
if(lidx == 0)
|
||||||
{
|
{
|
||||||
s_counter = 0;
|
s_counter = 0;
|
||||||
@ -770,8 +776,8 @@ void edgesHysteresisGlobal
|
|||||||
// map edge type mappings
|
// map edge type mappings
|
||||||
// dst edge output
|
// dst edge output
|
||||||
__kernel
|
__kernel
|
||||||
void getEdges
|
void getEdges
|
||||||
(
|
(
|
||||||
__global const int * map,
|
__global const int * map,
|
||||||
__global uchar * dst,
|
__global uchar * dst,
|
||||||
int rows,
|
int rows,
|
||||||
@ -780,7 +786,7 @@ void getEdges
|
|||||||
int map_offset,
|
int map_offset,
|
||||||
int dst_step,
|
int dst_step,
|
||||||
int dst_offset
|
int dst_offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
map_step /= sizeof(*map);
|
map_step /= sizeof(*map);
|
||||||
map_offset /= sizeof(*map);
|
map_offset /= sizeof(*map);
|
||||||
|
@ -58,17 +58,9 @@
|
|||||||
// Image read mode
|
// Image read mode
|
||||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
|
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
|
||||||
|
|
||||||
|
#define FLT_EPSILON (1e-15)
|
||||||
#define CV_PI_F 3.14159265f
|
#define CV_PI_F 3.14159265f
|
||||||
|
|
||||||
// print greyscale image to show image layout
|
|
||||||
__kernel void printImage(image2d_t img)
|
|
||||||
{
|
|
||||||
printf("(%d, %d) - %3d \n",
|
|
||||||
get_global_id(0),
|
|
||||||
get_global_id(1),
|
|
||||||
read_imageui(img, (int2)(get_global_id(0), get_global_id(1))).x
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use integral image to calculate haar wavelets.
|
// Use integral image to calculate haar wavelets.
|
||||||
// N = 2
|
// N = 2
|
||||||
@ -444,7 +436,6 @@ __kernel
|
|||||||
float val0 = N9[localLin];
|
float val0 = N9[localLin];
|
||||||
if (val0 > c_hessianThreshold)
|
if (val0 > c_hessianThreshold)
|
||||||
{
|
{
|
||||||
//printf(\"(%3d, %3d) N9[%3d]=%7.1f val0=%7.1f\\n\", l_x, l_y, localLin - zoff, N9[localLin], val0);
|
|
||||||
// Coordinates for the start of the wavelet in the sum image. There
|
// Coordinates for the start of the wavelet in the sum image. There
|
||||||
// is some integer division involved, so don't try to simplify this
|
// is some integer division involved, so don't try to simplify this
|
||||||
// (cancel out sampleStep) without checking the result is the same
|
// (cancel out sampleStep) without checking the result is the same
|
||||||
@ -726,6 +717,7 @@ __kernel
|
|||||||
__global float* featureSize = keypoints + SIZE_ROW * keypoints_step;
|
__global float* featureSize = keypoints + SIZE_ROW * keypoints_step;
|
||||||
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
|
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
|
||||||
|
|
||||||
|
|
||||||
volatile __local float s_X[128];
|
volatile __local float s_X[128];
|
||||||
volatile __local float s_Y[128];
|
volatile __local float s_Y[128];
|
||||||
volatile __local float s_angle[128];
|
volatile __local float s_angle[128];
|
||||||
@ -737,6 +729,7 @@ __kernel
|
|||||||
and building the keypoint descriptor are defined relative to 's' */
|
and building the keypoint descriptor are defined relative to 's' */
|
||||||
const float s = featureSize[get_group_id(0)] * 1.2f / 9.0f;
|
const float s = featureSize[get_group_id(0)] * 1.2f / 9.0f;
|
||||||
|
|
||||||
|
|
||||||
/* To find the dominant orientation, the gradients in x and y are
|
/* To find the dominant orientation, the gradients in x and y are
|
||||||
sampled in a circle of radius 6s using wavelets of size 4s.
|
sampled in a circle of radius 6s using wavelets of size 4s.
|
||||||
We ensure the gradient wavelet size is even to ensure the
|
We ensure the gradient wavelet size is even to ensure the
|
||||||
@ -765,9 +758,11 @@ __kernel
|
|||||||
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x);
|
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x);
|
||||||
|
|
||||||
angle = atan2(Y, X);
|
angle = atan2(Y, X);
|
||||||
|
|
||||||
if (angle < 0)
|
if (angle < 0)
|
||||||
angle += 2.0f * CV_PI_F;
|
angle += 2.0f * CV_PI_F;
|
||||||
angle *= 180.0f / CV_PI_F;
|
angle *= 180.0f / CV_PI_F;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s_X[tid] = X;
|
s_X[tid] = X;
|
||||||
@ -807,7 +802,6 @@ __kernel
|
|||||||
sumx += s_X[get_local_id(0) + 96];
|
sumx += s_X[get_local_id(0) + 96];
|
||||||
sumy += s_Y[get_local_id(0) + 96];
|
sumy += s_Y[get_local_id(0) + 96];
|
||||||
}
|
}
|
||||||
|
|
||||||
reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0));
|
reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0));
|
||||||
reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0));
|
reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0));
|
||||||
|
|
||||||
@ -818,10 +812,8 @@ __kernel
|
|||||||
bestx = sumx;
|
bestx = sumx;
|
||||||
besty = sumy;
|
besty = sumy;
|
||||||
}
|
}
|
||||||
|
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (get_local_id(0) == 0)
|
if (get_local_id(0) == 0)
|
||||||
{
|
{
|
||||||
s_X[get_local_id(1)] = bestx;
|
s_X[get_local_id(1)] = bestx;
|
||||||
@ -846,6 +838,10 @@ __kernel
|
|||||||
kp_dir += 2.0f * CV_PI_F;
|
kp_dir += 2.0f * CV_PI_F;
|
||||||
kp_dir *= 180.0f / CV_PI_F;
|
kp_dir *= 180.0f / CV_PI_F;
|
||||||
|
|
||||||
|
kp_dir = 360.0f - kp_dir;
|
||||||
|
if (fabs(kp_dir - 360.f) < FLT_EPSILON)
|
||||||
|
kp_dir = 0.f;
|
||||||
|
|
||||||
featureDir[get_group_id(0)] = kp_dir;
|
featureDir[get_group_id(0)] = kp_dir;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -940,7 +936,10 @@ void calc_dx_dy(
|
|||||||
const float centerX = featureX[get_group_id(0)];
|
const float centerX = featureX[get_group_id(0)];
|
||||||
const float centerY = featureY[get_group_id(0)];
|
const float centerY = featureY[get_group_id(0)];
|
||||||
const float size = featureSize[get_group_id(0)];
|
const float size = featureSize[get_group_id(0)];
|
||||||
const float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f);
|
float descriptor_dir = 360.0f - featureDir[get_group_id(0)];
|
||||||
|
if (fabs(descriptor_dir - 360.f) < FLT_EPSILON)
|
||||||
|
descriptor_dir = 0.f;
|
||||||
|
descriptor_dir *= (float)(CV_PI_F / 180.0f);
|
||||||
|
|
||||||
/* The sampling intervals and wavelet sized for selecting an orientation
|
/* The sampling intervals and wavelet sized for selecting an orientation
|
||||||
and building the keypoint descriptor are defined relative to 's' */
|
and building the keypoint descriptor are defined relative to 's' */
|
||||||
|
@ -448,3 +448,42 @@ __kernel void compute_gradients_8UC1_kernel(const int height, const int width, c
|
|||||||
grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang;
|
grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Resize
|
||||||
|
|
||||||
|
__kernel void resize_8UC4_kernel(__global uchar4 * dst, __global const uchar4 * src,
|
||||||
|
int dst_offset, int src_offset, int dst_step, int src_step,
|
||||||
|
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||||
|
{
|
||||||
|
int dx = get_global_id(0);
|
||||||
|
int dy = get_global_id(1);
|
||||||
|
|
||||||
|
int sx = (int)floor(dx*ifx+0.5f);
|
||||||
|
int sy = (int)floor(dy*ify+0.5f);
|
||||||
|
sx = min(sx, src_cols-1);
|
||||||
|
sy = min(sy, src_rows-1);
|
||||||
|
int dpos = (dst_offset>>2) + dy * (dst_step>>2) + dx;
|
||||||
|
int spos = (src_offset>>2) + sy * (src_step>>2) + sx;
|
||||||
|
|
||||||
|
if(dx<dst_cols && dy<dst_rows)
|
||||||
|
dst[dpos] = src[spos];
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void resize_8UC1_kernel(__global uchar * dst, __global const uchar * src,
|
||||||
|
int dst_offset, int src_offset, int dst_step, int src_step,
|
||||||
|
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||||
|
{
|
||||||
|
int dx = get_global_id(0);
|
||||||
|
int dy = get_global_id(1);
|
||||||
|
|
||||||
|
int sx = (int)floor(dx*ifx+0.5f);
|
||||||
|
int sy = (int)floor(dy*ify+0.5f);
|
||||||
|
sx = min(sx, src_cols-1);
|
||||||
|
sy = min(sy, src_rows-1);
|
||||||
|
int dpos = dst_offset + dy * dst_step + dx;
|
||||||
|
int spos = src_offset + sy * src_step + sx;
|
||||||
|
|
||||||
|
if(dx<dst_cols && dy<dst_rows)
|
||||||
|
dst[dpos] = src[spos];
|
||||||
|
}
|
@ -51,8 +51,6 @@ using namespace cv;
|
|||||||
using namespace cv::ocl;
|
using namespace cv::ocl;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#define EXT_FP64 0
|
|
||||||
|
|
||||||
#if !defined (HAVE_OPENCL)
|
#if !defined (HAVE_OPENCL)
|
||||||
void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
|
void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
|
||||||
#else
|
#else
|
||||||
@ -113,7 +111,6 @@ namespace cv { namespace ocl
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// SQDIFF
|
// SQDIFF
|
||||||
void matchTemplate_SQDIFF(
|
void matchTemplate_SQDIFF(
|
||||||
@ -137,11 +134,11 @@ namespace cv { namespace ocl
|
|||||||
{
|
{
|
||||||
matchTemplate_CCORR(image,templ,result,buf);
|
matchTemplate_CCORR(image,templ,result,buf);
|
||||||
buf.image_sums.resize(1);
|
buf.image_sums.resize(1);
|
||||||
buf.image_sqsums.resize(1);
|
|
||||||
|
|
||||||
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
|
||||||
|
|
||||||
#if EXT_FP64 && SQRSUM_FIXED
|
integral(image.reshape(1), buf.image_sums[0]);
|
||||||
|
|
||||||
|
#if SQRSUM_FIXED
|
||||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||||
#else
|
#else
|
||||||
Mat sqr_mat = templ.reshape(1);
|
Mat sqr_mat = templ.reshape(1);
|
||||||
@ -237,16 +234,12 @@ namespace cv { namespace ocl
|
|||||||
buf.image_sqsums.resize(1);
|
buf.image_sqsums.resize(1);
|
||||||
|
|
||||||
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
||||||
#if EXT_FP64 && SQRSUM_FIXED
|
#if SQRSUM_FIXED
|
||||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||||
#elif EXT_FP64
|
#else
|
||||||
oclMat templ_c1 = templ.reshape(1);
|
oclMat templ_c1 = templ.reshape(1);
|
||||||
multiply(templ_c1, templ_c1, templ_c1);
|
multiply(templ_c1, templ_c1, templ_c1);
|
||||||
unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
|
unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
|
||||||
#else
|
|
||||||
Mat m_templ_c1 = templ.reshape(1);
|
|
||||||
multiply(m_templ_c1, m_templ_c1, m_templ_c1);
|
|
||||||
unsigned long long templ_sqsum = (unsigned long long)sum(m_templ_c1)[0];
|
|
||||||
#endif
|
#endif
|
||||||
Context *clCxt = image.clCxt;
|
Context *clCxt = image.clCxt;
|
||||||
string kernelName = "normalizeKernel";
|
string kernelName = "normalizeKernel";
|
||||||
@ -332,17 +325,10 @@ namespace cv { namespace ocl
|
|||||||
if(image.channels() == 1)
|
if(image.channels() == 1)
|
||||||
{
|
{
|
||||||
buf.image_sums.resize(1);
|
buf.image_sums.resize(1);
|
||||||
// FIXME: temp fix for incorrect integral kernel
|
integral(image, buf.image_sums[0]);
|
||||||
oclMat tmp_oclmat;
|
|
||||||
integral(image, buf.image_sums[0], tmp_oclmat);
|
|
||||||
|
|
||||||
float templ_sum = 0;
|
float templ_sum = 0;
|
||||||
#if EXT_FP64
|
|
||||||
templ_sum = (float)sum(templ)[0] / templ.size().area();
|
templ_sum = (float)sum(templ)[0] / templ.size().area();
|
||||||
#else
|
|
||||||
Mat o_templ = templ;
|
|
||||||
templ_sum = (float)sum(o_templ)[0] / o_templ.size().area(); // temp fix for non-double supported machine
|
|
||||||
#endif
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||||
@ -351,29 +337,13 @@ namespace cv { namespace ocl
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
Vec4f templ_sum = Vec4f::all(0);
|
Vec4f templ_sum = Vec4f::all(0);
|
||||||
#if EXT_FP64
|
|
||||||
split(image,buf.images);
|
split(image,buf.images);
|
||||||
templ_sum = sum(templ) / templ.size().area();
|
templ_sum = sum(templ) / templ.size().area();
|
||||||
#else
|
|
||||||
// temp fix for non-double supported machine
|
|
||||||
Mat o_templ = templ, o_image = image;
|
|
||||||
vector<Mat> o_mat_vector;
|
|
||||||
o_mat_vector.resize(image.channels());
|
|
||||||
buf.images.resize(image.channels());
|
|
||||||
split(o_image, o_mat_vector);
|
|
||||||
for(int i = 0; i < o_mat_vector.size(); i ++)
|
|
||||||
{
|
|
||||||
buf.images[i] = oclMat(o_mat_vector[i]);
|
|
||||||
}
|
|
||||||
templ_sum = sum(o_templ) / templ.size().area();
|
|
||||||
#endif
|
|
||||||
buf.image_sums.resize(buf.images.size());
|
buf.image_sums.resize(buf.images.size());
|
||||||
|
|
||||||
for(int i = 0; i < image.channels(); i ++)
|
for(int i = 0; i < image.channels(); i ++)
|
||||||
{
|
{
|
||||||
// FIXME: temp fix for incorrect integral kernel
|
integral(buf.images[i], buf.image_sums[i]);
|
||||||
oclMat omat_temp;
|
|
||||||
integral(buf.images[i], buf.image_sums[i], omat_temp);
|
|
||||||
}
|
}
|
||||||
switch(image.channels())
|
switch(image.channels())
|
||||||
{
|
{
|
||||||
@ -432,10 +402,9 @@ namespace cv { namespace ocl
|
|||||||
integral(image, buf.image_sums[0], buf.image_sqsums[0]);
|
integral(image, buf.image_sums[0], buf.image_sqsums[0]);
|
||||||
float templ_sum = 0;
|
float templ_sum = 0;
|
||||||
float templ_sqsum = 0;
|
float templ_sqsum = 0;
|
||||||
#if EXT_FP64
|
|
||||||
templ_sum = (float)sum(templ)[0];
|
templ_sum = (float)sum(templ)[0];
|
||||||
#if SQRSUM_FIXED
|
#if SQRSUM_FIXED
|
||||||
templ_sqsum = sqrSum(templ);
|
templ_sqsum = sqrSum(templ)[0];
|
||||||
#else
|
#else
|
||||||
oclMat templ_sqr = templ;
|
oclMat templ_sqr = templ;
|
||||||
multiply(templ,templ, templ_sqr);
|
multiply(templ,templ, templ_sqr);
|
||||||
@ -443,13 +412,7 @@ namespace cv { namespace ocl
|
|||||||
#endif //SQRSUM_FIXED
|
#endif //SQRSUM_FIXED
|
||||||
templ_sqsum -= scale * templ_sum * templ_sum;
|
templ_sqsum -= scale * templ_sum * templ_sum;
|
||||||
templ_sum *= scale;
|
templ_sum *= scale;
|
||||||
#else
|
|
||||||
// temp fix for non-double supported machine
|
|
||||||
Mat o_templ = templ;
|
|
||||||
templ_sum = (float)sum(o_templ)[0];
|
|
||||||
templ_sqsum = sum(o_templ.mul(o_templ))[0] - scale * templ_sum * templ_sum;
|
|
||||||
templ_sum *= scale;
|
|
||||||
#endif
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||||
@ -463,7 +426,7 @@ namespace cv { namespace ocl
|
|||||||
{
|
{
|
||||||
Vec4f templ_sum = Vec4f::all(0);
|
Vec4f templ_sum = Vec4f::all(0);
|
||||||
Vec4f templ_sqsum = Vec4f::all(0);
|
Vec4f templ_sqsum = Vec4f::all(0);
|
||||||
#if EXT_FP64
|
|
||||||
split(image,buf.images);
|
split(image,buf.images);
|
||||||
templ_sum = sum(templ);
|
templ_sum = sum(templ);
|
||||||
#if SQRSUM_FIXED
|
#if SQRSUM_FIXED
|
||||||
@ -475,21 +438,6 @@ namespace cv { namespace ocl
|
|||||||
#endif //SQRSUM_FIXED
|
#endif //SQRSUM_FIXED
|
||||||
templ_sqsum -= scale * templ_sum * templ_sum;
|
templ_sqsum -= scale * templ_sum * templ_sum;
|
||||||
|
|
||||||
#else
|
|
||||||
// temp fix for non-double supported machine
|
|
||||||
Mat o_templ = templ, o_image = image;
|
|
||||||
|
|
||||||
vector<Mat> o_mat_vector;
|
|
||||||
o_mat_vector.resize(image.channels());
|
|
||||||
buf.images.resize(image.channels());
|
|
||||||
split(o_image, o_mat_vector);
|
|
||||||
for(int i = 0; i < o_mat_vector.size(); i ++)
|
|
||||||
{
|
|
||||||
buf.images[i] = oclMat(o_mat_vector[i]);
|
|
||||||
}
|
|
||||||
templ_sum = sum(o_templ);
|
|
||||||
templ_sqsum = sum(o_templ.mul(o_templ));
|
|
||||||
#endif
|
|
||||||
float templ_sqsum_sum = 0;
|
float templ_sqsum_sum = 0;
|
||||||
for(int i = 0; i < image.channels(); i ++)
|
for(int i = 0; i < image.channels(); i ++)
|
||||||
{
|
{
|
||||||
|
@ -1,4 +1,49 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||||
|
// Yao Wang, yao@multicorewareinc.com
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
@ -24,7 +69,6 @@ namespace cv
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
void pyrdown_run(const oclMat &src, const oclMat &dst)
|
void pyrdown_run(const oclMat &src, const oclMat &dst)
|
||||||
{
|
{
|
||||||
CV_Assert(src.cols / 2 == dst.cols && src.rows / 2 == dst.rows);
|
|
||||||
|
|
||||||
CV_Assert(src.type() == dst.type());
|
CV_Assert(src.type() == dst.type());
|
||||||
CV_Assert(src.depth() != CV_8S);
|
CV_Assert(src.depth() != CV_8S);
|
||||||
@ -108,7 +152,7 @@ void cv::ocl::pyrDown(const oclMat& src, oclMat& dst)
|
|||||||
|
|
||||||
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
|
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
|
||||||
|
|
||||||
//dst.step = dst.rows;
|
dst.download_channels = src.download_channels;
|
||||||
|
|
||||||
pyrdown_run(src, dst);
|
pyrdown_run(src, dst);
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
//
|
//
|
||||||
// @Authors
|
// @Authors
|
||||||
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||||
|
// Yao Wang, yao@multicorewareinc.com
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
@ -63,6 +64,7 @@ namespace cv { namespace ocl
|
|||||||
void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst)
|
void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst)
|
||||||
{
|
{
|
||||||
dst.create(src.rows * 2, src.cols * 2, src.type());
|
dst.create(src.rows * 2, src.cols * 2, src.type());
|
||||||
|
dst.download_channels=src.download_channels;
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
|
|
||||||
const std::string kernelName = "pyrUp";
|
const std::string kernelName = "pyrUp";
|
||||||
|
@ -149,8 +149,7 @@ namespace
|
|||||||
//loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));
|
//loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));
|
||||||
|
|
||||||
bindImgTex(img);
|
bindImgTex(img);
|
||||||
oclMat integral_sqsum;
|
integral(img, surf_.sum); // the two argumented integral version is incorrect
|
||||||
integral(img, surf_.sum, integral_sqsum); // the two argumented integral version is incorrect
|
|
||||||
|
|
||||||
bindSumTex(surf_.sum);
|
bindSumTex(surf_.sum);
|
||||||
maskSumTex = 0;
|
maskSumTex = 0;
|
||||||
|
@ -74,12 +74,10 @@ PARAM_TEST_CASE(ColumnSum, cv::Size, bool )
|
|||||||
TEST_P(ColumnSum, Accuracy)
|
TEST_P(ColumnSum, Accuracy)
|
||||||
{
|
{
|
||||||
cv::Mat src = randomMat(size, CV_32FC1);
|
cv::Mat src = randomMat(size, CV_32FC1);
|
||||||
//cv::Mat src(size,CV_32FC1);
|
cv::ocl::oclMat d_dst;
|
||||||
|
cv::ocl::oclMat d_src(src);
|
||||||
|
|
||||||
//cv::ocl::oclMat d_dst = ::createMat(size,src.type(),useRoi);
|
cv::ocl::columnSum(d_src,d_dst);
|
||||||
cv::ocl::oclMat d_dst = loadMat(src,useRoi);
|
|
||||||
|
|
||||||
cv::ocl::columnSum(loadMat(src,useRoi),d_dst);
|
|
||||||
|
|
||||||
cv::Mat dst(d_dst);
|
cv::Mat dst(d_dst);
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
//
|
//
|
||||||
// @Authors
|
// @Authors
|
||||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||||
|
// Yao Wang yao@multicorewareinc.com
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
@ -43,9 +44,6 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
//#define PRINT_CPU_TIME 1000
|
|
||||||
//#define PRINT_TIME
|
|
||||||
|
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
@ -58,66 +56,15 @@ using namespace cvtest;
|
|||||||
using namespace testing;
|
using namespace testing;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
PARAM_TEST_CASE(PyrDown, MatType, bool)
|
PARAM_TEST_CASE(PyrDown, MatType, int)
|
||||||
{
|
{
|
||||||
int type;
|
int type;
|
||||||
cv::Scalar val;
|
int channels;
|
||||||
|
|
||||||
//src mat
|
|
||||||
cv::Mat mat1;
|
|
||||||
cv::Mat mat2;
|
|
||||||
cv::Mat mask;
|
|
||||||
cv::Mat dst;
|
|
||||||
cv::Mat dst1; //bak, for two outputs
|
|
||||||
|
|
||||||
// set up roi
|
|
||||||
int roicols;
|
|
||||||
int roirows;
|
|
||||||
int src1x;
|
|
||||||
int src1y;
|
|
||||||
int src2x;
|
|
||||||
int src2y;
|
|
||||||
int dstx;
|
|
||||||
int dsty;
|
|
||||||
int maskx;
|
|
||||||
int masky;
|
|
||||||
|
|
||||||
|
|
||||||
//src mat with roi
|
|
||||||
cv::Mat mat1_roi;
|
|
||||||
cv::Mat mat2_roi;
|
|
||||||
cv::Mat mask_roi;
|
|
||||||
cv::Mat dst_roi;
|
|
||||||
cv::Mat dst1_roi; //bak
|
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
|
||||||
//ocl dst mat for testing
|
|
||||||
cv::ocl::oclMat gdst_whole;
|
|
||||||
cv::ocl::oclMat gdst1_whole; //bak
|
|
||||||
|
|
||||||
//ocl mat with roi
|
|
||||||
cv::ocl::oclMat gmat1;
|
|
||||||
cv::ocl::oclMat gmat2;
|
|
||||||
cv::ocl::oclMat gdst;
|
|
||||||
cv::ocl::oclMat gdst1; //bak
|
|
||||||
cv::ocl::oclMat gmask;
|
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
type = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
|
channels = GET_PARAM(1);
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
|
||||||
|
|
||||||
cv::Size size(MWIDTH, MHEIGHT);
|
|
||||||
|
|
||||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
|
||||||
mat2 = randomMat(rng, size, type, 5, 16, false);
|
|
||||||
dst = randomMat(rng, size, type, 5, 16, false);
|
|
||||||
dst1 = randomMat(rng, size, type, 5, 16, false);
|
|
||||||
mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
|
|
||||||
|
|
||||||
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
|
|
||||||
|
|
||||||
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
|
|
||||||
|
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
@ -127,169 +74,36 @@ PARAM_TEST_CASE(PyrDown, MatType, bool)
|
|||||||
|
|
||||||
void Cleanup()
|
void Cleanup()
|
||||||
{
|
{
|
||||||
mat1.release();
|
|
||||||
mat2.release();
|
|
||||||
mask.release();
|
|
||||||
dst.release();
|
|
||||||
dst1.release();
|
|
||||||
mat1_roi.release();
|
|
||||||
mat2_roi.release();
|
|
||||||
mask_roi.release();
|
|
||||||
dst_roi.release();
|
|
||||||
dst1_roi.release();
|
|
||||||
|
|
||||||
gdst_whole.release();
|
|
||||||
gdst1_whole.release();
|
|
||||||
gmat1.release();
|
|
||||||
gmat2.release();
|
|
||||||
gdst.release();
|
|
||||||
gdst1.release();
|
|
||||||
gmask.release();
|
|
||||||
}
|
|
||||||
|
|
||||||
void random_roi()
|
|
||||||
{
|
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
|
||||||
|
|
||||||
#ifdef RANDOMROI
|
|
||||||
//randomize ROI
|
|
||||||
roicols = rng.uniform(1, mat1.cols);
|
|
||||||
roirows = rng.uniform(1, mat1.rows);
|
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
|
||||||
src1y = rng.uniform(0, mat1.rows - roirows);
|
|
||||||
dstx = rng.uniform(0, dst.cols - roicols);
|
|
||||||
dsty = rng.uniform(0, dst.rows - roirows);
|
|
||||||
#else
|
|
||||||
roicols = mat1.cols;
|
|
||||||
roirows = mat1.rows;
|
|
||||||
src1x = 0;
|
|
||||||
src1y = 0;
|
|
||||||
dstx = 0;
|
|
||||||
dsty = 0;
|
|
||||||
#endif
|
|
||||||
maskx = rng.uniform(0, mask.cols - roicols);
|
|
||||||
masky = rng.uniform(0, mask.rows - roirows);
|
|
||||||
src2x = rng.uniform(0, mat2.cols - roicols);
|
|
||||||
src2y = rng.uniform(0, mat2.rows - roirows);
|
|
||||||
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
|
|
||||||
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
|
|
||||||
mask_roi = mask(Rect(maskx, masky, roicols, roirows));
|
|
||||||
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
|
|
||||||
dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows));
|
|
||||||
|
|
||||||
gdst_whole = dst;
|
|
||||||
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
|
|
||||||
|
|
||||||
gdst1_whole = dst1;
|
|
||||||
gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
|
|
||||||
|
|
||||||
gmat1 = mat1_roi;
|
|
||||||
gmat2 = mat2_roi;
|
|
||||||
gmask = mask_roi; //end
|
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define VARNAME(A) string(#A);
|
|
||||||
|
|
||||||
|
|
||||||
void PrePrint()
|
|
||||||
{
|
|
||||||
//for(int i = 0; i < MHEIGHT; i++)
|
|
||||||
//{
|
|
||||||
// printf("(%d) ", i);
|
|
||||||
// for(int k = 0; k < MWIDTH; k++)
|
|
||||||
// {
|
|
||||||
// printf("%d ", mat1_roi.data[i * MHEIGHT + k]);
|
|
||||||
// }
|
|
||||||
// printf("\n");
|
|
||||||
//}
|
|
||||||
}
|
|
||||||
|
|
||||||
void PostPrint()
|
|
||||||
{
|
|
||||||
//dst_roi.convertTo(dst_roi,CV_32S);
|
|
||||||
//cpu_dst.convertTo(cpu_dst,CV_32S);
|
|
||||||
//dst_roi -= cpu_dst;
|
|
||||||
//cpu_dst -= dst_roi;
|
|
||||||
//for(int i = 0; i < MHEIGHT / 2; i++)
|
|
||||||
//{
|
|
||||||
// printf("(%d) ", i);
|
|
||||||
// for(int k = 0; k < MWIDTH / 2; k++)
|
|
||||||
// {
|
|
||||||
// if(gmat1.depth() == 0)
|
|
||||||
// {
|
|
||||||
// if(gmat1.channels() == 1)
|
|
||||||
// {
|
|
||||||
// printf("%d ", dst_roi.data[i * MHEIGHT / 2 + k]);
|
|
||||||
// }
|
|
||||||
// else
|
|
||||||
// {
|
|
||||||
// printf("%d ", ((unsigned*)dst_roi.data)[i * MHEIGHT / 2 + k]);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// else if(gmat1.depth() == 5)
|
|
||||||
// {
|
|
||||||
// printf("%.6f ", ((float*)dst_roi.data)[i * MHEIGHT / 2 + k]);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// printf("\n");
|
|
||||||
//}
|
|
||||||
//for(int i = 0; i < MHEIGHT / 2; i++)
|
|
||||||
//{
|
|
||||||
// printf("(%d) ", i);
|
|
||||||
// for(int k = 0; k < MWIDTH / 2; k++)
|
|
||||||
// {
|
|
||||||
// if(gmat1.depth() == 0)
|
|
||||||
// {
|
|
||||||
// if(gmat1.channels() == 1)
|
|
||||||
// {
|
|
||||||
// printf("%d ", cpu_dst.data[i * MHEIGHT / 2 + k]);
|
|
||||||
// }
|
|
||||||
// else
|
|
||||||
// {
|
|
||||||
// printf("%d ", ((unsigned*)cpu_dst.data)[i * MHEIGHT / 2 + k]);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// else if(gmat1.depth() == 5)
|
|
||||||
// {
|
|
||||||
// printf("%.6f ", ((float*)cpu_dst.data)[i * MHEIGHT / 2 + k]);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// printf("\n");
|
|
||||||
//}
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////PyrDown/////////////////////////////////////////////////
|
|
||||||
//struct PyrDown : ArithmTestBase {};
|
|
||||||
|
|
||||||
TEST_P(PyrDown, Mat)
|
TEST_P(PyrDown, Mat)
|
||||||
{
|
{
|
||||||
for(int j = 0; j < LOOP_TIMES; j++)
|
for(int j = 0; j < LOOP_TIMES; j++)
|
||||||
{
|
{
|
||||||
random_roi();
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
|
cv::Mat src=randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false);
|
||||||
|
|
||||||
cv::pyrDown(mat1_roi, dst_roi);
|
cv::ocl::oclMat gsrc(src), gdst;
|
||||||
cv::ocl::pyrDown(gmat1, gdst);
|
cv::Mat dst_cpu;
|
||||||
|
cv::pyrDown(src, dst_cpu);
|
||||||
|
cv::ocl::pyrDown(gsrc, gdst);
|
||||||
|
|
||||||
cv::Mat cpu_dst;
|
cv::Mat dst;
|
||||||
gdst.download(cpu_dst);
|
gdst.download(dst);
|
||||||
char s[1024];
|
char s[1024]={0};
|
||||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_roi, cpu_dst, dst_roi.depth() == CV_32F ? 1e-5f : 1.0f, s);
|
EXPECT_MAT_NEAR(dst, dst_cpu, dst.depth() == CV_32F ? 1e-4f : 1.0f, s);
|
||||||
|
|
||||||
Cleanup();
|
Cleanup();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//********test****************
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine(
|
||||||
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
|
Values(CV_8U, CV_32F), Values(1, 3, 4)));
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
|
||||||
|
|
||||||
|
|
||||||
#endif // HAVE_OPENCL
|
#endif // HAVE_OPENCL
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
//
|
//
|
||||||
// @Authors
|
// @Authors
|
||||||
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||||
|
// Yao Wang yao@multicorewareinc.com
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
@ -48,44 +49,49 @@
|
|||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cvtest;
|
||||||
|
using namespace testing;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
PARAM_TEST_CASE(PyrUp,cv::Size,int)
|
PARAM_TEST_CASE(PyrUp, MatType, int)
|
||||||
{
|
{
|
||||||
cv::Size size;
|
|
||||||
int type;
|
int type;
|
||||||
|
int channels;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
//int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
//int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
size = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
type = GET_PARAM(1);
|
channels = GET_PARAM(1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(PyrUp,Accuracy)
|
TEST_P(PyrUp,Accuracy)
|
||||||
{
|
{
|
||||||
cv::Mat src = randomMat(size,type);
|
for(int j = 0; j < LOOP_TIMES; j++)
|
||||||
|
{
|
||||||
|
Size size(MWIDTH, MHEIGHT);
|
||||||
cv::Mat dst_gold;
|
Mat src = randomMat(size,CV_MAKETYPE(type, channels));
|
||||||
cv::pyrUp(src,dst_gold);
|
Mat dst_gold;
|
||||||
|
pyrUp(src,dst_gold);
|
||||||
cv::ocl::oclMat dst;
|
ocl::oclMat dst;
|
||||||
cv::ocl::oclMat srcMat(src);
|
ocl::oclMat srcMat(src);
|
||||||
cv::ocl::pyrUp(srcMat,dst);
|
ocl::pyrUp(srcMat,dst);
|
||||||
|
Mat cpu_dst;
|
||||||
|
dst.download(cpu_dst);
|
||||||
char s[100]={0};
|
char s[100]={0};
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_gold, dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s);
|
EXPECT_MAT_NEAR(dst_gold, cpu_dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 1
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine(
|
||||||
testing::Values(cv::Size(32, 32)),
|
Values(CV_8U, CV_32F), Values(1, 3, 4)));
|
||||||
testing::Values(MatType(CV_8UC1),MatType(CV_16UC1),MatType(CV_32FC1),MatType(CV_8UC4),
|
|
||||||
MatType(CV_16UC4),MatType(CV_32FC4))));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // HAVE_OPENCL
|
#endif // HAVE_OPENCL
|
Loading…
x
Reference in New Issue
Block a user