format files to ANSI C style with coolformat
change the download channels to oclchannles() fix bugs of arithm functions perf fix of bilateral bug fix of split test case add build_warps functions
This commit is contained in:
@@ -55,22 +55,22 @@ namespace cv
|
|||||||
//////////////////////////////// oclMat ////////////////////////////////
|
//////////////////////////////// oclMat ////////////////////////////////
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
inline oclMat::oclMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0) {}
|
inline oclMat::oclMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) {}
|
||||||
|
|
||||||
inline oclMat::oclMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
|
inline oclMat::oclMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
|
||||||
{
|
{
|
||||||
if( _rows > 0 && _cols > 0 )
|
if( _rows > 0 && _cols > 0 )
|
||||||
create( _rows, _cols, _type );
|
create( _rows, _cols, _type );
|
||||||
}
|
}
|
||||||
|
|
||||||
inline oclMat::oclMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
|
inline oclMat::oclMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
|
||||||
{
|
{
|
||||||
if( _size.height > 0 && _size.width > 0 )
|
if( _size.height > 0 && _size.width > 0 )
|
||||||
create( _size.height, _size.width, _type );
|
create( _size.height, _size.width, _type );
|
||||||
}
|
}
|
||||||
|
|
||||||
inline oclMat::oclMat(int _rows, int _cols, int _type, const Scalar &_s)
|
inline oclMat::oclMat(int _rows, int _cols, int _type, const Scalar &_s)
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
|
||||||
{
|
{
|
||||||
if(_rows > 0 && _cols > 0)
|
if(_rows > 0 && _cols > 0)
|
||||||
{
|
{
|
||||||
@@ -80,7 +80,7 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline oclMat::oclMat(Size _size, int _type, const Scalar &_s)
|
inline oclMat::oclMat(Size _size, int _type, const Scalar &_s)
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
|
||||||
{
|
{
|
||||||
if( _size.height > 0 && _size.width > 0 )
|
if( _size.height > 0 && _size.width > 0 )
|
||||||
{
|
{
|
||||||
@@ -91,18 +91,18 @@ namespace cv
|
|||||||
|
|
||||||
inline oclMat::oclMat(const oclMat &m)
|
inline oclMat::oclMat(const oclMat &m)
|
||||||
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data),
|
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data),
|
||||||
refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), download_channels(m.download_channels)
|
refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols)
|
||||||
{
|
{
|
||||||
if( refcount )
|
if( refcount )
|
||||||
CV_XADD(refcount, 1);
|
CV_XADD(refcount, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline oclMat::oclMat(int _rows, int _cols, int _type, void *_data, size_t _step)
|
inline oclMat::oclMat(int _rows, int _cols, int _type, void *_data, size_t _step)
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0),
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0),
|
||||||
datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
|
datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
|
||||||
{
|
{
|
||||||
cv::Mat m(_rows,_cols,_type,_data,_step);
|
cv::Mat m(_rows, _cols, _type, _data, _step);
|
||||||
upload(m);
|
upload(m);
|
||||||
//size_t minstep = cols * elemSize();
|
//size_t minstep = cols * elemSize();
|
||||||
//if( step == Mat::AUTO_STEP )
|
//if( step == Mat::AUTO_STEP )
|
||||||
//{
|
//{
|
||||||
@@ -117,14 +117,14 @@ namespace cv
|
|||||||
//}
|
//}
|
||||||
//dataend += step * (rows - 1) + minstep;
|
//dataend += step * (rows - 1) + minstep;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline oclMat::oclMat(Size _size, int _type, void *_data, size_t _step)
|
inline oclMat::oclMat(Size _size, int _type, void *_data, size_t _step)
|
||||||
: flags(0), rows(0), cols(0),
|
: flags(0), rows(0), cols(0),
|
||||||
step(0), data(0), refcount(0),
|
step(0), data(0), refcount(0),
|
||||||
datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
|
datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
|
||||||
{
|
{
|
||||||
cv::Mat m(_size,_type,_data,_step);
|
cv::Mat m(_size, _type, _data, _step);
|
||||||
upload(m);
|
upload(m);
|
||||||
//size_t minstep = cols * elemSize();
|
//size_t minstep = cols * elemSize();
|
||||||
//if( step == Mat::AUTO_STEP )
|
//if( step == Mat::AUTO_STEP )
|
||||||
//{
|
//{
|
||||||
@@ -152,7 +152,6 @@ namespace cv
|
|||||||
wholerows = m.wholerows;
|
wholerows = m.wholerows;
|
||||||
wholecols = m.wholecols;
|
wholecols = m.wholecols;
|
||||||
offset = m.offset;
|
offset = m.offset;
|
||||||
download_channels = m.download_channels;
|
|
||||||
if( rowRange == Range::all() )
|
if( rowRange == Range::all() )
|
||||||
rows = m.rows;
|
rows = m.rows;
|
||||||
else
|
else
|
||||||
@@ -184,7 +183,7 @@ namespace cv
|
|||||||
inline oclMat::oclMat(const oclMat &m, const Rect &roi)
|
inline oclMat::oclMat(const oclMat &m, const Rect &roi)
|
||||||
: flags(m.flags), rows(roi.height), cols(roi.width),
|
: flags(m.flags), rows(roi.height), cols(roi.width),
|
||||||
step(m.step), data(m.data), refcount(m.refcount),
|
step(m.step), data(m.data), refcount(m.refcount),
|
||||||
datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), download_channels(m.download_channels)
|
datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols)
|
||||||
{
|
{
|
||||||
flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
|
flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
|
||||||
offset += roi.y * step + roi.x * elemSize();
|
offset += roi.y * step + roi.x * elemSize();
|
||||||
@@ -197,7 +196,7 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline oclMat::oclMat(const Mat &m)
|
inline oclMat::oclMat(const Mat &m)
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) , offset(0), wholerows(0), wholecols(0), download_channels(0)
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) , offset(0), wholerows(0), wholecols(0)
|
||||||
{
|
{
|
||||||
//clCxt = Context::getContext();
|
//clCxt = Context::getContext();
|
||||||
upload(m);
|
upload(m);
|
||||||
@@ -227,7 +226,6 @@ namespace cv
|
|||||||
wholerows = m.wholerows;
|
wholerows = m.wholerows;
|
||||||
wholecols = m.wholecols;
|
wholecols = m.wholecols;
|
||||||
refcount = m.refcount;
|
refcount = m.refcount;
|
||||||
download_channels = m.download_channels;
|
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@@ -327,10 +325,9 @@ namespace cv
|
|||||||
std::swap( dataend, b.dataend );
|
std::swap( dataend, b.dataend );
|
||||||
std::swap( refcount, b.refcount );
|
std::swap( refcount, b.refcount );
|
||||||
std::swap( offset, b.offset );
|
std::swap( offset, b.offset );
|
||||||
std::swap( clCxt, b.clCxt );
|
std::swap( clCxt, b.clCxt );
|
||||||
std::swap( wholerows, b.wholerows );
|
std::swap( wholerows, b.wholerows );
|
||||||
std::swap( wholecols, b.wholecols );
|
std::swap( wholecols, b.wholecols );
|
||||||
std::swap( download_channels, b.download_channels);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void oclMat::locateROI( Size &wholeSize, Point &ofs ) const
|
inline void oclMat::locateROI( Size &wholeSize, Point &ofs ) const
|
||||||
@@ -366,7 +363,7 @@ namespace cv
|
|||||||
offset += (row1 - ofs.y) * step + (col1 - ofs.x) * esz;
|
offset += (row1 - ofs.y) * step + (col1 - ofs.x) * esz;
|
||||||
rows = row2 - row1;
|
rows = row2 - row1;
|
||||||
cols = col2 - col1;
|
cols = col2 - col1;
|
||||||
if( esz *cols == step || rows == 1 )
|
if( esz * cols == step || rows == 1 )
|
||||||
flags |= Mat::CONTINUOUS_FLAG;
|
flags |= Mat::CONTINUOUS_FLAG;
|
||||||
else
|
else
|
||||||
flags &= ~Mat::CONTINUOUS_FLAG;
|
flags &= ~Mat::CONTINUOUS_FLAG;
|
||||||
@@ -388,7 +385,7 @@ namespace cv
|
|||||||
}
|
}
|
||||||
inline size_t oclMat::elemSize() const
|
inline size_t oclMat::elemSize() const
|
||||||
{
|
{
|
||||||
return CV_ELEM_SIZE(flags);
|
return CV_ELEM_SIZE((CV_MAKE_TYPE(type(), oclchannels())));
|
||||||
}
|
}
|
||||||
inline size_t oclMat::elemSize1() const
|
inline size_t oclMat::elemSize1() const
|
||||||
{
|
{
|
||||||
@@ -398,6 +395,10 @@ namespace cv
|
|||||||
{
|
{
|
||||||
return CV_MAT_TYPE(flags);
|
return CV_MAT_TYPE(flags);
|
||||||
}
|
}
|
||||||
|
inline int oclMat::ocltype() const
|
||||||
|
{
|
||||||
|
return CV_MAKE_TYPE(depth(), oclchannels());
|
||||||
|
}
|
||||||
inline int oclMat::depth() const
|
inline int oclMat::depth() const
|
||||||
{
|
{
|
||||||
return CV_MAT_DEPTH(flags);
|
return CV_MAT_DEPTH(flags);
|
||||||
@@ -406,6 +407,10 @@ namespace cv
|
|||||||
{
|
{
|
||||||
return CV_MAT_CN(flags);
|
return CV_MAT_CN(flags);
|
||||||
}
|
}
|
||||||
|
inline int oclMat::oclchannels() const
|
||||||
|
{
|
||||||
|
return (CV_MAT_CN(flags)) == 3 ? 4 : (CV_MAT_CN(flags));
|
||||||
|
}
|
||||||
inline size_t oclMat::step1() const
|
inline size_t oclMat::step1() const
|
||||||
{
|
{
|
||||||
return step / elemSize1();
|
return step / elemSize1();
|
||||||
@@ -420,32 +425,32 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
inline uchar *oclMat::ptr(int y)
|
inline uchar *oclMat::ptr(int y)
|
||||||
{
|
{
|
||||||
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
||||||
CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n");
|
CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n");
|
||||||
return data + step * y;
|
return data + step * y;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const uchar *oclMat::ptr(int y) const
|
inline const uchar *oclMat::ptr(int y) const
|
||||||
{
|
{
|
||||||
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
||||||
CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n");
|
CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n");
|
||||||
return data + step * y;
|
return data + step * y;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename _Tp> inline _Tp *oclMat::ptr(int y)
|
template<typename _Tp> inline _Tp *oclMat::ptr(int y)
|
||||||
{
|
{
|
||||||
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
||||||
CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n");
|
CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n");
|
||||||
return (_Tp *)(data + step * y);
|
return (_Tp *)(data + step * y);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename _Tp> inline const _Tp *oclMat::ptr(int y) const
|
template<typename _Tp> inline const _Tp *oclMat::ptr(int y) const
|
||||||
{
|
{
|
||||||
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
||||||
CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n");
|
CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n");
|
||||||
return (const _Tp *)(data + step * y);
|
return (const _Tp *)(data + step * y);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -461,18 +466,20 @@ namespace cv
|
|||||||
a.swap(b);
|
a.swap(b);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void ensureSizeIsEnough(int rows, int cols, int type, oclMat& m)
|
inline void ensureSizeIsEnough(int rows, int cols, int type, oclMat &m)
|
||||||
{
|
{
|
||||||
if (m.type() == type && m.rows >= rows && m.cols >= cols)
|
if (m.type() == type && m.rows >= rows && m.cols >= cols)
|
||||||
m = m(Rect(0, 0, cols, rows));
|
m = m(Rect(0, 0, cols, rows));
|
||||||
else
|
else
|
||||||
m.create(rows, cols, type);
|
m.create(rows, cols, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void ensureSizeIsEnough(Size size, int type, oclMat &m)
|
||||||
|
{
|
||||||
|
ensureSizeIsEnough(size.height, size.width, type, m);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void ensureSizeIsEnough(Size size, int type, oclMat& m)
|
|
||||||
{
|
|
||||||
ensureSizeIsEnough(size.height, size.width, type, m);
|
|
||||||
}
|
|
||||||
} /* end of namespace ocl */
|
} /* end of namespace ocl */
|
||||||
|
|
||||||
} /* end of namespace cv */
|
} /* end of namespace cv */
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -42,7 +42,7 @@
|
|||||||
#ifndef __OPENCV_TEST_INTERPOLATION_HPP__
|
#ifndef __OPENCV_TEST_INTERPOLATION_HPP__
|
||||||
#define __OPENCV_TEST_INTERPOLATION_HPP__
|
#define __OPENCV_TEST_INTERPOLATION_HPP__
|
||||||
|
|
||||||
template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
template <typename T> T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
||||||
{
|
{
|
||||||
if (border_type == cv::BORDER_CONSTANT)
|
if (border_type == cv::BORDER_CONSTANT)
|
||||||
return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at<T>(y, x * src.channels() + c) : cv::saturate_cast<T>(borderVal.val[c]);
|
return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at<T>(y, x * src.channels() + c) : cv::saturate_cast<T>(borderVal.val[c]);
|
||||||
@@ -52,7 +52,7 @@ template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int bor
|
|||||||
|
|
||||||
template <typename T> struct NearestInterpolator
|
template <typename T> struct NearestInterpolator
|
||||||
{
|
{
|
||||||
static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
||||||
{
|
{
|
||||||
return readVal<T>(src, cvFloor(y), cvFloor(x), c, border_type, borderVal);
|
return readVal<T>(src, cvFloor(y), cvFloor(x), c, border_type, borderVal);
|
||||||
}
|
}
|
||||||
@@ -60,7 +60,7 @@ template <typename T> struct NearestInterpolator
|
|||||||
|
|
||||||
template <typename T> struct LinearInterpolator
|
template <typename T> struct LinearInterpolator
|
||||||
{
|
{
|
||||||
static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
||||||
{
|
{
|
||||||
x -= 0.5f;
|
x -= 0.5f;
|
||||||
y -= 0.5f;
|
y -= 0.5f;
|
||||||
@@ -85,7 +85,7 @@ template <typename T> struct CubicInterpolator
|
|||||||
{
|
{
|
||||||
static float getValue(float p[4], float x)
|
static float getValue(float p[4], float x)
|
||||||
{
|
{
|
||||||
return p[1] + 0.5 * x * (p[2] - p[0] + x*(2.0*p[0] - 5.0*p[1] + 4.0*p[2] - p[3] + x*(3.0*(p[1] - p[2]) + p[3] - p[0])));
|
return p[1] + 0.5 * x * (p[2] - p[0] + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
|
||||||
}
|
}
|
||||||
|
|
||||||
static float getValue(float p[4][4], float x, float y)
|
static float getValue(float p[4][4], float x, float y)
|
||||||
@@ -100,7 +100,7 @@ template <typename T> struct CubicInterpolator
|
|||||||
return getValue(arr, y);
|
return getValue(arr, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
||||||
{
|
{
|
||||||
int ix = cvRound(x);
|
int ix = cvRound(x);
|
||||||
int iy = cvRound(y);
|
int iy = cvRound(y);
|
||||||
|
|||||||
@@ -50,46 +50,46 @@ using namespace cvtest;
|
|||||||
using namespace testing;
|
using namespace testing;
|
||||||
|
|
||||||
void print_info()
|
void print_info()
|
||||||
{
|
{
|
||||||
printf("\n");
|
printf("\n");
|
||||||
#if defined _WIN32
|
#if defined _WIN32
|
||||||
# if defined _WIN64
|
# if defined _WIN64
|
||||||
puts("OS: Windows 64");
|
puts("OS: Windows 64");
|
||||||
# else
|
# else
|
||||||
puts("OS: Windows 32");
|
puts("OS: Windows 32");
|
||||||
# endif
|
# endif
|
||||||
#elif defined linux
|
#elif defined linux
|
||||||
# if defined _LP64
|
# if defined _LP64
|
||||||
puts("OS: Linux 64");
|
puts("OS: Linux 64");
|
||||||
# else
|
# else
|
||||||
puts("OS: Linux 32");
|
puts("OS: Linux 32");
|
||||||
# endif
|
# endif
|
||||||
#elif defined __APPLE__
|
#elif defined __APPLE__
|
||||||
# if defined _LP64
|
# if defined _LP64
|
||||||
puts("OS: Apple 64");
|
puts("OS: Apple 64");
|
||||||
# else
|
# else
|
||||||
puts("OS: Apple 32");
|
puts("OS: Apple 32");
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
std::vector<cv::ocl::Info> oclinfo;
|
std::vector<cv::ocl::Info> oclinfo;
|
||||||
TS::ptr()->init("ocl");
|
TS::ptr()->init("ocl");
|
||||||
InitGoogleTest(&argc, argv);
|
InitGoogleTest(&argc, argv);
|
||||||
|
|
||||||
print_info();
|
print_info();
|
||||||
int devnums = getDevice(oclinfo);
|
int devnums = getDevice(oclinfo);
|
||||||
if(devnums<1)
|
if(devnums < 1)
|
||||||
{
|
{
|
||||||
std::cout << "no device found\n";
|
std::cout << "no device found\n";
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
//if you want to use undefault device, set it here
|
//if you want to use undefault device, set it here
|
||||||
//setDevice(oclinfo[0]);
|
//setDevice(oclinfo[0]);
|
||||||
setBinpath(CLBINPATH);
|
setBinpath(CLBINPATH);
|
||||||
return RUN_ALL_TESTS();
|
return RUN_ALL_TESTS();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -55,66 +55,66 @@ using namespace std;
|
|||||||
|
|
||||||
PARAM_TEST_CASE(Blend, MatType, int)
|
PARAM_TEST_CASE(Blend, MatType, int)
|
||||||
{
|
{
|
||||||
int type;
|
int type;
|
||||||
int channels;
|
int channels;
|
||||||
std::vector<cv::ocl::Info> oclinfo;
|
std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
|
|
||||||
type = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
channels = GET_PARAM(1);
|
channels = GET_PARAM(1);
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
//cv::ocl::setBinpath(CLBINPATH);
|
//cv::ocl::setBinpath(CLBINPATH);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(Blend, Performance)
|
TEST_P(Blend, Performance)
|
||||||
{
|
{
|
||||||
cv::Size size(MWIDTH, MHEIGHT);
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
|
cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
|
||||||
cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
|
cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
|
||||||
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
|
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
|
||||||
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
|
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
|
||||||
cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
|
cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
|
||||||
cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
|
cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
|
||||||
|
|
||||||
|
|
||||||
double totalgputick_all = 0;
|
double totalgputick_all = 0;
|
||||||
double totalgputick_kernel = 0;
|
double totalgputick_kernel = 0;
|
||||||
double t1 = 0;
|
double t1 = 0;
|
||||||
double t2 = 0;
|
double t2 = 0;
|
||||||
|
|
||||||
for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
|
for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
|
||||||
{
|
{
|
||||||
t1 = (double)cvGetTickCount();
|
t1 = (double)cvGetTickCount();
|
||||||
cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
|
cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
|
||||||
cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
|
cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
|
||||||
cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
|
cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
|
||||||
cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
|
cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
|
||||||
|
|
||||||
t2 = (double)cvGetTickCount();
|
t2 = (double)cvGetTickCount();
|
||||||
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
|
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
|
||||||
t2 = (double)cvGetTickCount() - t2;
|
t2 = (double)cvGetTickCount() - t2;
|
||||||
|
|
||||||
cv::Mat m;
|
cv::Mat m;
|
||||||
gdst.download(m);
|
gdst.download(m);
|
||||||
t1 = (double)cvGetTickCount() - t1;
|
t1 = (double)cvGetTickCount() - t1;
|
||||||
|
|
||||||
if (j == 0)
|
if (j == 0)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
totalgputick_all = t1 + totalgputick_all;
|
totalgputick_all = t1 + totalgputick_all;
|
||||||
totalgputick_kernel = t2 + totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
};
|
};
|
||||||
|
|
||||||
cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
|
||||||
|
|||||||
@@ -85,70 +85,70 @@ IMPLEMENT_PARAM_CLASS(L2gradient, bool);
|
|||||||
|
|
||||||
PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
|
PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
|
||||||
{
|
{
|
||||||
int apperture_size;
|
int apperture_size;
|
||||||
bool useL2gradient;
|
bool useL2gradient;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
apperture_size = GET_PARAM(0);
|
apperture_size = GET_PARAM(0);
|
||||||
useL2gradient = GET_PARAM(1);
|
useL2gradient = GET_PARAM(1);
|
||||||
|
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(Canny1, Performance)
|
TEST_P(Canny1, Performance)
|
||||||
{
|
{
|
||||||
cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE);
|
cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
|
||||||
ASSERT_FALSE(img.empty());
|
ASSERT_FALSE(img.empty());
|
||||||
|
|
||||||
double low_thresh = 100.0;
|
double low_thresh = 100.0;
|
||||||
double high_thresh = 150.0;
|
double high_thresh = 150.0;
|
||||||
|
|
||||||
cv::Mat edges_gold;
|
cv::Mat edges_gold;
|
||||||
cv::ocl::oclMat edges;
|
cv::ocl::oclMat edges;
|
||||||
|
|
||||||
double totalgputick=0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel=0;
|
double totalgputick_kernel = 0;
|
||||||
|
|
||||||
double t1=0;
|
|
||||||
double t2=0;
|
|
||||||
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
|
||||||
{
|
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
double t1 = 0;
|
||||||
|
double t2 = 0;
|
||||||
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
|
for(int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
|
{
|
||||||
t2=(double)cvGetTickCount();//kernel
|
|
||||||
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
|
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
|
||||||
|
|
||||||
cv::Mat cpu_dst;
|
|
||||||
edges.download (cpu_dst);//download
|
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
|
||||||
|
|
||||||
if(j == 0)
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
continue;
|
|
||||||
|
|
||||||
totalgputick=t1+totalgputick;
|
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
|
||||||
|
|
||||||
totalgputick_kernel=t2+totalgputick_kernel;
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
|
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
|
||||||
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
}
|
cv::Mat cpu_dst;
|
||||||
|
edges.download (cpu_dst);//download
|
||||||
|
|
||||||
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
|
||||||
|
if(j == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
totalgputick = t1 + totalgputick;
|
||||||
|
|
||||||
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine(
|
||||||
testing::Values(AppertureSize(3), AppertureSize(5)),
|
testing::Values(AppertureSize(3), AppertureSize(5)),
|
||||||
testing::Values(L2gradient(false), L2gradient(true))));
|
testing::Values(L2gradient(false), L2gradient(true))));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@
|
|||||||
//
|
//
|
||||||
// @Authors
|
// @Authors
|
||||||
// Fangfang Bai fangfang@multicorewareinc.com
|
// Fangfang Bai fangfang@multicorewareinc.com
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
@@ -63,53 +63,53 @@ using namespace std;
|
|||||||
|
|
||||||
PARAM_TEST_CASE(ColumnSum)
|
PARAM_TEST_CASE(ColumnSum)
|
||||||
{
|
{
|
||||||
cv::Mat src;
|
cv::Mat src;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(ColumnSum, Performance)
|
TEST_F(ColumnSum, Performance)
|
||||||
{
|
{
|
||||||
cv::Size size(MWIDTH,MHEIGHT);
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
cv::Mat src = randomMat(size, CV_32FC1);
|
cv::Mat src = randomMat(size, CV_32FC1);
|
||||||
cv::ocl::oclMat d_dst;
|
cv::ocl::oclMat d_dst;
|
||||||
|
|
||||||
double totalgputick=0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel=0;
|
double totalgputick_kernel = 0;
|
||||||
double t1=0;
|
double t1 = 0;
|
||||||
double t2=0;
|
double t2 = 0;
|
||||||
|
|
||||||
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
for(int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
{
|
{
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
cv::ocl::oclMat d_src(src);
|
cv::ocl::oclMat d_src(src);
|
||||||
|
|
||||||
t2=(double)cvGetTickCount();//kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
cv::ocl::columnSum(d_src,d_dst);
|
cv::ocl::columnSum(d_src, d_dst);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
cv::Mat cpu_dst;
|
cv::Mat cpu_dst;
|
||||||
d_dst.download (cpu_dst);//download
|
d_dst.download (cpu_dst);//download
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
if(j == 0)
|
if(j == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
totalgputick=t1+totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
totalgputick_kernel=t2+totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -117,4 +117,4 @@ TEST_F(ColumnSum, Performance)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -48,75 +48,75 @@ using namespace std;
|
|||||||
#ifdef HAVE_CLAMDFFT
|
#ifdef HAVE_CLAMDFFT
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// Dft
|
// Dft
|
||||||
PARAM_TEST_CASE(Dft, cv::Size, bool)
|
PARAM_TEST_CASE(Dft, cv::Size, bool)
|
||||||
{
|
{
|
||||||
cv::Size dft_size;
|
cv::Size dft_size;
|
||||||
bool dft_rows;
|
bool dft_rows;
|
||||||
vector<cv::ocl::Info> info;
|
vector<cv::ocl::Info> info;
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
dft_size = GET_PARAM(0);
|
dft_size = GET_PARAM(0);
|
||||||
dft_rows = GET_PARAM(1);
|
dft_rows = GET_PARAM(1);
|
||||||
cv::ocl::getDevice(info);
|
cv::ocl::getDevice(info);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(Dft, C2C)
|
TEST_P(Dft, C2C)
|
||||||
{
|
{
|
||||||
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
|
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
flags |= dft_rows ? cv::DFT_ROWS : 0;
|
flags |= dft_rows ? cv::DFT_ROWS : 0;
|
||||||
|
|
||||||
cv::ocl::oclMat d_b;
|
cv::ocl::oclMat d_b;
|
||||||
|
|
||||||
double totalgputick=0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel=0;
|
double totalgputick_kernel = 0;
|
||||||
double t1=0;
|
double t1 = 0;
|
||||||
double t2=0;
|
double t2 = 0;
|
||||||
|
|
||||||
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
for(int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
{
|
{
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
cv::ocl::oclMat ga=cv::ocl::oclMat(a);//upload
|
cv::ocl::oclMat ga = cv::ocl::oclMat(a); //upload
|
||||||
|
|
||||||
t2=(double)cvGetTickCount();//kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
cv::ocl::dft(ga, d_b, a.size(), flags);
|
cv::ocl::dft(ga, d_b, a.size(), flags);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
cv::Mat cpu_dst;
|
cv::Mat cpu_dst;
|
||||||
d_b.download (cpu_dst);//download
|
d_b.download (cpu_dst);//download
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
if(j == 0)
|
if(j == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
totalgputick=t1+totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
totalgputick_kernel=t2+totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
TEST_P(Dft, R2CthenC2R)
|
TEST_P(Dft, R2CthenC2R)
|
||||||
{
|
{
|
||||||
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
|
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
|
||||||
|
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
|
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
|
||||||
|
|
||||||
cv::ocl::oclMat d_b, d_c;
|
cv::ocl::oclMat d_b, d_c;
|
||||||
|
|
||||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
||||||
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
|
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
|
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
|
//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -48,66 +48,66 @@ using namespace std;
|
|||||||
#ifdef HAVE_CLAMDBLAS
|
#ifdef HAVE_CLAMDBLAS
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// GEMM
|
// GEMM
|
||||||
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
|
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
|
||||||
{
|
{
|
||||||
int type;
|
int type;
|
||||||
cv::Size mat_size;
|
cv::Size mat_size;
|
||||||
int flags;
|
int flags;
|
||||||
vector<cv::ocl::Info> info;
|
vector<cv::ocl::Info> info;
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
type = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
mat_size = GET_PARAM(1);
|
mat_size = GET_PARAM(1);
|
||||||
flags = GET_PARAM(2);
|
flags = GET_PARAM(2);
|
||||||
|
|
||||||
cv::ocl::getDevice(info);
|
cv::ocl::getDevice(info);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(Gemm, Performance)
|
TEST_P(Gemm, Performance)
|
||||||
{
|
{
|
||||||
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
|
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
|
||||||
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
|
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
|
||||||
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
|
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
|
||||||
cv::ocl::oclMat ocl_dst;
|
cv::ocl::oclMat ocl_dst;
|
||||||
|
|
||||||
double totalgputick=0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel=0;
|
double totalgputick_kernel = 0;
|
||||||
double t1=0;
|
double t1 = 0;
|
||||||
double t2=0;
|
double t2 = 0;
|
||||||
|
|
||||||
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
for(int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
{
|
{
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
|
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
|
||||||
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
|
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
|
||||||
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
|
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
|
||||||
|
|
||||||
t2=(double)cvGetTickCount();//kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
cv::ocl::gemm(ga, gb, 1.0,gc, 1.0, ocl_dst, flags);
|
cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
cv::Mat cpu_dst;
|
cv::Mat cpu_dst;
|
||||||
ocl_dst.download (cpu_dst);//download
|
ocl_dst.download (cpu_dst);//download
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end
|
t1 = (double)cvGetTickCount() - t1;//gpu end
|
||||||
|
|
||||||
if(j == 0)
|
if(j == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
totalgputick=t1+totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
totalgputick_kernel=t2+totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
}
|
}
|
||||||
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
|
||||||
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
|
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
|
||||||
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
|
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
|
||||||
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
|
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
|
||||||
#endif
|
#endif
|
||||||
@@ -53,118 +53,125 @@ using namespace testing;
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
|
||||||
struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
|
struct getRect
|
||||||
|
{
|
||||||
|
Rect operator ()(const CvAvgComp &e) const
|
||||||
|
{
|
||||||
|
return e.rect;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
PARAM_TEST_CASE(HaarTestBase, int, int)
|
PARAM_TEST_CASE(HaarTestBase, int, int)
|
||||||
{
|
{
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
|
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
|
||||||
cv::CascadeClassifier cpucascade, cpunestedCascade;
|
cv::CascadeClassifier cpucascade, cpunestedCascade;
|
||||||
// Mat img;
|
// Mat img;
|
||||||
|
|
||||||
double scale;
|
double scale;
|
||||||
int index;
|
int index;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
scale = 1.0;
|
scale = 1.0;
|
||||||
index=0;
|
index = 0;
|
||||||
string cascadeName="../../../data/haarcascades/haarcascade_frontalface_alt.xml";
|
string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
|
||||||
|
|
||||||
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
|
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
|
||||||
{
|
{
|
||||||
cout << "ERROR: Could not load classifier cascade" << endl;
|
cout << "ERROR: Could not load classifier cascade" << endl;
|
||||||
cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
|
cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
|
||||||
" [--scale[=<image scale>\n"
|
" [--scale[=<image scale>\n"
|
||||||
" [filename|camera_index]\n" << endl ;
|
" [filename|camera_index]\n" << endl ;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums>0);
|
//CV_Assert(devnums>0);
|
||||||
////if you want to use undefault device, set it here
|
////if you want to use undefault device, set it here
|
||||||
////setDevice(oclinfo[0]);
|
////setDevice(oclinfo[0]);
|
||||||
//cv::ocl::setBinpath("E:\\");
|
//cv::ocl::setBinpath("E:\\");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////faceDetect/////////////////////////////////////////////////
|
////////////////////////////////faceDetect/////////////////////////////////////////////////
|
||||||
|
|
||||||
struct Haar : HaarTestBase {};
|
struct Haar : HaarTestBase {};
|
||||||
|
|
||||||
TEST_F(Haar, FaceDetect)
|
TEST_F(Haar, FaceDetect)
|
||||||
{
|
{
|
||||||
string imgName = "../../../samples/c/lena.jpg";
|
string imgName = "../../../samples/c/lena.jpg";
|
||||||
Mat img = imread( imgName, 1 );
|
Mat img = imread( imgName, 1 );
|
||||||
|
|
||||||
if(img.empty())
|
if(img.empty())
|
||||||
{
|
{
|
||||||
std::cout << "Couldn't read test" << index <<".jpg" << std::endl;
|
std::cout << "Couldn't read test" << index << ".jpg" << std::endl;
|
||||||
return ;
|
return ;
|
||||||
}
|
}
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
double t = 0;
|
double t = 0;
|
||||||
vector<Rect> faces, oclfaces;
|
vector<Rect> faces, oclfaces;
|
||||||
|
|
||||||
const static Scalar colors[] = { CV_RGB(0,0,255),
|
const static Scalar colors[] = { CV_RGB(0, 0, 255),
|
||||||
CV_RGB(0,128,255),
|
CV_RGB(0, 128, 255),
|
||||||
CV_RGB(0,255,255),
|
CV_RGB(0, 255, 255),
|
||||||
CV_RGB(0,255,0),
|
CV_RGB(0, 255, 0),
|
||||||
CV_RGB(255,128,0),
|
CV_RGB(255, 128, 0),
|
||||||
CV_RGB(255,255,0),
|
CV_RGB(255, 255, 0),
|
||||||
CV_RGB(255,0,0),
|
CV_RGB(255, 0, 0),
|
||||||
CV_RGB(255,0,255)} ;
|
CV_RGB(255, 0, 255)
|
||||||
|
} ;
|
||||||
|
|
||||||
Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
|
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
|
||||||
MemStorage storage(cvCreateMemStorage(0));
|
MemStorage storage(cvCreateMemStorage(0));
|
||||||
cvtColor( img, gray, CV_BGR2GRAY );
|
cvtColor( img, gray, CV_BGR2GRAY );
|
||||||
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
|
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
|
||||||
equalizeHist( smallImg, smallImg );
|
equalizeHist( smallImg, smallImg );
|
||||||
|
|
||||||
t = (double)cvGetTickCount();
|
t = (double)cvGetTickCount();
|
||||||
for(int k= 0; k<LOOP_TIMES; k++)
|
for(int k = 0; k < LOOP_TIMES; k++)
|
||||||
{
|
{
|
||||||
cpucascade.detectMultiScale( smallImg, faces, 1.1,
|
cpucascade.detectMultiScale( smallImg, faces, 1.1,
|
||||||
3, 0
|
3, 0
|
||||||
|CV_HAAR_SCALE_IMAGE
|
| CV_HAAR_SCALE_IMAGE
|
||||||
, Size(30,30), Size(0, 0) );
|
, Size(30, 30), Size(0, 0) );
|
||||||
}
|
}
|
||||||
t = (double)cvGetTickCount() - t ;
|
t = (double)cvGetTickCount() - t ;
|
||||||
printf( "cpudetection time = %g ms\n", t/(LOOP_TIMES*(double)cvGetTickFrequency()*1000.) );
|
printf( "cpudetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
|
||||||
|
|
||||||
cv::ocl::oclMat image;
|
cv::ocl::oclMat image;
|
||||||
CvSeq* _objects;
|
CvSeq *_objects;
|
||||||
t = (double)cvGetTickCount();
|
t = (double)cvGetTickCount();
|
||||||
for(int k= 0; k<LOOP_TIMES; k++)
|
for(int k = 0; k < LOOP_TIMES; k++)
|
||||||
{
|
{
|
||||||
image.upload(smallImg);
|
image.upload(smallImg);
|
||||||
_objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
|
_objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
|
||||||
3, 0
|
3, 0
|
||||||
|CV_HAAR_SCALE_IMAGE
|
| CV_HAAR_SCALE_IMAGE
|
||||||
, Size(30,30), Size(0, 0) );
|
, Size(30, 30), Size(0, 0) );
|
||||||
}
|
}
|
||||||
t = (double)cvGetTickCount() - t ;
|
t = (double)cvGetTickCount() - t ;
|
||||||
printf( "ocldetection time = %g ms\n", t/(LOOP_TIMES*(double)cvGetTickFrequency()*1000.) );
|
printf( "ocldetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
|
||||||
vector<CvAvgComp> vecAvgComp;
|
vector<CvAvgComp> vecAvgComp;
|
||||||
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
|
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
|
||||||
oclfaces.resize(vecAvgComp.size());
|
oclfaces.resize(vecAvgComp.size());
|
||||||
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
|
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
|
||||||
|
|
||||||
//for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
|
//for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
|
||||||
//{
|
//{
|
||||||
// Mat smallImgROI;
|
// Mat smallImgROI;
|
||||||
// Point center;
|
// Point center;
|
||||||
// Scalar color = colors[i%8];
|
// Scalar color = colors[i%8];
|
||||||
// int radius;
|
// int radius;
|
||||||
// center.x = cvRound((r->x + r->width*0.5)*scale);
|
// center.x = cvRound((r->x + r->width*0.5)*scale);
|
||||||
// center.y = cvRound((r->y + r->height*0.5)*scale);
|
// center.y = cvRound((r->y + r->height*0.5)*scale);
|
||||||
// radius = cvRound((r->width + r->height)*0.25*scale);
|
// radius = cvRound((r->width + r->height)*0.25*scale);
|
||||||
// circle( img, center, radius, color, 3, 8, 0 );
|
// circle( img, center, radius, color, 3, 8, 0 );
|
||||||
//}
|
//}
|
||||||
//namedWindow("result");
|
//namedWindow("result");
|
||||||
//imshow("result",img);
|
//imshow("result",img);
|
||||||
//waitKey(0);
|
//waitKey(0);
|
||||||
//destroyAllWindows();
|
//destroyAllWindows();
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif // HAVE_OPENCL
|
#endif // HAVE_OPENCL
|
||||||
|
|||||||
@@ -46,16 +46,16 @@
|
|||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::ocl;
|
using namespace cv::ocl;
|
||||||
using namespace cvtest;
|
using namespace cvtest;
|
||||||
using namespace testing;
|
using namespace testing;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#define FILTER_IMAGE "../../../samples/gpu/road.png"
|
#define FILTER_IMAGE "../../../samples/gpu/road.png"
|
||||||
|
|
||||||
#ifndef MWC_TEST_UTILITY
|
#ifndef MWC_TEST_UTILITY
|
||||||
#define MWC_TEST_UTILITY
|
#define MWC_TEST_UTILITY
|
||||||
|
|
||||||
@@ -76,92 +76,92 @@ class name \
|
|||||||
}
|
}
|
||||||
|
|
||||||
#endif // IMPLEMENT_PARAM_CLASS
|
#endif // IMPLEMENT_PARAM_CLASS
|
||||||
#endif // MWC_TEST_UTILITY
|
#endif // MWC_TEST_UTILITY
|
||||||
|
|
||||||
IMPLEMENT_PARAM_CLASS(WinSizw48, bool);
|
IMPLEMENT_PARAM_CLASS(WinSizw48, bool);
|
||||||
|
|
||||||
PARAM_TEST_CASE(HOG, WinSizw48, bool)
|
PARAM_TEST_CASE(HOG, WinSizw48, bool)
|
||||||
{
|
{
|
||||||
bool is48;
|
bool is48;
|
||||||
vector<float> detector;
|
vector<float> detector;
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
is48 = GET_PARAM(0);
|
is48 = GET_PARAM(0);
|
||||||
if(is48)
|
if(is48)
|
||||||
{
|
{
|
||||||
detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96();
|
detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128();
|
detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(HOG, Performance)
|
TEST_P(HOG, Performance)
|
||||||
{
|
{
|
||||||
cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE);
|
cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
|
||||||
ASSERT_FALSE(img.empty());
|
ASSERT_FALSE(img.empty());
|
||||||
|
|
||||||
// define HOG related arguments
|
// define HOG related arguments
|
||||||
float scale = 1.05;
|
float scale = 1.05;
|
||||||
int nlevels = 13;
|
int nlevels = 13;
|
||||||
float gr_threshold = 8;
|
float gr_threshold = 8;
|
||||||
float hit_threshold = 1.4;
|
float hit_threshold = 1.4;
|
||||||
bool hit_threshold_auto = true;
|
bool hit_threshold_auto = true;
|
||||||
|
|
||||||
int win_width = is48? 48 : 64;
|
int win_width = is48 ? 48 : 64;
|
||||||
int win_stride_width = 8;
|
int win_stride_width = 8;
|
||||||
int win_stride_height = 8;
|
int win_stride_height = 8;
|
||||||
|
|
||||||
bool gamma_corr = true;
|
bool gamma_corr = true;
|
||||||
|
|
||||||
Size win_size(win_width, win_width * 2); //(64, 128) or (48, 96)
|
Size win_size(win_width, win_width * 2); //(64, 128) or (48, 96)
|
||||||
Size win_stride(win_stride_width, win_stride_height);
|
Size win_stride(win_stride_width, win_stride_height);
|
||||||
|
|
||||||
cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
|
cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
|
||||||
cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
|
cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
|
||||||
cv::ocl::HOGDescriptor::DEFAULT_NLEVELS);
|
cv::ocl::HOGDescriptor::DEFAULT_NLEVELS);
|
||||||
|
|
||||||
gpu_hog.setSVMDetector(detector);
|
gpu_hog.setSVMDetector(detector);
|
||||||
|
|
||||||
double totalgputick=0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel=0;
|
double totalgputick_kernel = 0;
|
||||||
|
|
||||||
double t1=0;
|
double t1 = 0;
|
||||||
double t2=0;
|
double t2 = 0;
|
||||||
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
for(int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
{
|
{
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
ocl::oclMat d_src(img);//upload
|
ocl::oclMat d_src(img);//upload
|
||||||
|
|
||||||
t2=(double)cvGetTickCount();//kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
|
|
||||||
vector<Rect> found;
|
vector<Rect> found;
|
||||||
gpu_hog.detectMultiScale(d_src, found, hit_threshold, win_stride,
|
gpu_hog.detectMultiScale(d_src, found, hit_threshold, win_stride,
|
||||||
Size(0, 0), scale, gr_threshold);
|
Size(0, 0), scale, gr_threshold);
|
||||||
|
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
// no download time for HOG
|
// no download time for HOG
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
if(j == 0)
|
if(j == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
totalgputick=t1+totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
|
|
||||||
totalgputick_kernel=t2+totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, testing::Combine(testing::Values(WinSizw48(false), WinSizw48(true)), testing::Values(false)));
|
INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, testing::Combine(testing::Values(WinSizw48(false), WinSizw48(true)), testing::Values(false)));
|
||||||
|
|
||||||
#endif //Have opencl
|
#endif //Have opencl
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -87,76 +87,76 @@ IMPLEMENT_PARAM_CLASS(Channels, int)
|
|||||||
|
|
||||||
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
|
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
|
||||||
|
|
||||||
const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
|
const char *TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
|
||||||
|
|
||||||
PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod)
|
PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod)
|
||||||
{
|
{
|
||||||
cv::Size size;
|
cv::Size size;
|
||||||
cv::Size templ_size;
|
cv::Size templ_size;
|
||||||
int cn;
|
int cn;
|
||||||
int method;
|
int method;
|
||||||
//vector<cv::ocl::Info> oclinfo;
|
//vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
size = GET_PARAM(0);
|
size = GET_PARAM(0);
|
||||||
templ_size = GET_PARAM(1);
|
templ_size = GET_PARAM(1);
|
||||||
cn = GET_PARAM(2);
|
cn = GET_PARAM(2);
|
||||||
method = GET_PARAM(3);
|
method = GET_PARAM(3);
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct MatchTemplate8U : MatchTemplate {};
|
struct MatchTemplate8U : MatchTemplate {};
|
||||||
|
|
||||||
TEST_P(MatchTemplate8U, Performance)
|
TEST_P(MatchTemplate8U, Performance)
|
||||||
{
|
{
|
||||||
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
|
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
|
||||||
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
|
std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
|
||||||
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
|
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
|
||||||
std::cout << "Channels: " << cn << std::endl;
|
std::cout << "Channels: " << cn << std::endl;
|
||||||
|
|
||||||
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
|
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
|
||||||
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
|
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
|
||||||
cv::Mat dst_gold;
|
cv::Mat dst_gold;
|
||||||
cv::ocl::oclMat dst;
|
cv::ocl::oclMat dst;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double totalgputick=0;
|
|
||||||
double totalgputick_kernel=0;
|
|
||||||
|
|
||||||
double t1=0;
|
double totalgputick = 0;
|
||||||
double t2=0;
|
double totalgputick_kernel = 0;
|
||||||
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
|
||||||
{
|
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
double t1 = 0;
|
||||||
|
double t2 = 0;
|
||||||
|
for(int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
|
{
|
||||||
|
|
||||||
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
|
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
|
||||||
cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
|
cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
|
||||||
|
|
||||||
t2=(double)cvGetTickCount();//kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
|
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
cv::Mat cpu_dst;
|
cv::Mat cpu_dst;
|
||||||
dst.download (cpu_dst);//download
|
dst.download (cpu_dst);//download
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
if(j == 0)
|
if(j == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
totalgputick=t1+totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
totalgputick_kernel=t2+totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -165,68 +165,68 @@ TEST_P(MatchTemplate8U, Performance)
|
|||||||
struct MatchTemplate32F : MatchTemplate {};
|
struct MatchTemplate32F : MatchTemplate {};
|
||||||
TEST_P(MatchTemplate32F, Performance)
|
TEST_P(MatchTemplate32F, Performance)
|
||||||
{
|
{
|
||||||
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
|
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
|
||||||
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
|
std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
|
||||||
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
|
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
|
||||||
std::cout << "Channels: " << cn << std::endl;
|
std::cout << "Channels: " << cn << std::endl;
|
||||||
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
|
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
|
||||||
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
|
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
|
||||||
|
|
||||||
cv::Mat dst_gold;
|
cv::Mat dst_gold;
|
||||||
cv::ocl::oclMat dst;
|
cv::ocl::oclMat dst;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double totalgputick=0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel=0;
|
double totalgputick_kernel = 0;
|
||||||
|
|
||||||
double t1=0;
|
double t1 = 0;
|
||||||
double t2=0;
|
double t2 = 0;
|
||||||
for(int j = 0; j < LOOP_TIMES; j ++)
|
for(int j = 0; j < LOOP_TIMES; j ++)
|
||||||
{
|
{
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
|
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
|
||||||
cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
|
cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
|
||||||
|
|
||||||
t2=(double)cvGetTickCount();//kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
|
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
cv::Mat cpu_dst;
|
cv::Mat cpu_dst;
|
||||||
dst.download (cpu_dst);//download
|
dst.download (cpu_dst);//download
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
totalgputick=t1+totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
|
|
||||||
totalgputick_kernel=t2+totalgputick_kernel;
|
|
||||||
|
|
||||||
}
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
}
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
|
||||||
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
|
||||||
testing::Combine(
|
testing::Combine(
|
||||||
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)),
|
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)),
|
||||||
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
||||||
testing::Values(Channels(1), Channels(4)/*, Channels(3)*/),
|
testing::Values(Channels(1), Channels(4)/*, Channels(3)*/),
|
||||||
ALL_TEMPLATE_METHODS
|
ALL_TEMPLATE_METHODS
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
|
||||||
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)),
|
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)),
|
||||||
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
||||||
testing::Values(Channels(1), Channels(4) /*, Channels(3)*/),
|
testing::Values(Channels(1), Channels(4) /*, Channels(3)*/),
|
||||||
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
|
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
|
||||||
|
|
||||||
#endif //HAVE_OPENCL
|
#endif //HAVE_OPENCL
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -56,28 +56,28 @@ using namespace std;
|
|||||||
|
|
||||||
PARAM_TEST_CASE(PyrDown, MatType, int)
|
PARAM_TEST_CASE(PyrDown, MatType, int)
|
||||||
{
|
{
|
||||||
int type;
|
int type;
|
||||||
int channels;
|
int channels;
|
||||||
//src mat
|
//src mat
|
||||||
cv::Mat mat1;
|
cv::Mat mat1;
|
||||||
cv::Mat dst;
|
cv::Mat dst;
|
||||||
|
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
//ocl dst mat for testing
|
//ocl dst mat for testing
|
||||||
|
|
||||||
cv::ocl::oclMat gmat1;
|
cv::ocl::oclMat gmat1;
|
||||||
cv::ocl::oclMat gdst;
|
cv::ocl::oclMat gdst;
|
||||||
|
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
type = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
channels = GET_PARAM(1);
|
channels = GET_PARAM(1);
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define VARNAME(A) string(#A);
|
#define VARNAME(A) string(#A);
|
||||||
@@ -85,48 +85,48 @@ PARAM_TEST_CASE(PyrDown, MatType, int)
|
|||||||
////////////////////////////////PyrDown/////////////////////////////////////////////////
|
////////////////////////////////PyrDown/////////////////////////////////////////////////
|
||||||
TEST_P(PyrDown, Mat)
|
TEST_P(PyrDown, Mat)
|
||||||
{
|
{
|
||||||
cv::Size size(MWIDTH, MHEIGHT);
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
|
mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
|
||||||
|
|
||||||
|
|
||||||
cv::ocl::oclMat gdst;
|
cv::ocl::oclMat gdst;
|
||||||
double totalgputick = 0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel = 0;
|
double totalgputick_kernel = 0;
|
||||||
|
|
||||||
double t1 = 0;
|
double t1 = 0;
|
||||||
double t2 = 0;
|
double t2 = 0;
|
||||||
|
|
||||||
for (int j = 0; j < LOOP_TIMES + 1; j ++)
|
for (int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
{
|
{
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
cv::ocl::oclMat gmat1(mat1);
|
cv::ocl::oclMat gmat1(mat1);
|
||||||
|
|
||||||
t2 = (double)cvGetTickCount(); //kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
cv::ocl::pyrDown(gmat1, gdst);
|
cv::ocl::pyrDown(gmat1, gdst);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
cv::Mat cpu_dst;
|
cv::Mat cpu_dst;
|
||||||
gdst.download(cpu_dst);
|
gdst.download(cpu_dst);
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
if (j == 0)
|
if (j == 0)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
totalgputick = t1 + totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
|
|
||||||
totalgputick_kernel = t2 + totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//********test****************
|
//********test****************
|
||||||
|
|||||||
@@ -56,64 +56,64 @@ using namespace std;
|
|||||||
|
|
||||||
PARAM_TEST_CASE(PyrUp, MatType, int)
|
PARAM_TEST_CASE(PyrUp, MatType, int)
|
||||||
{
|
{
|
||||||
int type;
|
int type;
|
||||||
int channels;
|
int channels;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
type = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
channels = GET_PARAM(1);
|
channels = GET_PARAM(1);
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(PyrUp, Performance)
|
TEST_P(PyrUp, Performance)
|
||||||
{
|
{
|
||||||
cv::Size size(MWIDTH, MHEIGHT);
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels));
|
cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels));
|
||||||
cv::Mat dst_gold;
|
cv::Mat dst_gold;
|
||||||
cv::ocl::oclMat dst;
|
cv::ocl::oclMat dst;
|
||||||
|
|
||||||
|
|
||||||
double totalgputick = 0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel = 0;
|
double totalgputick_kernel = 0;
|
||||||
|
|
||||||
double t1 = 0;
|
double t1 = 0;
|
||||||
double t2 = 0;
|
double t2 = 0;
|
||||||
|
|
||||||
for (int j = 0; j < LOOP_TIMES + 1; j ++)
|
for (int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
{
|
{
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload
|
cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload
|
||||||
|
|
||||||
t2 = (double)cvGetTickCount(); //kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
cv::ocl::pyrUp(srcMat, dst);
|
cv::ocl::pyrUp(srcMat, dst);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
cv::Mat cpu_dst;
|
cv::Mat cpu_dst;
|
||||||
dst.download(cpu_dst); //download
|
dst.download(cpu_dst); //download
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
if (j == 0)
|
if (j == 0)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
totalgputick = t1 + totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
|
|
||||||
totalgputick_kernel = t2 + totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, Combine(
|
||||||
|
|||||||
@@ -53,403 +53,435 @@ using namespace std;
|
|||||||
using namespace cv::ocl;
|
using namespace cv::ocl;
|
||||||
PARAM_TEST_CASE(MergeTestBase, MatType, int)
|
PARAM_TEST_CASE(MergeTestBase, MatType, int)
|
||||||
{
|
{
|
||||||
int type;
|
int type;
|
||||||
int channels;
|
int channels;
|
||||||
|
|
||||||
//src mat
|
//src mat
|
||||||
cv::Mat mat1;
|
cv::Mat mat1;
|
||||||
cv::Mat mat2;
|
cv::Mat mat2;
|
||||||
cv::Mat mat3;
|
cv::Mat mat3;
|
||||||
cv::Mat mat4;
|
cv::Mat mat4;
|
||||||
|
|
||||||
//dst mat
|
//dst mat
|
||||||
cv::Mat dst;
|
cv::Mat dst;
|
||||||
|
|
||||||
// set up roi
|
// set up roi
|
||||||
int roicols;
|
int roicols;
|
||||||
int roirows;
|
int roirows;
|
||||||
int src1x;
|
int src1x;
|
||||||
int src1y;
|
int src1y;
|
||||||
int src2x;
|
int src2x;
|
||||||
int src2y;
|
int src2y;
|
||||||
int src3x;
|
int src3x;
|
||||||
int src3y;
|
int src3y;
|
||||||
int src4x;
|
int src4x;
|
||||||
int src4y;
|
int src4y;
|
||||||
int dstx;
|
int dstx;
|
||||||
int dsty;
|
int dsty;
|
||||||
|
|
||||||
//src mat with roi
|
//src mat with roi
|
||||||
cv::Mat mat1_roi;
|
cv::Mat mat1_roi;
|
||||||
cv::Mat mat2_roi;
|
cv::Mat mat2_roi;
|
||||||
cv::Mat mat3_roi;
|
cv::Mat mat3_roi;
|
||||||
cv::Mat mat4_roi;
|
cv::Mat mat4_roi;
|
||||||
|
|
||||||
//dst mat with roi
|
//dst mat with roi
|
||||||
cv::Mat dst_roi;
|
cv::Mat dst_roi;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
//ocl dst mat for testing
|
//ocl dst mat for testing
|
||||||
cv::ocl::oclMat gdst_whole;
|
cv::ocl::oclMat gdst_whole;
|
||||||
|
|
||||||
//ocl mat with roi
|
//ocl mat with roi
|
||||||
cv::ocl::oclMat gmat1;
|
cv::ocl::oclMat gmat1;
|
||||||
cv::ocl::oclMat gmat2;
|
cv::ocl::oclMat gmat2;
|
||||||
cv::ocl::oclMat gmat3;
|
cv::ocl::oclMat gmat3;
|
||||||
cv::ocl::oclMat gmat4;
|
cv::ocl::oclMat gmat4;
|
||||||
cv::ocl::oclMat gdst;
|
cv::ocl::oclMat gdst;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
type = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
channels = GET_PARAM(1);
|
channels = GET_PARAM(1);
|
||||||
|
|
||||||
cv::RNG& rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
cv::Size size(MWIDTH, MHEIGHT);
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
|
|
||||||
mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||||
mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||||
mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||||
mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||||
dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
|
dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
////if you want to use undefault device, set it here
|
////if you want to use undefault device, set it here
|
||||||
////setDevice(oclinfo[0]);
|
////setDevice(oclinfo[0]);
|
||||||
//setBinpath(CLBINPATH);
|
//setBinpath(CLBINPATH);
|
||||||
}
|
}
|
||||||
void Has_roi(int b)
|
void Has_roi(int b)
|
||||||
{
|
{
|
||||||
//cv::RNG& rng = TS::ptr()->get_rng();
|
//cv::RNG& rng = TS::ptr()->get_rng();
|
||||||
if(b)
|
if(b)
|
||||||
{
|
{
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
roicols = mat1.cols-1; //start
|
roicols = mat1.cols - 1; //start
|
||||||
roirows = mat1.rows-1;
|
roirows = mat1.rows - 1;
|
||||||
src1x = 1;
|
src1x = 1;
|
||||||
src1y = 1;
|
src1y = 1;
|
||||||
src2x = 1;
|
src2x = 1;
|
||||||
src2y = 1;
|
src2y = 1;
|
||||||
src3x = 1;
|
src3x = 1;
|
||||||
src3y = 1;
|
src3y = 1;
|
||||||
src4x = 1;
|
src4x = 1;
|
||||||
src4y = 1;
|
src4y = 1;
|
||||||
dstx = 1;
|
dstx = 1;
|
||||||
dsty =1;
|
dsty = 1;
|
||||||
|
|
||||||
}else
|
}
|
||||||
{
|
else
|
||||||
roicols = mat1.cols;
|
{
|
||||||
roirows = mat1.rows;
|
roicols = mat1.cols;
|
||||||
src1x = 0;
|
roirows = mat1.rows;
|
||||||
src1y = 0;
|
src1x = 0;
|
||||||
src2x = 0;
|
src1y = 0;
|
||||||
src2y = 0;
|
src2x = 0;
|
||||||
src3x = 0;
|
src2y = 0;
|
||||||
src3y = 0;
|
src3x = 0;
|
||||||
src4x = 0;
|
src3y = 0;
|
||||||
src4y = 0;
|
src4x = 0;
|
||||||
dstx = 0;
|
src4y = 0;
|
||||||
dsty = 0;
|
dstx = 0;
|
||||||
};
|
dsty = 0;
|
||||||
|
};
|
||||||
|
|
||||||
mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
|
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
|
||||||
mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
|
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
|
||||||
mat3_roi = mat3(Rect(src3x,src3y,roicols,roirows));
|
mat3_roi = mat3(Rect(src3x, src3y, roicols, roirows));
|
||||||
mat4_roi = mat4(Rect(src4x,src4y,roicols,roirows));
|
mat4_roi = mat4(Rect(src4x, src4y, roicols, roirows));
|
||||||
|
|
||||||
|
|
||||||
dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
|
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Merge : MergeTestBase {};
|
struct Merge : MergeTestBase {};
|
||||||
|
|
||||||
TEST_P(Merge, Accuracy)
|
TEST_P(Merge, Accuracy)
|
||||||
{
|
{
|
||||||
#ifndef PRINT_KERNEL_RUN_TIME
|
#ifndef PRINT_KERNEL_RUN_TIME
|
||||||
double totalcputick=0;
|
double totalcputick = 0;
|
||||||
double totalgputick=0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel=0;
|
double totalgputick_kernel = 0;
|
||||||
double t0=0;
|
double t0 = 0;
|
||||||
double t1=0;
|
double t1 = 0;
|
||||||
double t2=0;
|
double t2 = 0;
|
||||||
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
|
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
|
||||||
totalcputick=0;
|
{
|
||||||
totalgputick=0;
|
totalcputick = 0;
|
||||||
totalgputick_kernel=0;
|
totalgputick = 0;
|
||||||
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
totalgputick_kernel = 0;
|
||||||
{
|
for(int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
Has_roi(k);
|
{
|
||||||
std::vector<cv::Mat> dev_src;
|
Has_roi(k);
|
||||||
dev_src.push_back(mat1_roi);
|
std::vector<cv::Mat> dev_src;
|
||||||
dev_src.push_back(mat2_roi);
|
dev_src.push_back(mat1_roi);
|
||||||
dev_src.push_back(mat3_roi);
|
dev_src.push_back(mat2_roi);
|
||||||
dev_src.push_back(mat4_roi);
|
dev_src.push_back(mat3_roi);
|
||||||
t0 = (double)cvGetTickCount();//cpu start
|
dev_src.push_back(mat4_roi);
|
||||||
cv::merge(dev_src, dst_roi);
|
t0 = (double)cvGetTickCount();//cpu start
|
||||||
t0 = (double)cvGetTickCount() - t0;//cpu end
|
cv::merge(dev_src, dst_roi);
|
||||||
|
t0 = (double)cvGetTickCount() - t0;//cpu end
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount();//gpu start1 ]
|
t1 = (double)cvGetTickCount();//gpu start1 ]
|
||||||
gmat1 = mat1_roi;
|
gmat1 = mat1_roi;
|
||||||
gmat2 = mat2_roi;
|
gmat2 = mat2_roi;
|
||||||
gmat3 = mat3_roi;
|
gmat3 = mat3_roi;
|
||||||
gmat4 = mat4_roi;
|
gmat4 = mat4_roi;
|
||||||
gdst_whole = dst;
|
gdst_whole = dst;
|
||||||
gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
|
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
|
||||||
std::vector<cv::ocl::oclMat> dev_gsrc;
|
std::vector<cv::ocl::oclMat> dev_gsrc;
|
||||||
dev_gsrc.push_back(gmat1);
|
dev_gsrc.push_back(gmat1);
|
||||||
dev_gsrc.push_back(gmat2);
|
dev_gsrc.push_back(gmat2);
|
||||||
dev_gsrc.push_back(gmat3);
|
dev_gsrc.push_back(gmat3);
|
||||||
dev_gsrc.push_back(gmat4);
|
dev_gsrc.push_back(gmat4);
|
||||||
t2=(double)cvGetTickCount();//kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
cv::ocl::merge(dev_gsrc, gdst);
|
cv::ocl::merge(dev_gsrc, gdst);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
cv::Mat cpu_dst;
|
cv::Mat cpu_dst;
|
||||||
gdst_whole.download (cpu_dst);//download
|
gdst_whole.download (cpu_dst);//download
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
if(j == 0)
|
if(j == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
totalgputick=t1+totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
totalcputick=t0+totalcputick;
|
totalcputick = t0 + totalcputick;
|
||||||
totalgputick_kernel=t2+totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
}
|
}
|
||||||
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
|
if(k == 0)
|
||||||
cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
{
|
||||||
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "no roi\n";
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
}
|
||||||
}
|
else
|
||||||
|
{
|
||||||
|
cout << "with roi\n";
|
||||||
|
};
|
||||||
|
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
|
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
|
||||||
{
|
{
|
||||||
Has_roi(j);
|
Has_roi(j);
|
||||||
gmat1 = mat1_roi;
|
gmat1 = mat1_roi;
|
||||||
gmat2 = mat2_roi;
|
gmat2 = mat2_roi;
|
||||||
gmat3 = mat3_roi;
|
gmat3 = mat3_roi;
|
||||||
gmat4 = mat4_roi;
|
gmat4 = mat4_roi;
|
||||||
gdst_whole = dst;
|
gdst_whole = dst;
|
||||||
gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
|
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
|
||||||
std::vector<cv::ocl::oclMat> dev_gsrc;
|
std::vector<cv::ocl::oclMat> dev_gsrc;
|
||||||
dev_gsrc.push_back(gmat1);
|
dev_gsrc.push_back(gmat1);
|
||||||
dev_gsrc.push_back(gmat2);
|
dev_gsrc.push_back(gmat2);
|
||||||
dev_gsrc.push_back(gmat3);
|
dev_gsrc.push_back(gmat3);
|
||||||
dev_gsrc.push_back(gmat4);
|
dev_gsrc.push_back(gmat4);
|
||||||
|
|
||||||
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
|
if(j == 0)
|
||||||
cv::ocl::merge(dev_gsrc, gdst);
|
{
|
||||||
};
|
cout << "no roi:";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cout << "\nwith roi:";
|
||||||
|
};
|
||||||
|
cv::ocl::merge(dev_gsrc, gdst);
|
||||||
|
};
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
PARAM_TEST_CASE(SplitTestBase, MatType, int)
|
PARAM_TEST_CASE(SplitTestBase, MatType, int)
|
||||||
{
|
{
|
||||||
int type;
|
int type;
|
||||||
int channels;
|
int channels;
|
||||||
|
|
||||||
//src mat
|
//src mat
|
||||||
cv::Mat mat;
|
cv::Mat mat;
|
||||||
|
|
||||||
//dstmat
|
//dstmat
|
||||||
cv::Mat dst1;
|
cv::Mat dst1;
|
||||||
cv::Mat dst2;
|
cv::Mat dst2;
|
||||||
cv::Mat dst3;
|
cv::Mat dst3;
|
||||||
cv::Mat dst4;
|
cv::Mat dst4;
|
||||||
|
|
||||||
// set up roi
|
// set up roi
|
||||||
int roicols;
|
int roicols;
|
||||||
int roirows;
|
int roirows;
|
||||||
int srcx;
|
int srcx;
|
||||||
int srcy;
|
int srcy;
|
||||||
int dst1x;
|
int dst1x;
|
||||||
int dst1y;
|
int dst1y;
|
||||||
int dst2x;
|
int dst2x;
|
||||||
int dst2y;
|
int dst2y;
|
||||||
int dst3x;
|
int dst3x;
|
||||||
int dst3y;
|
int dst3y;
|
||||||
int dst4x;
|
int dst4x;
|
||||||
int dst4y;
|
int dst4y;
|
||||||
|
|
||||||
//src mat with roi
|
//src mat with roi
|
||||||
cv::Mat mat_roi;
|
cv::Mat mat_roi;
|
||||||
|
|
||||||
//dst mat with roi
|
//dst mat with roi
|
||||||
cv::Mat dst1_roi;
|
cv::Mat dst1_roi;
|
||||||
cv::Mat dst2_roi;
|
cv::Mat dst2_roi;
|
||||||
cv::Mat dst3_roi;
|
cv::Mat dst3_roi;
|
||||||
cv::Mat dst4_roi;
|
cv::Mat dst4_roi;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
//ocl dst mat for testing
|
//ocl dst mat for testing
|
||||||
cv::ocl::oclMat gdst1_whole;
|
cv::ocl::oclMat gdst1_whole;
|
||||||
cv::ocl::oclMat gdst2_whole;
|
cv::ocl::oclMat gdst2_whole;
|
||||||
cv::ocl::oclMat gdst3_whole;
|
cv::ocl::oclMat gdst3_whole;
|
||||||
cv::ocl::oclMat gdst4_whole;
|
cv::ocl::oclMat gdst4_whole;
|
||||||
|
|
||||||
//ocl mat with roi
|
//ocl mat with roi
|
||||||
cv::ocl::oclMat gmat;
|
cv::ocl::oclMat gmat;
|
||||||
cv::ocl::oclMat gdst1;
|
cv::ocl::oclMat gdst1;
|
||||||
cv::ocl::oclMat gdst2;
|
cv::ocl::oclMat gdst2;
|
||||||
cv::ocl::oclMat gdst3;
|
cv::ocl::oclMat gdst3;
|
||||||
cv::ocl::oclMat gdst4;
|
cv::ocl::oclMat gdst4;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
type = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
channels = GET_PARAM(1);
|
channels = GET_PARAM(1);
|
||||||
|
|
||||||
cv::RNG& rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
cv::Size size(MWIDTH, MHEIGHT);
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
|
|
||||||
mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
|
mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
|
||||||
dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||||
dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||||
dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||||
dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
////if you want to use undefault device, set it here
|
////if you want to use undefault device, set it here
|
||||||
////setDevice(oclinfo[0]);
|
////setDevice(oclinfo[0]);
|
||||||
//setBinpath(CLBINPATH);
|
//setBinpath(CLBINPATH);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Has_roi(int b)
|
void Has_roi(int b)
|
||||||
{
|
{
|
||||||
//cv::RNG& rng = TS::ptr()->get_rng();
|
//cv::RNG& rng = TS::ptr()->get_rng();
|
||||||
if(b)
|
if(b)
|
||||||
{
|
{
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
roicols = mat.cols-1; //start
|
roicols = mat.cols - 1; //start
|
||||||
roirows = mat.rows-1;
|
roirows = mat.rows - 1;
|
||||||
srcx = 1;
|
srcx = 1;
|
||||||
srcx = 1;
|
srcx = 1;
|
||||||
dst1x = 1;
|
dst1x = 1;
|
||||||
dst1y =1;
|
dst1y = 1;
|
||||||
dst2x = 1;
|
dst2x = 1;
|
||||||
dst2y =1;
|
dst2y = 1;
|
||||||
dst3x = 1;
|
dst3x = 1;
|
||||||
dst3y =1;
|
dst3y = 1;
|
||||||
dst4x = 1;
|
dst4x = 1;
|
||||||
dst4y =1;
|
dst4y = 1;
|
||||||
}else
|
}
|
||||||
{
|
else
|
||||||
roicols = mat.cols;
|
{
|
||||||
roirows = mat.rows;
|
roicols = mat.cols;
|
||||||
srcx = 0;
|
roirows = mat.rows;
|
||||||
srcy = 0;
|
srcx = 0;
|
||||||
dst1x = 0;
|
srcy = 0;
|
||||||
dst1y = 0;
|
dst1x = 0;
|
||||||
dst2x = 0;
|
dst1y = 0;
|
||||||
dst2y =0;
|
dst2x = 0;
|
||||||
dst3x = 0;
|
dst2y = 0;
|
||||||
dst3y =0;
|
dst3x = 0;
|
||||||
dst4x = 0;
|
dst3y = 0;
|
||||||
dst4y =0;
|
dst4x = 0;
|
||||||
};
|
dst4y = 0;
|
||||||
|
};
|
||||||
|
|
||||||
mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
|
mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
|
||||||
|
|
||||||
dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows));
|
dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows));
|
||||||
dst2_roi = dst2(Rect(dst2x,dst2y,roicols,roirows));
|
dst2_roi = dst2(Rect(dst2x, dst2y, roicols, roirows));
|
||||||
dst3_roi = dst3(Rect(dst3x,dst3y,roicols,roirows));
|
dst3_roi = dst3(Rect(dst3x, dst3y, roicols, roirows));
|
||||||
dst4_roi = dst4(Rect(dst4x,dst4y,roicols,roirows));
|
dst4_roi = dst4(Rect(dst4x, dst4y, roicols, roirows));
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Split :SplitTestBase {};
|
struct Split : SplitTestBase {};
|
||||||
|
|
||||||
TEST_P(Split, Accuracy)
|
TEST_P(Split, Accuracy)
|
||||||
{
|
{
|
||||||
#ifndef PRINT_KERNEL_RUN_TIME
|
#ifndef PRINT_KERNEL_RUN_TIME
|
||||||
double totalcputick=0;
|
double totalcputick = 0;
|
||||||
double totalgputick=0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel=0;
|
double totalgputick_kernel = 0;
|
||||||
double t0=0;
|
double t0 = 0;
|
||||||
double t1=0;
|
double t1 = 0;
|
||||||
double t2=0;
|
double t2 = 0;
|
||||||
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
|
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
|
||||||
totalcputick=0;
|
{
|
||||||
totalgputick=0;
|
totalcputick = 0;
|
||||||
totalgputick_kernel=0;
|
totalgputick = 0;
|
||||||
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
totalgputick_kernel = 0;
|
||||||
{
|
for(int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
Has_roi(k);
|
{
|
||||||
cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
|
Has_roi(k);
|
||||||
cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
|
cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
|
||||||
t0 = (double)cvGetTickCount();//cpu start
|
cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
|
||||||
cv::split(mat_roi, dev_dst);
|
t0 = (double)cvGetTickCount();//cpu start
|
||||||
t0 = (double)cvGetTickCount() - t0;//cpu end
|
cv::split(mat_roi, dev_dst);
|
||||||
|
t0 = (double)cvGetTickCount() - t0;//cpu end
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
gdst1_whole = dst1;
|
gdst1_whole = dst1;
|
||||||
gdst1 = gdst1_whole(Rect(dst1x,dst1y,roicols,roirows));
|
gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
|
||||||
|
|
||||||
gdst2_whole = dst2;
|
gdst2_whole = dst2;
|
||||||
gdst2 = gdst2_whole(Rect(dst2x,dst2y,roicols,roirows));
|
gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows));
|
||||||
|
|
||||||
gdst3_whole = dst3;
|
gdst3_whole = dst3;
|
||||||
gdst3 = gdst3_whole(Rect(dst3x,dst3y,roicols,roirows));
|
gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows));
|
||||||
|
|
||||||
gdst4_whole = dst4;
|
gdst4_whole = dst4;
|
||||||
gdst4 = gdst4_whole(Rect(dst4x,dst4y,roicols,roirows));
|
gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows));
|
||||||
|
|
||||||
gmat = mat_roi;
|
gmat = mat_roi;
|
||||||
t2=(double)cvGetTickCount();//kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
cv::ocl::split(gmat, dev_gdst);
|
cv::ocl::split(gmat, dev_gdst);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
cv::Mat cpu_dst1;
|
cv::Mat cpu_dst1;
|
||||||
cv::Mat cpu_dst2;
|
cv::Mat cpu_dst2;
|
||||||
cv::Mat cpu_dst3;
|
cv::Mat cpu_dst3;
|
||||||
cv::Mat cpu_dst4;
|
cv::Mat cpu_dst4;
|
||||||
gdst1_whole.download(cpu_dst1);
|
gdst1_whole.download(cpu_dst1);
|
||||||
gdst2_whole.download(cpu_dst2);
|
gdst2_whole.download(cpu_dst2);
|
||||||
gdst3_whole.download(cpu_dst3);
|
gdst3_whole.download(cpu_dst3);
|
||||||
gdst4_whole.download(cpu_dst4);
|
gdst4_whole.download(cpu_dst4);
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
if(j == 0)
|
if(j == 0)
|
||||||
continue;
|
continue;
|
||||||
totalgputick=t1+totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
totalcputick=t0+totalcputick;
|
totalcputick = t0 + totalcputick;
|
||||||
totalgputick_kernel=t2+totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
}
|
}
|
||||||
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
|
if(k == 0)
|
||||||
cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
{
|
||||||
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "no roi\n";
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
}
|
||||||
}
|
else
|
||||||
|
{
|
||||||
|
cout << "with roi\n";
|
||||||
|
};
|
||||||
|
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
|
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
|
||||||
{
|
{
|
||||||
Has_roi(j);
|
Has_roi(j);
|
||||||
cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
|
cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
|
||||||
cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
|
cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
|
||||||
gdst1_whole = dst1;
|
gdst1_whole = dst1;
|
||||||
gdst1 = gdst1_whole(Rect(dst1x,dst1y,roicols,roirows));
|
gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
|
||||||
|
|
||||||
gdst2_whole = dst2;
|
gdst2_whole = dst2;
|
||||||
gdst2 = gdst2_whole(Rect(dst2x,dst2y,roicols,roirows));
|
gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows));
|
||||||
|
|
||||||
gdst3_whole = dst3;
|
gdst3_whole = dst3;
|
||||||
gdst3 = gdst3_whole(Rect(dst3x,dst3y,roicols,roirows));
|
gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows));
|
||||||
|
|
||||||
gdst4_whole = dst4;
|
gdst4_whole = dst4;
|
||||||
gdst4 = gdst4_whole(Rect(dst4x,dst4y,roicols,roirows));
|
gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows));
|
||||||
gmat = mat_roi;
|
gmat = mat_roi;
|
||||||
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
|
if(j == 0)
|
||||||
cv::ocl::split(gmat, dev_gdst);
|
{
|
||||||
};
|
cout << "no roi:";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cout << "\nwith roi:";
|
||||||
|
};
|
||||||
|
cv::ocl::split(gmat, dev_gdst);
|
||||||
|
};
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
//*************test*****************
|
//*************test*****************
|
||||||
INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(
|
INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(
|
||||||
Values(CV_8UC4, CV_32FC4), Values(1, 4)));
|
Values(CV_8UC4, CV_32FC4), Values(1, 4)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine(
|
INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine(
|
||||||
Values(CV_8U, CV_32S, CV_32F), Values(1, 4)));
|
Values(CV_8U, CV_32S, CV_32F), Values(1, 4)));
|
||||||
|
|
||||||
#endif // HAVE_OPENCL
|
#endif // HAVE_OPENCL
|
||||||
|
|||||||
@@ -46,58 +46,58 @@
|
|||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::ocl;
|
using namespace cv::ocl;
|
||||||
using namespace cvtest;
|
using namespace cvtest;
|
||||||
using namespace testing;
|
using namespace testing;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#define FILTER_IMAGE "../../../samples/gpu/road.png"
|
#define FILTER_IMAGE "../../../samples/gpu/road.png"
|
||||||
|
|
||||||
TEST(SURF, Performance)
|
TEST(SURF, Performance)
|
||||||
{
|
{
|
||||||
cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE);
|
cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
|
||||||
ASSERT_FALSE(img.empty());
|
ASSERT_FALSE(img.empty());
|
||||||
|
|
||||||
ocl::SURF_OCL d_surf;
|
ocl::SURF_OCL d_surf;
|
||||||
ocl::oclMat d_keypoints;
|
ocl::oclMat d_keypoints;
|
||||||
ocl::oclMat d_descriptors;
|
ocl::oclMat d_descriptors;
|
||||||
|
|
||||||
double totalgputick=0;
|
double totalgputick = 0;
|
||||||
double totalgputick_kernel=0;
|
double totalgputick_kernel = 0;
|
||||||
|
|
||||||
double t1=0;
|
double t1 = 0;
|
||||||
double t2=0;
|
double t2 = 0;
|
||||||
for(int j = 0; j < LOOP_TIMES+1; j ++)
|
for(int j = 0; j < LOOP_TIMES + 1; j ++)
|
||||||
{
|
{
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
|
|
||||||
ocl::oclMat d_src(img);//upload
|
ocl::oclMat d_src(img);//upload
|
||||||
|
|
||||||
t2=(double)cvGetTickCount();//kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors);
|
d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
|
|
||||||
cv::Mat cpu_kp, cpu_dp;
|
cv::Mat cpu_kp, cpu_dp;
|
||||||
d_keypoints.download (cpu_kp);//download
|
d_keypoints.download (cpu_kp);//download
|
||||||
d_descriptors.download (cpu_dp);//download
|
d_descriptors.download (cpu_dp);//download
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
|
|
||||||
if(j == 0)
|
if(j == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
totalgputick=t1+totalgputick;
|
totalgputick = t1 + totalgputick;
|
||||||
|
|
||||||
totalgputick_kernel=t2+totalgputick_kernel;
|
totalgputick_kernel = t2 + totalgputick_kernel;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
|
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif //Have opencl
|
#endif //Have opencl
|
||||||
@@ -42,4 +42,3 @@
|
|||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -75,13 +75,13 @@ using namespace cvtest;
|
|||||||
|
|
||||||
int randomInt(int minVal, int maxVal)
|
int randomInt(int minVal, int maxVal)
|
||||||
{
|
{
|
||||||
RNG& rng = TS::ptr()->get_rng();
|
RNG &rng = TS::ptr()->get_rng();
|
||||||
return rng.uniform(minVal, maxVal);
|
return rng.uniform(minVal, maxVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
double randomDouble(double minVal, double maxVal)
|
double randomDouble(double minVal, double maxVal)
|
||||||
{
|
{
|
||||||
RNG& rng = TS::ptr()->get_rng();
|
RNG &rng = TS::ptr()->get_rng();
|
||||||
return rng.uniform(minVal, maxVal);
|
return rng.uniform(minVal, maxVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -170,7 +170,7 @@ const vector<DeviceInfo>& devices()
|
|||||||
vector<DeviceInfo> devices(FeatureSet feature)
|
vector<DeviceInfo> devices(FeatureSet feature)
|
||||||
{
|
{
|
||||||
const vector<DeviceInfo>& d = devices();
|
const vector<DeviceInfo>& d = devices();
|
||||||
|
|
||||||
vector<DeviceInfo> devs_filtered;
|
vector<DeviceInfo> devs_filtered;
|
||||||
|
|
||||||
if (TargetArchs::builtWith(feature))
|
if (TargetArchs::builtWith(feature))
|
||||||
@@ -207,19 +207,19 @@ vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
|
|||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<MatType>& all_types()
|
const vector<MatType> &all_types()
|
||||||
{
|
{
|
||||||
static vector<MatType> v = types(CV_8U, CV_64F, 1, 4);
|
static vector<MatType> v = types(CV_8U, CV_64F, 1, 4);
|
||||||
|
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat readImage(const string& fileName, int flags)
|
Mat readImage(const string &fileName, int flags)
|
||||||
{
|
{
|
||||||
return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags);
|
return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat readImageType(const string& fname, int type)
|
Mat readImageType(const string &fname, int type)
|
||||||
{
|
{
|
||||||
Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
|
Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
|
||||||
if (CV_MAT_CN(type) == 4)
|
if (CV_MAT_CN(type) == 4)
|
||||||
@@ -232,17 +232,17 @@ Mat readImageType(const string& fname, int type)
|
|||||||
return src;
|
return src;
|
||||||
}
|
}
|
||||||
|
|
||||||
double checkNorm(const Mat& m)
|
double checkNorm(const Mat &m)
|
||||||
{
|
{
|
||||||
return norm(m, NORM_INF);
|
return norm(m, NORM_INF);
|
||||||
}
|
}
|
||||||
|
|
||||||
double checkNorm(const Mat& m1, const Mat& m2)
|
double checkNorm(const Mat &m1, const Mat &m2)
|
||||||
{
|
{
|
||||||
return norm(m1, m2, NORM_INF);
|
return norm(m1, m2, NORM_INF);
|
||||||
}
|
}
|
||||||
|
|
||||||
double checkSimilarity(const Mat& m1, const Mat& m2)
|
double checkSimilarity(const Mat &m1, const Mat &m2)
|
||||||
{
|
{
|
||||||
Mat diff;
|
Mat diff;
|
||||||
matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
|
matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
|
||||||
@@ -256,7 +256,7 @@ void cv::ocl::PrintTo(const DeviceInfo& info, ostream* os)
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void PrintTo(const Inverse& inverse, std::ostream* os)
|
void PrintTo(const Inverse &inverse, std::ostream *os)
|
||||||
{
|
{
|
||||||
if (inverse)
|
if (inverse)
|
||||||
(*os) << "inverse";
|
(*os) << "inverse";
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ int randomInt(int minVal, int maxVal);
|
|||||||
double randomDouble(double minVal, double maxVal);
|
double randomDouble(double minVal, double maxVal);
|
||||||
|
|
||||||
//std::string generateVarList(int first,...);
|
//std::string generateVarList(int first,...);
|
||||||
std::string generateVarList(int& p1,int& p2);
|
std::string generateVarList(int &p1, int &p2);
|
||||||
cv::Size randomSize(int minVal, int maxVal);
|
cv::Size randomSize(int minVal, int maxVal);
|
||||||
cv::Scalar randomScalar(double minVal, double maxVal);
|
cv::Scalar randomScalar(double minVal, double maxVal);
|
||||||
cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0);
|
cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0);
|
||||||
@@ -72,12 +72,12 @@ void showDiff(cv::InputArray gold, cv::InputArray actual, double eps);
|
|||||||
//std::vector<cv::ocl::DeviceInfo> devices(cv::gpu::FeatureSet feature);
|
//std::vector<cv::ocl::DeviceInfo> devices(cv::gpu::FeatureSet feature);
|
||||||
|
|
||||||
//! read image from testdata folder.
|
//! read image from testdata folder.
|
||||||
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
|
cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR);
|
||||||
cv::Mat readImageType(const std::string& fname, int type);
|
cv::Mat readImageType(const std::string &fname, int type);
|
||||||
|
|
||||||
double checkNorm(const cv::Mat& m);
|
double checkNorm(const cv::Mat &m);
|
||||||
double checkNorm(const cv::Mat& m1, const cv::Mat& m2);
|
double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
|
||||||
double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2);
|
double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
|
||||||
|
|
||||||
#define EXPECT_MAT_NORM(mat, eps) \
|
#define EXPECT_MAT_NORM(mat, eps) \
|
||||||
{ \
|
{ \
|
||||||
@@ -105,9 +105,9 @@ double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2);
|
|||||||
EXPECT_LE(checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)), eps); \
|
EXPECT_LE(checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)), eps); \
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
namespace ocl
|
namespace ocl
|
||||||
{
|
{
|
||||||
// void PrintTo(const DeviceInfo& info, std::ostream* os);
|
// void PrintTo(const DeviceInfo& info, std::ostream* os);
|
||||||
}
|
}
|
||||||
@@ -120,31 +120,34 @@ using perf::MatType;
|
|||||||
std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
|
std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
|
||||||
|
|
||||||
//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
|
//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
|
||||||
const std::vector<MatType>& all_types();
|
const std::vector<MatType> &all_types();
|
||||||
|
|
||||||
class Inverse
|
class Inverse
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
inline Inverse(bool val = false) : val_(val) {}
|
inline Inverse(bool val = false) : val_(val) {}
|
||||||
|
|
||||||
inline operator bool() const { return val_; }
|
inline operator bool() const
|
||||||
|
{
|
||||||
|
return val_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool val_;
|
bool val_;
|
||||||
};
|
};
|
||||||
|
|
||||||
void PrintTo(const Inverse& useRoi, std::ostream* os);
|
void PrintTo(const Inverse &useRoi, std::ostream *os);
|
||||||
|
|
||||||
CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
|
CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
|
||||||
|
|
||||||
CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
|
CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
|
||||||
|
|
||||||
enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
|
enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
|
||||||
CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
|
CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
|
||||||
|
|
||||||
CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
|
CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
|
||||||
|
|
||||||
CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
|
CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
|
||||||
|
|
||||||
CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
|
CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -51,48 +51,51 @@ using namespace cv::ocl;
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#if !defined (HAVE_OPENCL)
|
#if !defined (HAVE_OPENCL)
|
||||||
void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2,
|
void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
|
||||||
oclMat& result){throw_nogpu();}
|
oclMat &result)
|
||||||
#else
|
|
||||||
namespace cv
|
|
||||||
{
|
{
|
||||||
namespace ocl
|
throw_nogpu();
|
||||||
{
|
}
|
||||||
|
#else
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
namespace ocl
|
||||||
|
{
|
||||||
////////////////////////////////////OpenCL kernel strings//////////////////////////
|
////////////////////////////////////OpenCL kernel strings//////////////////////////
|
||||||
extern const char *blend_linear;
|
extern const char *blend_linear;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2,
|
void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
|
||||||
oclMat& result)
|
oclMat &result)
|
||||||
{
|
{
|
||||||
cv::ocl::Context *ctx = img1.clCxt;
|
cv::ocl::Context *ctx = img1.clCxt;
|
||||||
assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
|
assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
|
||||||
int channels = img1.channels();
|
int channels = img1.oclchannels();
|
||||||
int depth = img1.depth();
|
int depth = img1.depth();
|
||||||
int rows = img1.rows;
|
int rows = img1.rows;
|
||||||
int cols = img1.cols;
|
int cols = img1.cols;
|
||||||
int istep = img1.step1();
|
int istep = img1.step1();
|
||||||
int wstep = weights1.step1();
|
int wstep = weights1.step1();
|
||||||
size_t globalSize[] = {cols * channels, rows, 1};
|
size_t globalSize[] = {cols * channels, rows, 1};
|
||||||
size_t localSize[] = {16, 16, 1};
|
size_t localSize[] = {16, 16, 1};
|
||||||
|
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
if(globalSize[0]!=0)
|
if(globalSize[0] != 0)
|
||||||
{
|
{
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
|
||||||
std::string kernelName = "BlendLinear";
|
std::string kernelName = "BlendLinear";
|
||||||
|
|
||||||
openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
|
openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
File diff suppressed because it is too large
Load Diff
280
modules/ocl/src/build_warps.cpp
Normal file
280
modules/ocl/src/build_warps.cpp
Normal file
@@ -0,0 +1,280 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::ocl;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#if !defined (HAVE_OPENCL)
|
||||||
|
void cv::ocl::buildWarpPlaneMaps(Size, Rect, const Mat &, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::buildWarpCylindricalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::buildWarpSphericalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
namespace ocl
|
||||||
|
{
|
||||||
|
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||||
|
extern const char *build_warps;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
// buildWarpPlaneMaps
|
||||||
|
|
||||||
|
void cv::ocl::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
|
||||||
|
float scale, oclMat &map_x, oclMat &map_y)
|
||||||
|
{
|
||||||
|
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
|
||||||
|
CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
|
||||||
|
CV_Assert((T.size() == Size(3, 1) || T.size() == Size(1, 3)) && T.type() == CV_32F && T.isContinuous());
|
||||||
|
|
||||||
|
Mat K_Rinv = K * R.t();
|
||||||
|
CV_Assert(K_Rinv.isContinuous());
|
||||||
|
|
||||||
|
Mat KRT_mat(1, 12, CV_32FC1); // 9 + 3
|
||||||
|
KRT_mat(Range::all(), Range(0, 8)) = K_Rinv.reshape(1, 1);
|
||||||
|
KRT_mat(Range::all(), Range(9, 11)) = T;
|
||||||
|
|
||||||
|
oclMat KRT_oclMat(KRT_mat);
|
||||||
|
// transfer K_Rinv and T into a single cl_mem
|
||||||
|
map_x.create(dst_roi.size(), CV_32F);
|
||||||
|
map_y.create(dst_roi.size(), CV_32F);
|
||||||
|
|
||||||
|
int tl_u = dst_roi.tl().x;
|
||||||
|
int tl_v = dst_roi.tl().y;
|
||||||
|
|
||||||
|
Context *clCxt = Context::getContext();
|
||||||
|
string kernelName = "buildWarpPlaneMaps";
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&KRT_mat.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
// buildWarpCylyndricalMaps
|
||||||
|
|
||||||
|
void cv::ocl::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale,
|
||||||
|
oclMat &map_x, oclMat &map_y)
|
||||||
|
{
|
||||||
|
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
|
||||||
|
CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
|
||||||
|
|
||||||
|
Mat K_Rinv = K * R.t();
|
||||||
|
CV_Assert(K_Rinv.isContinuous());
|
||||||
|
|
||||||
|
oclMat KR_oclMat(K_Rinv.reshape(1, 1));
|
||||||
|
|
||||||
|
map_x.create(dst_roi.size(), CV_32F);
|
||||||
|
map_y.create(dst_roi.size(), CV_32F);
|
||||||
|
|
||||||
|
int tl_u = dst_roi.tl().x;
|
||||||
|
int tl_v = dst_roi.tl().y;
|
||||||
|
|
||||||
|
Context *clCxt = Context::getContext();
|
||||||
|
string kernelName = "buildWarpCylindricalMaps";
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
// buildWarpSphericalMaps
|
||||||
|
void cv::ocl::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale,
|
||||||
|
oclMat &map_x, oclMat &map_y)
|
||||||
|
{
|
||||||
|
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
|
||||||
|
CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
|
||||||
|
|
||||||
|
Mat K_Rinv = K * R.t();
|
||||||
|
CV_Assert(K_Rinv.isContinuous());
|
||||||
|
|
||||||
|
oclMat KR_oclMat(K_Rinv.reshape(1, 1));
|
||||||
|
// transfer K_Rinv, R_Kinv into a single cl_mem
|
||||||
|
map_x.create(dst_roi.size(), CV_32F);
|
||||||
|
map_y.create(dst_roi.size(), CV_32F);
|
||||||
|
|
||||||
|
int tl_u = dst_roi.tl().x;
|
||||||
|
int tl_v = dst_roi.tl().y;
|
||||||
|
|
||||||
|
Context *clCxt = Context::getContext();
|
||||||
|
string kernelName = "buildWarpSphericalMaps";
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
|
||||||
|
{
|
||||||
|
|
||||||
|
CV_Assert(M.rows == 2 && M.cols == 3);
|
||||||
|
|
||||||
|
xmap.create(dsize, CV_32FC1);
|
||||||
|
ymap.create(dsize, CV_32FC1);
|
||||||
|
|
||||||
|
float coeffs[2 * 3];
|
||||||
|
Mat coeffsMat(2, 3, CV_32F, (void *)coeffs);
|
||||||
|
|
||||||
|
if (inverse)
|
||||||
|
M.convertTo(coeffsMat, coeffsMat.type());
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cv::Mat iM;
|
||||||
|
invertAffineTransform(M, iM);
|
||||||
|
iM.convertTo(coeffsMat, coeffsMat.type());
|
||||||
|
}
|
||||||
|
|
||||||
|
oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
|
||||||
|
|
||||||
|
Context *clCxt = Context::getContext();
|
||||||
|
string kernelName = "buildWarpAffineMaps";
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
|
||||||
|
{
|
||||||
|
|
||||||
|
CV_Assert(M.rows == 3 && M.cols == 3);
|
||||||
|
|
||||||
|
xmap.create(dsize, CV_32FC1);
|
||||||
|
ymap.create(dsize, CV_32FC1);
|
||||||
|
|
||||||
|
float coeffs[3 * 3];
|
||||||
|
Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
|
||||||
|
|
||||||
|
if (inverse)
|
||||||
|
M.convertTo(coeffsMat, coeffsMat.type());
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cv::Mat iM;
|
||||||
|
invert(M, iM);
|
||||||
|
iM.convertTo(coeffsMat, coeffsMat.type());
|
||||||
|
}
|
||||||
|
|
||||||
|
oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
|
||||||
|
|
||||||
|
Context *clCxt = Context::getContext();
|
||||||
|
string kernelName = "buildWarpPerspectiveMaps";
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif // HAVE_OPENCL
|
||||||
@@ -52,10 +52,22 @@ using namespace cv::ocl;
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#if !defined (HAVE_OPENCL)
|
#if !defined (HAVE_OPENCL)
|
||||||
void cv::ocl::Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) { throw_nogpu(); }
|
void cv::ocl::Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
|
||||||
void cv::ocl::Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false){ throw_nogpu(); }
|
{
|
||||||
void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); }
|
throw_nogpu();
|
||||||
void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); }
|
}
|
||||||
|
void cv::ocl::Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
@@ -67,14 +79,14 @@ namespace cv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_), counter(NULL)
|
cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_) : dx(dx_), dy(dy_), counter(NULL)
|
||||||
{
|
{
|
||||||
CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
|
CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
|
||||||
|
|
||||||
create(dx_.size(), -1);
|
create(dx_.size(), -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
|
void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
|
||||||
{
|
{
|
||||||
ensureSizeIsEnough(image_size, CV_32SC1, dx);
|
ensureSizeIsEnough(image_size, CV_32SC1, dx);
|
||||||
ensureSizeIsEnough(image_size, CV_32SC1, dy);
|
ensureSizeIsEnough(image_size, CV_32SC1, dy);
|
||||||
@@ -123,27 +135,31 @@ void cv::ocl::CannyBuf::release()
|
|||||||
openCLFree(counter);
|
openCLFree(counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cv { namespace ocl {
|
namespace cv
|
||||||
namespace canny
|
{
|
||||||
|
namespace ocl
|
||||||
{
|
{
|
||||||
void calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols);
|
namespace canny
|
||||||
|
{
|
||||||
|
void calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols);
|
||||||
|
|
||||||
void calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad);
|
void calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad);
|
||||||
void calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad);
|
void calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad);
|
||||||
|
|
||||||
void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh);
|
void calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh);
|
||||||
|
|
||||||
void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols);
|
void edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols);
|
||||||
|
|
||||||
void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols);
|
void edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols);
|
||||||
|
|
||||||
void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols);
|
void getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}}// cv::ocl
|
}// cv::ocl
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
void CannyCaller(CannyBuf& buf, oclMat& dst, float low_thresh, float high_thresh)
|
void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
|
||||||
{
|
{
|
||||||
using namespace ::cv::ocl::canny;
|
using namespace ::cv::ocl::canny;
|
||||||
calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
|
calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
|
||||||
@@ -156,13 +172,13 @@ namespace
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::Canny(const oclMat& src, oclMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
|
void cv::ocl::Canny(const oclMat &src, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
|
||||||
{
|
{
|
||||||
CannyBuf buf(src.size(), apperture_size);
|
CannyBuf buf(src.size(), apperture_size);
|
||||||
Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
|
Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
|
void cv::ocl::Canny(const oclMat &src, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
|
||||||
{
|
{
|
||||||
using namespace ::cv::ocl::canny;
|
using namespace ::cv::ocl::canny;
|
||||||
|
|
||||||
@@ -192,13 +208,13 @@ void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_th
|
|||||||
}
|
}
|
||||||
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
|
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
|
||||||
}
|
}
|
||||||
void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& dst, double low_thresh, double high_thresh, bool L2gradient)
|
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient)
|
||||||
{
|
{
|
||||||
CannyBuf buf(dx, dy);
|
CannyBuf buf(dx, dy);
|
||||||
Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
|
Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& dst, double low_thresh, double high_thresh, bool L2gradient)
|
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient)
|
||||||
{
|
{
|
||||||
using namespace ::cv::ocl::canny;
|
using namespace ::cv::ocl::canny;
|
||||||
|
|
||||||
@@ -210,7 +226,8 @@ void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& d
|
|||||||
dst.create(dx.size(), CV_8U);
|
dst.create(dx.size(), CV_8U);
|
||||||
dst.setTo(Scalar::all(0));
|
dst.setTo(Scalar::all(0));
|
||||||
|
|
||||||
buf.dx = dx; buf.dy = dy;
|
buf.dx = dx;
|
||||||
|
buf.dy = dy;
|
||||||
buf.create(dx.size(), -1);
|
buf.create(dx.size(), -1);
|
||||||
buf.edgeBuf.setTo(Scalar::all(0));
|
buf.edgeBuf.setTo(Scalar::all(0));
|
||||||
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
|
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
|
||||||
@@ -218,7 +235,7 @@ void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& d
|
|||||||
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
|
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
|
||||||
}
|
}
|
||||||
|
|
||||||
void canny::calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols)
|
void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols)
|
||||||
{
|
{
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
string kernelName = "calcSobelRowPass";
|
string kernelName = "calcSobelRowPass";
|
||||||
@@ -241,7 +258,7 @@ void canny::calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_b
|
|||||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void canny::calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad)
|
void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
|
||||||
{
|
{
|
||||||
Context *clCxt = dx_buf.clCxt;
|
Context *clCxt = dx_buf.clCxt;
|
||||||
string kernelName = "calcMagnitude_buf";
|
string kernelName = "calcMagnitude_buf";
|
||||||
@@ -275,7 +292,7 @@ void canny::calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat
|
|||||||
}
|
}
|
||||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||||
}
|
}
|
||||||
void canny::calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad)
|
void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
|
||||||
{
|
{
|
||||||
Context *clCxt = dx.clCxt;
|
Context *clCxt = dx.clCxt;
|
||||||
string kernelName = "calcMagnitude";
|
string kernelName = "calcMagnitude";
|
||||||
@@ -304,7 +321,7 @@ void canny::calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, i
|
|||||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||||
}
|
}
|
||||||
|
|
||||||
void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh)
|
void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
|
||||||
{
|
{
|
||||||
Context *clCxt = dx.clCxt;
|
Context *clCxt = dx.clCxt;
|
||||||
|
|
||||||
@@ -335,7 +352,7 @@ void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int ro
|
|||||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols)
|
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
|
||||||
{
|
{
|
||||||
Context *clCxt = map.clCxt;
|
Context *clCxt = map.clCxt;
|
||||||
string kernelName = "edgesHysteresisLocal";
|
string kernelName = "edgesHysteresisLocal";
|
||||||
@@ -355,7 +372,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, i
|
|||||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols)
|
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
|
||||||
{
|
{
|
||||||
unsigned int count;
|
unsigned int count;
|
||||||
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
|
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
|
||||||
@@ -389,7 +406,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, voi
|
|||||||
#undef DIVUP
|
#undef DIVUP
|
||||||
}
|
}
|
||||||
|
|
||||||
void canny::getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols)
|
void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
|
||||||
{
|
{
|
||||||
Context *clCxt = map.clCxt;
|
Context *clCxt = map.clCxt;
|
||||||
string kernelName = "getEdges";
|
string kernelName = "getEdges";
|
||||||
|
|||||||
@@ -81,9 +81,9 @@ namespace
|
|||||||
void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx)
|
void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx)
|
||||||
{
|
{
|
||||||
vector<pair<size_t , const void *> > args;
|
vector<pair<size_t , const void *> > args;
|
||||||
int channels = src.channels();
|
int channels = src.oclchannels();
|
||||||
char build_options[50];
|
char build_options[50];
|
||||||
//printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.channels(),bidx);
|
//printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx);
|
||||||
sprintf(build_options, "-D DEPTH_%d", src.depth());
|
sprintf(build_options, "-D DEPTH_%d", src.depth());
|
||||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
|
||||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
|
||||||
@@ -99,7 +99,7 @@ namespace
|
|||||||
void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
|
void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
|
||||||
{
|
{
|
||||||
Size sz = src.size();
|
Size sz = src.size();
|
||||||
int scn = src.channels(), depth = src.depth(), bidx;
|
int scn = src.oclchannels(), depth = src.depth(), bidx;
|
||||||
|
|
||||||
CV_Assert(depth == CV_8U || depth == CV_16U);
|
CV_Assert(depth == CV_8U || depth == CV_16U);
|
||||||
|
|
||||||
|
|||||||
@@ -53,41 +53,44 @@ using namespace std;
|
|||||||
|
|
||||||
#if !defined(HAVE_OPENCL)
|
#if !defined(HAVE_OPENCL)
|
||||||
|
|
||||||
void cv::ocl::columnSum(const oclMat& src,oclMat& dst){ throw_nogpu(); }
|
void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
|
||||||
#else /*!HAVE_OPENCL */
|
#else /*!HAVE_OPENCL */
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
|
||||||
namespace ocl
|
|
||||||
{
|
|
||||||
extern const char* imgproc_columnsum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::ocl::columnSum(const oclMat& src,oclMat& dst)
|
|
||||||
{
|
{
|
||||||
CV_Assert(src.type() == CV_32FC1);
|
namespace ocl
|
||||||
|
{
|
||||||
|
extern const char *imgproc_columnsum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dst.create(src.size(), src.type());
|
void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
|
||||||
|
{
|
||||||
|
CV_Assert(src.type() == CV_32FC1);
|
||||||
|
|
||||||
Context *clCxt = src.clCxt;
|
dst.create(src.size(), src.type());
|
||||||
|
|
||||||
const std::string kernelName = "columnSum";
|
|
||||||
|
|
||||||
std::vector< pair<size_t, const void *> > args;
|
|
||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
Context *clCxt = src.clCxt;
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
|
|
||||||
|
|
||||||
size_t globalThreads[3] = {dst.cols, 1, 1};
|
const std::string kernelName = "columnSum";
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
|
std::vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {dst.cols, 1, 1};
|
||||||
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
|
|
||||||
|
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -52,43 +52,50 @@ using namespace cv::ocl;
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#if !defined (HAVE_OPENCL)
|
#if !defined (HAVE_OPENCL)
|
||||||
void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
|
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#include <clAmdFft.h>
|
#include <clAmdFft.h>
|
||||||
|
|
||||||
namespace cv{ namespace ocl {
|
namespace cv
|
||||||
enum FftType
|
{
|
||||||
|
namespace ocl
|
||||||
{
|
{
|
||||||
C2R = 1, // complex to complex
|
enum FftType
|
||||||
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
|
{
|
||||||
C2C = 3 // opencl HERMITIAN_INTERLEAVED to real
|
C2R = 1, // complex to complex
|
||||||
};
|
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
|
||||||
struct FftPlan
|
C2C = 3 // opencl HERMITIAN_INTERLEAVED to real
|
||||||
{
|
};
|
||||||
friend void fft_setup();
|
struct FftPlan
|
||||||
friend void fft_teardown();
|
{
|
||||||
~FftPlan();
|
friend void fft_setup();
|
||||||
protected:
|
friend void fft_teardown();
|
||||||
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
|
~FftPlan();
|
||||||
const Size dft_size;
|
protected:
|
||||||
const int src_step, dst_step;
|
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
|
||||||
const int flags;
|
const Size dft_size;
|
||||||
const FftType type;
|
const int src_step, dst_step;
|
||||||
clAmdFftPlanHandle plHandle;
|
const int flags;
|
||||||
static vector<FftPlan*> planStore;
|
const FftType type;
|
||||||
static bool started;
|
clAmdFftPlanHandle plHandle;
|
||||||
static clAmdFftSetupData * setupData;
|
static vector<FftPlan *> planStore;
|
||||||
public:
|
static bool started;
|
||||||
// return a baked plan->
|
static clAmdFftSetupData *setupData;
|
||||||
// if there is one matched plan, return it
|
public:
|
||||||
// if not, bake a new one, put it into the planStore and return it.
|
// return a baked plan->
|
||||||
static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
|
// if there is one matched plan, return it
|
||||||
};
|
// if not, bake a new one, put it into the planStore and return it.
|
||||||
}}
|
static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
bool cv::ocl::FftPlan::started = false;
|
bool cv::ocl::FftPlan::started = false;
|
||||||
vector<cv::ocl::FftPlan*> cv::ocl::FftPlan::planStore = vector<cv::ocl::FftPlan*>();
|
vector<cv::ocl::FftPlan *> cv::ocl::FftPlan::planStore = vector<cv::ocl::FftPlan *>();
|
||||||
clAmdFftSetupData * cv::ocl::FftPlan::setupData = 0;
|
clAmdFftSetupData *cv::ocl::FftPlan::setupData = 0;
|
||||||
|
|
||||||
void cv::ocl::fft_setup()
|
void cv::ocl::fft_setup()
|
||||||
{
|
{
|
||||||
@@ -134,9 +141,9 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
|
|||||||
clAmdFftResultLocation place;
|
clAmdFftResultLocation place;
|
||||||
clAmdFftLayout inLayout;
|
clAmdFftLayout inLayout;
|
||||||
clAmdFftLayout outLayout;
|
clAmdFftLayout outLayout;
|
||||||
clAmdFftDim dim = is_1d_input||is_row_dft ? CLFFT_1D : CLFFT_2D;
|
clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
|
||||||
|
|
||||||
size_t batchSize = is_row_dft?dft_size.height : 1;
|
size_t batchSize = is_row_dft ? dft_size.height : 1;
|
||||||
size_t clLengthsIn[ 3 ] = {1, 1, 1};
|
size_t clLengthsIn[ 3 ] = {1, 1, 1};
|
||||||
size_t clStridesIn[ 3 ] = {1, 1, 1};
|
size_t clStridesIn[ 3 ] = {1, 1, 1};
|
||||||
size_t clLengthsOut[ 3 ] = {1, 1, 1};
|
size_t clLengthsOut[ 3 ] = {1, 1, 1};
|
||||||
@@ -195,7 +202,7 @@ cv::ocl::FftPlan::~FftPlan()
|
|||||||
{
|
{
|
||||||
if(planStore[i]->plHandle == plHandle)
|
if(planStore[i]->plHandle == plHandle)
|
||||||
{
|
{
|
||||||
planStore.erase(planStore.begin()+ i);
|
planStore.erase(planStore.begin() + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
|
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
|
||||||
@@ -206,15 +213,15 @@ clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int
|
|||||||
// go through search
|
// go through search
|
||||||
for(int i = 0; i < planStore.size(); i ++)
|
for(int i = 0; i < planStore.size(); i ++)
|
||||||
{
|
{
|
||||||
FftPlan * plan = planStore[i];
|
FftPlan *plan = planStore[i];
|
||||||
if(
|
if(
|
||||||
plan->dft_size.width == _dft_size.width &&
|
plan->dft_size.width == _dft_size.width &&
|
||||||
plan->dft_size.height == _dft_size.height &&
|
plan->dft_size.height == _dft_size.height &&
|
||||||
plan->flags == _flags &&
|
plan->flags == _flags &&
|
||||||
plan->src_step == _src_step &&
|
plan->src_step == _src_step &&
|
||||||
plan->dst_step == _dst_step &&
|
plan->dst_step == _dst_step &&
|
||||||
plan->type == _type
|
plan->type == _type
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
return plan->plHandle;
|
return plan->plHandle;
|
||||||
}
|
}
|
||||||
@@ -225,9 +232,9 @@ clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int
|
|||||||
return newPlan->plHandle;
|
return newPlan->plHandle;
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags)
|
void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
|
||||||
{
|
{
|
||||||
if(dft_size == Size(0,0))
|
if(dft_size == Size(0, 0))
|
||||||
{
|
{
|
||||||
dft_size = src.size();
|
dft_size = src.size();
|
||||||
}
|
}
|
||||||
@@ -258,7 +265,7 @@ void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags)
|
|||||||
break;
|
break;
|
||||||
case R2C:
|
case R2C:
|
||||||
CV_Assert(!is_row_dft); // this is not supported yet
|
CV_Assert(!is_row_dft); // this is not supported yet
|
||||||
dst.create(src.rows, src.cols/2 + 1, CV_32FC2);
|
dst.create(src.rows, src.cols / 2 + 1, CV_32FC2);
|
||||||
break;
|
break;
|
||||||
case C2R:
|
case C2R:
|
||||||
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
|
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
|
||||||
@@ -274,23 +281,23 @@ void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags)
|
|||||||
clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);
|
clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);
|
||||||
|
|
||||||
//get the buffersize
|
//get the buffersize
|
||||||
size_t buffersize=0;
|
size_t buffersize = 0;
|
||||||
openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
|
openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
|
||||||
|
|
||||||
//allocate the intermediate buffer
|
//allocate the intermediate buffer
|
||||||
cl_mem clMedBuffer=NULL;
|
cl_mem clMedBuffer = NULL;
|
||||||
if (buffersize)
|
if (buffersize)
|
||||||
{
|
{
|
||||||
cl_int medstatus;
|
cl_int medstatus;
|
||||||
clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
|
clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
|
||||||
openCLSafeCall( medstatus );
|
openCLSafeCall( medstatus );
|
||||||
}
|
}
|
||||||
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
|
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
|
||||||
is_inverse?CLFFT_BACKWARD:CLFFT_FORWARD,
|
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
|
||||||
1,
|
1,
|
||||||
&src.clCxt->impl->clCmdQueue,
|
&src.clCxt->impl->clCmdQueue,
|
||||||
0, NULL, NULL,
|
0, NULL, NULL,
|
||||||
(cl_mem*)&src.data, (cl_mem*)&dst.data, clMedBuffer ) );
|
(cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
|
||||||
openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
|
openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
|
||||||
if(clMedBuffer)
|
if(clMedBuffer)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -110,9 +110,9 @@ Ptr<FilterEngine_GPU> cv::ocl::createLinearFilter_GPU(int, int, const Mat &, con
|
|||||||
}
|
}
|
||||||
|
|
||||||
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType )
|
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType )
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
return Ptr<FilterEngine_GPU>(0);
|
return Ptr<FilterEngine_GPU>(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::boxFilter(const oclMat &, oclMat &, int, Size, Point, int)
|
void cv::ocl::boxFilter(const oclMat &, oclMat &, int, Size, Point, int)
|
||||||
@@ -244,7 +244,7 @@ namespace
|
|||||||
class Filter2DEngine_GPU : public FilterEngine_GPU
|
class Filter2DEngine_GPU : public FilterEngine_GPU
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_) : filter2D(filter2D_) {}
|
Filter2DEngine_GPU(const Ptr<BaseFilter_GPU> &filter2D_) : filter2D(filter2D_) {}
|
||||||
|
|
||||||
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
|
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
|
||||||
{
|
{
|
||||||
@@ -328,53 +328,53 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c
|
|||||||
CV_Assert(src.clCxt == dst.clCxt);
|
CV_Assert(src.clCxt == dst.clCxt);
|
||||||
CV_Assert( (src.cols == dst.cols) &&
|
CV_Assert( (src.cols == dst.cols) &&
|
||||||
(src.rows == dst.rows) );
|
(src.rows == dst.rows) );
|
||||||
CV_Assert( (src.channels() == dst.channels()) );
|
CV_Assert( (src.oclchannels() == dst.oclchannels()) );
|
||||||
|
|
||||||
int srcStep = src.step1() / src.channels();
|
int srcStep = src.step1() / src.oclchannels();
|
||||||
int dstStep = dst.step1() / dst.channels();
|
int dstStep = dst.step1() / dst.oclchannels();
|
||||||
int srcOffset = src.offset / src.elemSize();
|
int srcOffset = src.offset / src.elemSize();
|
||||||
int dstOffset = dst.offset / dst.elemSize();
|
int dstOffset = dst.offset / dst.elemSize();
|
||||||
|
|
||||||
int srcOffset_x=srcOffset%srcStep;
|
int srcOffset_x = srcOffset % srcStep;
|
||||||
int srcOffset_y=srcOffset/srcStep;
|
int srcOffset_y = srcOffset / srcStep;
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
string kernelName;
|
string kernelName;
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] * localThreads[0], (src.rows + localThreads[1]) / localThreads[1] * localThreads[1], 1};
|
size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] *localThreads[0], (src.rows + localThreads[1]) / localThreads[1] *localThreads[1], 1};
|
||||||
|
|
||||||
if(src.type()==CV_8UC1)
|
if(src.type() == CV_8UC1)
|
||||||
{
|
{
|
||||||
kernelName = "morph_C1_D0";
|
kernelName = "morph_C1_D0";
|
||||||
globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
|
globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
|
||||||
CV_Assert( localThreads[0]*localThreads[1]*8 >= (localThreads[0]*4+ksize.width-1)*(localThreads[1]+ksize.height-1) );
|
CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
kernelName = "morph";
|
kernelName = "morph";
|
||||||
CV_Assert( localThreads[0]*localThreads[1]*2 >= (localThreads[0]+ksize.width-1)*(localThreads[1]+ksize.height-1) );
|
CV_Assert( localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
|
||||||
}
|
}
|
||||||
char s[64];
|
char s[64];
|
||||||
switch(src.type())
|
switch(src.type())
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
sprintf(s, "-D VAL=255");
|
sprintf(s, "-D VAL=255");
|
||||||
break;
|
break;
|
||||||
case CV_8UC3:
|
case CV_8UC3:
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
sprintf(s, "-D VAL=255 -D GENTYPE=uchar4");
|
sprintf(s, "-D VAL=255 -D GENTYPE=uchar4");
|
||||||
break;
|
break;
|
||||||
case CV_32FC1:
|
case CV_32FC1:
|
||||||
sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float");
|
sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float");
|
||||||
break;
|
break;
|
||||||
case CV_32FC3:
|
case CV_32FC3:
|
||||||
case CV_32FC4:
|
case CV_32FC4:
|
||||||
sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float4");
|
sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float4");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat,"unsupported type");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported type");
|
||||||
}
|
}
|
||||||
char compile_option[128];
|
char compile_option[128];
|
||||||
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1],s);
|
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s);
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
||||||
@@ -385,9 +385,9 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data));
|
||||||
args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholecols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
|
||||||
args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholerows));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
|
||||||
args.push_back( make_pair( sizeof(cl_int),(void*)&dstOffset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset));
|
||||||
openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
|
openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -400,53 +400,53 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize,
|
|||||||
CV_Assert(src.clCxt == dst.clCxt);
|
CV_Assert(src.clCxt == dst.clCxt);
|
||||||
CV_Assert( (src.cols == dst.cols) &&
|
CV_Assert( (src.cols == dst.cols) &&
|
||||||
(src.rows == dst.rows) );
|
(src.rows == dst.rows) );
|
||||||
CV_Assert( (src.channels() == dst.channels()) );
|
CV_Assert( (src.oclchannels() == dst.oclchannels()) );
|
||||||
|
|
||||||
int srcStep = src.step1() / src.channels();
|
int srcStep = src.step1() / src.oclchannels();
|
||||||
int dstStep = dst.step1() / dst.channels();
|
int dstStep = dst.step1() / dst.oclchannels();
|
||||||
int srcOffset = src.offset / src.elemSize();
|
int srcOffset = src.offset / src.elemSize();
|
||||||
int dstOffset = dst.offset / dst.elemSize();
|
int dstOffset = dst.offset / dst.elemSize();
|
||||||
|
|
||||||
int srcOffset_x=srcOffset%srcStep;
|
int srcOffset_x = srcOffset % srcStep;
|
||||||
int srcOffset_y=srcOffset/srcStep;
|
int srcOffset_y = srcOffset / srcStep;
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
string kernelName;
|
string kernelName;
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] * localThreads[0], (src.rows + localThreads[1]) / localThreads[1] * localThreads[1], 1};
|
size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] *localThreads[0], (src.rows + localThreads[1]) / localThreads[1] *localThreads[1], 1};
|
||||||
|
|
||||||
if(src.type()==CV_8UC1)
|
if(src.type() == CV_8UC1)
|
||||||
{
|
{
|
||||||
kernelName = "morph_C1_D0";
|
kernelName = "morph_C1_D0";
|
||||||
globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
|
globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
|
||||||
CV_Assert( localThreads[0]*localThreads[1]*8 >= (localThreads[0]*4+ksize.width-1)*(localThreads[1]+ksize.height-1) );
|
CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
kernelName = "morph";
|
kernelName = "morph";
|
||||||
CV_Assert( localThreads[0]*localThreads[1]*2 >= (localThreads[0]+ksize.width-1)*(localThreads[1]+ksize.height-1) );
|
CV_Assert( localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
|
||||||
}
|
}
|
||||||
char s[64];
|
char s[64];
|
||||||
switch(src.type())
|
switch(src.type())
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
sprintf(s, "-D VAL=0");
|
sprintf(s, "-D VAL=0");
|
||||||
break;
|
break;
|
||||||
case CV_8UC3:
|
case CV_8UC3:
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
sprintf(s, "-D VAL=0 -D GENTYPE=uchar4");
|
sprintf(s, "-D VAL=0 -D GENTYPE=uchar4");
|
||||||
break;
|
break;
|
||||||
case CV_32FC1:
|
case CV_32FC1:
|
||||||
sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float");
|
sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float");
|
||||||
break;
|
break;
|
||||||
case CV_32FC3:
|
case CV_32FC3:
|
||||||
case CV_32FC4:
|
case CV_32FC4:
|
||||||
sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float4");
|
sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float4");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat,"unsupported type");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported type");
|
||||||
}
|
}
|
||||||
char compile_option[128];
|
char compile_option[128];
|
||||||
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1],s);
|
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s);
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
||||||
@@ -457,9 +457,9 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize,
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data));
|
||||||
args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholecols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
|
||||||
args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholerows));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
|
||||||
args.push_back( make_pair( sizeof(cl_int),(void*)&dstOffset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset));
|
||||||
openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
|
openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -467,12 +467,12 @@ Ptr<BaseFilter_GPU> cv::ocl::getMorphologyFilter_GPU(int op, int type, const Mat
|
|||||||
{
|
{
|
||||||
static const GPUMorfFilter_t GPUMorfFilter_callers[2][5] =
|
static const GPUMorfFilter_t GPUMorfFilter_callers[2][5] =
|
||||||
{
|
{
|
||||||
{0, GPUErode, 0, 0, GPUErode },
|
{0, GPUErode, 0, GPUErode, GPUErode },
|
||||||
{0, GPUDilate, 0, 0, GPUDilate}
|
{0, GPUDilate, 0, GPUDilate, GPUDilate}
|
||||||
};
|
};
|
||||||
|
|
||||||
CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
|
CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
|
||||||
CV_Assert(type == CV_8UC1 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC4);
|
CV_Assert(type == CV_8UC1 || type == CV_8UC3 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC1 || type == CV_32FC4);
|
||||||
|
|
||||||
oclMat gpu_krnl;
|
oclMat gpu_krnl;
|
||||||
normalizeKernel(kernel, gpu_krnl);
|
normalizeKernel(kernel, gpu_krnl);
|
||||||
@@ -486,7 +486,7 @@ namespace
|
|||||||
class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU
|
class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int iters_) :
|
MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU> &filter2D_, int iters_) :
|
||||||
Filter2DEngine_GPU(filter2D_), iters(iters_) {}
|
Filter2DEngine_GPU(filter2D_), iters(iters_) {}
|
||||||
|
|
||||||
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
|
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
|
||||||
@@ -539,18 +539,18 @@ Ptr<FilterEngine_GPU> cv::ocl::createMorphologyFilter_GPU(int op, int type, cons
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point anchor, int iterations,int borderType,const Scalar& borderValue)
|
void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point anchor, int iterations, int borderType, const Scalar &borderValue)
|
||||||
{
|
{
|
||||||
if((borderType != cv::BORDER_CONSTANT) || (borderValue!=morphologyDefaultBorderValue()))
|
if((borderType != cv::BORDER_CONSTANT) || (borderValue != morphologyDefaultBorderValue()))
|
||||||
{
|
{
|
||||||
CV_Error(CV_StsBadArg,"unsupported border type");
|
CV_Error(CV_StsBadArg, "unsupported border type");
|
||||||
}
|
}
|
||||||
Mat kernel;
|
Mat kernel;
|
||||||
Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);
|
Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);
|
||||||
|
|
||||||
normalizeAnchor(anchor, ksize);
|
normalizeAnchor(anchor, ksize);
|
||||||
|
|
||||||
if (iterations == 0 || _kernel.rows *_kernel.cols == 1)
|
if (iterations == 0 || _kernel.rows * _kernel.cols == 1)
|
||||||
{
|
{
|
||||||
src.copyTo(dst);
|
src.copyTo(dst);
|
||||||
return;
|
return;
|
||||||
@@ -581,7 +581,7 @@ namespace
|
|||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
|
void cv::ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
|
||||||
int borderType,const Scalar& borderValue)
|
int borderType, const Scalar &borderValue)
|
||||||
{
|
{
|
||||||
bool allZero = true;
|
bool allZero = true;
|
||||||
for(int i = 0; i < kernel.rows * kernel.cols; ++i)
|
for(int i = 0; i < kernel.rows * kernel.cols; ++i)
|
||||||
@@ -591,48 +591,48 @@ void cv::ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point an
|
|||||||
{
|
{
|
||||||
kernel.data[0] = 1;
|
kernel.data[0] = 1;
|
||||||
}
|
}
|
||||||
morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations,borderType, borderValue);
|
morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations, borderType, borderValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
|
void cv::ocl::dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
|
||||||
int borderType,const Scalar& borderValue)
|
int borderType, const Scalar &borderValue)
|
||||||
{
|
{
|
||||||
morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations,borderType, borderValue);
|
morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor, int iterations,
|
void cv::ocl::morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor, int iterations,
|
||||||
int borderType,const Scalar& borderValue)
|
int borderType, const Scalar &borderValue)
|
||||||
{
|
{
|
||||||
oclMat temp;
|
oclMat temp;
|
||||||
switch( op )
|
switch( op )
|
||||||
{
|
{
|
||||||
case MORPH_ERODE:
|
case MORPH_ERODE:
|
||||||
erode( src, dst, kernel, anchor, iterations,borderType, borderValue);
|
erode( src, dst, kernel, anchor, iterations, borderType, borderValue);
|
||||||
break;
|
break;
|
||||||
case MORPH_DILATE:
|
case MORPH_DILATE:
|
||||||
dilate( src, dst, kernel, anchor, iterations,borderType, borderValue);
|
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue);
|
||||||
break;
|
break;
|
||||||
case MORPH_OPEN:
|
case MORPH_OPEN:
|
||||||
erode( src, temp, kernel, anchor, iterations,borderType, borderValue);
|
erode( src, temp, kernel, anchor, iterations, borderType, borderValue);
|
||||||
dilate( temp, dst, kernel, anchor, iterations,borderType, borderValue);
|
dilate( temp, dst, kernel, anchor, iterations, borderType, borderValue);
|
||||||
break;
|
break;
|
||||||
case CV_MOP_CLOSE:
|
case CV_MOP_CLOSE:
|
||||||
dilate( src, temp, kernel, anchor, iterations,borderType, borderValue);
|
dilate( src, temp, kernel, anchor, iterations, borderType, borderValue);
|
||||||
erode( temp, dst, kernel, anchor, iterations,borderType, borderValue);
|
erode( temp, dst, kernel, anchor, iterations, borderType, borderValue);
|
||||||
break;
|
break;
|
||||||
case CV_MOP_GRADIENT:
|
case CV_MOP_GRADIENT:
|
||||||
erode( src, temp, kernel, anchor, iterations,borderType, borderValue);
|
erode( src, temp, kernel, anchor, iterations, borderType, borderValue);
|
||||||
dilate( src, dst, kernel, anchor, iterations,borderType, borderValue);
|
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue);
|
||||||
subtract(dst, temp, dst);
|
subtract(dst, temp, dst);
|
||||||
break;
|
break;
|
||||||
case CV_MOP_TOPHAT:
|
case CV_MOP_TOPHAT:
|
||||||
erode( src, dst, kernel, anchor, iterations,borderType, borderValue);
|
erode( src, dst, kernel, anchor, iterations, borderType, borderValue);
|
||||||
dilate( dst, temp, kernel, anchor, iterations,borderType, borderValue);
|
dilate( dst, temp, kernel, anchor, iterations, borderType, borderValue);
|
||||||
subtract(src, temp, dst);
|
subtract(src, temp, dst);
|
||||||
break;
|
break;
|
||||||
case CV_MOP_BLACKHAT:
|
case CV_MOP_BLACKHAT:
|
||||||
dilate( src, dst, kernel, anchor, iterations,borderType, borderValue);
|
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue);
|
||||||
erode( dst, temp, kernel, anchor, iterations,borderType, borderValue);
|
erode( dst, temp, kernel, anchor, iterations, borderType, borderValue);
|
||||||
subtract(temp, src, dst);
|
subtract(temp, src, dst);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@@ -670,12 +670,12 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
|
|||||||
CV_Assert(src.clCxt == dst.clCxt);
|
CV_Assert(src.clCxt == dst.clCxt);
|
||||||
CV_Assert( (src.cols == dst.cols) &&
|
CV_Assert( (src.cols == dst.cols) &&
|
||||||
(src.rows == dst.rows) );
|
(src.rows == dst.rows) );
|
||||||
CV_Assert( (src.channels() == dst.channels()) );
|
CV_Assert( (src.oclchannels() == dst.oclchannels()) );
|
||||||
CV_Assert( (borderType != 0) );
|
CV_Assert( (borderType != 0) );
|
||||||
CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
|
CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
|
||||||
CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
|
CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
int cn = src.channels();
|
int cn = src.oclchannels();
|
||||||
int depth = src.depth();
|
int depth = src.depth();
|
||||||
|
|
||||||
string kernelName = "filter2D";
|
string kernelName = "filter2D";
|
||||||
@@ -692,14 +692,14 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
|
|||||||
{4, 4, 4, 4, 1, 1, 4}
|
{4, 4, 4, 4, 1, 1, 4}
|
||||||
};
|
};
|
||||||
|
|
||||||
int vector_length = vector_lengths[cn-1][depth];
|
int vector_length = vector_lengths[cn - 1][depth];
|
||||||
int offset_cols = (dst_offset_x) & (vector_length - 1);
|
int offset_cols = (dst_offset_x) & (vector_length - 1);
|
||||||
int cols = dst.cols + offset_cols;
|
int cols = dst.cols + offset_cols;
|
||||||
int rows = divUp(dst.rows, vector_length);
|
int rows = divUp(dst.rows, vector_length);
|
||||||
|
|
||||||
size_t localThreads[3] = {256, 1, 1};
|
size_t localThreads[3] = {256, 1, 1};
|
||||||
size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
|
size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
|
||||||
divUp(rows, localThreads[1]) * localThreads[1], 1
|
divUp(rows, localThreads[1]) *localThreads[1], 1
|
||||||
};
|
};
|
||||||
|
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
@@ -723,9 +723,9 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
|
|||||||
Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
|
Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
|
||||||
Point anchor, int borderType)
|
Point anchor, int borderType)
|
||||||
{
|
{
|
||||||
static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, 0, GPUFilter2D};
|
static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, GPUFilter2D, GPUFilter2D};
|
||||||
|
|
||||||
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4) && dstType == srcType);
|
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
|
||||||
|
|
||||||
oclMat gpu_krnl;
|
oclMat gpu_krnl;
|
||||||
int nDivisor;
|
int nDivisor;
|
||||||
@@ -767,8 +767,8 @@ namespace
|
|||||||
class SeparableFilterEngine_GPU : public FilterEngine_GPU
|
class SeparableFilterEngine_GPU : public FilterEngine_GPU
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_,
|
SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter_,
|
||||||
const Ptr<BaseColumnFilter_GPU>& columnFilter_) :
|
const Ptr<BaseColumnFilter_GPU> &columnFilter_) :
|
||||||
rowFilter(rowFilter_), columnFilter(columnFilter_)
|
rowFilter(rowFilter_), columnFilter(columnFilter_)
|
||||||
{
|
{
|
||||||
ksize = Size(rowFilter->ksize, columnFilter->ksize);
|
ksize = Size(rowFilter->ksize, columnFilter->ksize);
|
||||||
@@ -780,7 +780,7 @@ namespace
|
|||||||
Size src_size = src.size();
|
Size src_size = src.size();
|
||||||
int src_type = src.type();
|
int src_type = src.type();
|
||||||
|
|
||||||
int cn = src.channels();
|
int cn = src.oclchannels();
|
||||||
//dst.create(src_size, src_type);
|
//dst.create(src_size, src_type);
|
||||||
dst = Scalar(0.0);
|
dst = Scalar(0.0);
|
||||||
//dstBuf.create(src_size, src_type);
|
//dstBuf.create(src_size, src_type);
|
||||||
@@ -810,8 +810,8 @@ namespace
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
|
Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
|
||||||
const Ptr<BaseColumnFilter_GPU>& columnFilter)
|
const Ptr<BaseColumnFilter_GPU> &columnFilter)
|
||||||
{
|
{
|
||||||
return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter));
|
return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter));
|
||||||
}
|
}
|
||||||
@@ -1071,12 +1071,12 @@ void GPUFilterBox_32F_C4R(const oclMat &src, oclMat &dst,
|
|||||||
Ptr<BaseFilter_GPU> cv::ocl::getBoxFilter_GPU(int srcType, int dstType,
|
Ptr<BaseFilter_GPU> cv::ocl::getBoxFilter_GPU(int srcType, int dstType,
|
||||||
const Size &ksize, Point anchor, int borderType)
|
const Size &ksize, Point anchor, int borderType)
|
||||||
{
|
{
|
||||||
static const FilterBox_t FilterBox_callers[2][5] = {{0, GPUFilterBox_8u_C1R, 0, 0, GPUFilterBox_8u_C4R},
|
static const FilterBox_t FilterBox_callers[2][5] = {{0, GPUFilterBox_8u_C1R, 0, GPUFilterBox_8u_C4R, GPUFilterBox_8u_C4R},
|
||||||
{0, GPUFilterBox_32F_C1R, 0, 0, GPUFilterBox_32F_C4R}
|
{0, GPUFilterBox_32F_C1R, 0, GPUFilterBox_32F_C4R, GPUFilterBox_32F_C4R}
|
||||||
};
|
};
|
||||||
//Remove this check if more data types need to be supported.
|
//Remove this check if more data types need to be supported.
|
||||||
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4)
|
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 ||
|
||||||
&& dstType == srcType);
|
srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
|
||||||
|
|
||||||
normalizeAnchor(anchor, ksize);
|
normalizeAnchor(anchor, ksize);
|
||||||
|
|
||||||
@@ -1155,7 +1155,7 @@ template <typename T>
|
|||||||
void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype)
|
void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype)
|
||||||
{
|
{
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
int channels = src.channels();
|
int channels = src.oclchannels();
|
||||||
|
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
string kernelName = "row_filter";
|
string kernelName = "row_filter";
|
||||||
@@ -1208,7 +1208,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
|
|||||||
//sanity checks
|
//sanity checks
|
||||||
CV_Assert(clCxt == dst.clCxt);
|
CV_Assert(clCxt == dst.clCxt);
|
||||||
CV_Assert(src.cols == dst.cols);
|
CV_Assert(src.cols == dst.cols);
|
||||||
CV_Assert(src.channels() == dst.channels());
|
CV_Assert(src.oclchannels() == dst.oclchannels());
|
||||||
CV_Assert(ksize == (anchor << 1) + 1);
|
CV_Assert(ksize == (anchor << 1) + 1);
|
||||||
int src_pix_per_row, dst_pix_per_row;
|
int src_pix_per_row, dst_pix_per_row;
|
||||||
int src_offset_x, src_offset_y, dst_offset_in_pixel;
|
int src_offset_x, src_offset_y, dst_offset_in_pixel;
|
||||||
@@ -1283,7 +1283,7 @@ template <typename T>
|
|||||||
void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype)
|
void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype)
|
||||||
{
|
{
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
int channels = src.channels();
|
int channels = src.oclchannels();
|
||||||
|
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
string kernelName = "col_filter";
|
string kernelName = "col_filter";
|
||||||
@@ -1308,7 +1308,7 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
char compile_option[256];
|
char compile_option[256];
|
||||||
|
|
||||||
|
|
||||||
size_t globalThreads[3];
|
size_t globalThreads[3];
|
||||||
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
|
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
|
||||||
@@ -1319,52 +1319,52 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
|
|||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
||||||
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
||||||
anchor, localThreads[0], localThreads[1], channels, btype,"float","uchar","convert_uchar_sat");
|
anchor, localThreads[0], localThreads[1], channels, btype, "float", "uchar", "convert_uchar_sat");
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
globalThreads[0] = ((dst.cols + 1) / 2 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
globalThreads[0] = ((dst.cols + 1) / 2 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
||||||
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
||||||
anchor, localThreads[0], localThreads[1], channels, btype,"float2","uchar2","convert_uchar2_sat");
|
anchor, localThreads[0], localThreads[1], channels, btype, "float2", "uchar2", "convert_uchar2_sat");
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
case 4:
|
case 4:
|
||||||
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
||||||
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
||||||
anchor, localThreads[0], localThreads[1], channels, btype,"float4","uchar4","convert_uchar4_sat");
|
anchor, localThreads[0], localThreads[1], channels, btype, "float4", "uchar4", "convert_uchar4_sat");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
||||||
switch(dst.type())
|
switch(dst.type())
|
||||||
{
|
{
|
||||||
case CV_32SC1:
|
case CV_32SC1:
|
||||||
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
||||||
anchor, localThreads[0], localThreads[1], channels, btype,"float","int","convert_int_sat");
|
anchor, localThreads[0], localThreads[1], channels, btype, "float", "int", "convert_int_sat");
|
||||||
break;
|
break;
|
||||||
case CV_32SC3:
|
case CV_32SC3:
|
||||||
case CV_32SC4:
|
case CV_32SC4:
|
||||||
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
||||||
anchor, localThreads[0], localThreads[1], channels, btype,"float4","int4","convert_int4_sat");
|
anchor, localThreads[0], localThreads[1], channels, btype, "float4", "int4", "convert_int4_sat");
|
||||||
break;
|
break;
|
||||||
case CV_32FC1:
|
case CV_32FC1:
|
||||||
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
||||||
anchor, localThreads[0], localThreads[1], channels, btype,"float","float","");
|
anchor, localThreads[0], localThreads[1], channels, btype, "float", "float", "");
|
||||||
break;
|
break;
|
||||||
case CV_32FC3:
|
case CV_32FC3:
|
||||||
case CV_32FC4:
|
case CV_32FC4:
|
||||||
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
|
||||||
anchor, localThreads[0], localThreads[1], channels, btype,"float4","float4","");
|
anchor, localThreads[0], localThreads[1], channels, btype, "float4", "float4", "");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//sanity checks
|
//sanity checks
|
||||||
CV_Assert(clCxt == dst.clCxt);
|
CV_Assert(clCxt == dst.clCxt);
|
||||||
CV_Assert(src.cols == dst.cols);
|
CV_Assert(src.cols == dst.cols);
|
||||||
CV_Assert(src.channels() == dst.channels());
|
CV_Assert(src.oclchannels() == dst.oclchannels());
|
||||||
CV_Assert(ksize == (anchor << 1) + 1);
|
CV_Assert(ksize == (anchor << 1) + 1);
|
||||||
int src_pix_per_row, dst_pix_per_row;
|
int src_pix_per_row, dst_pix_per_row;
|
||||||
int src_offset_x, src_offset_y, dst_offset_in_pixel;
|
int src_offset_x, src_offset_y, dst_offset_in_pixel;
|
||||||
@@ -1379,8 +1379,8 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
|
|||||||
args.push_back(make_pair(sizeof(cl_mem), &dst.data));
|
args.push_back(make_pair(sizeof(cl_mem), &dst.data));
|
||||||
args.push_back(make_pair(sizeof(cl_int), (void *)&dst.cols));
|
args.push_back(make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||||
args.push_back(make_pair(sizeof(cl_int), (void *)&dst.rows));
|
args.push_back(make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||||
args.push_back(make_pair(sizeof(cl_int),(void*)&src.wholecols));
|
args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
|
||||||
args.push_back(make_pair(sizeof(cl_int),(void*)&src.wholerows));
|
args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
|
||||||
args.push_back(make_pair(sizeof(cl_int), (void *)&src_pix_per_row));
|
args.push_back(make_pair(sizeof(cl_int), (void *)&src_pix_per_row));
|
||||||
//args.push_back(make_pair(sizeof(cl_int),(void*)&src_offset_x));
|
//args.push_back(make_pair(sizeof(cl_int),(void*)&src_offset_x));
|
||||||
//args.push_back(make_pair(sizeof(cl_int),(void*)&src_offset_y));
|
//args.push_back(make_pair(sizeof(cl_int),(void*)&src_offset_y));
|
||||||
@@ -1441,18 +1441,18 @@ Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int
|
|||||||
|
|
||||||
void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype)
|
void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype)
|
||||||
{
|
{
|
||||||
if((dst.cols!=dst.wholecols) || (dst.rows!=dst.wholerows))//has roi
|
if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
|
||||||
{
|
{
|
||||||
if((bordertype & cv::BORDER_ISOLATED) != 0)
|
if((bordertype & cv::BORDER_ISOLATED) != 0)
|
||||||
{
|
{
|
||||||
bordertype &= ~cv::BORDER_ISOLATED;
|
bordertype &= ~cv::BORDER_ISOLATED;
|
||||||
if((bordertype != cv::BORDER_CONSTANT) &&
|
if((bordertype != cv::BORDER_CONSTANT) &&
|
||||||
(bordertype != cv::BORDER_REPLICATE))
|
(bordertype != cv::BORDER_REPLICATE))
|
||||||
{
|
{
|
||||||
CV_Error(CV_StsBadArg,"unsupported border type");
|
CV_Error(CV_StsBadArg, "unsupported border type");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if( ddepth < 0 )
|
if( ddepth < 0 )
|
||||||
ddepth = src.depth();
|
ddepth = src.depth();
|
||||||
//CV_Assert(ddepth == src.depth());
|
//CV_Assert(ddepth == src.depth());
|
||||||
@@ -1464,10 +1464,10 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat
|
|||||||
|
|
||||||
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType )
|
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType )
|
||||||
{
|
{
|
||||||
Mat kx, ky;
|
Mat kx, ky;
|
||||||
getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F );
|
getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F );
|
||||||
return createSeparableLinearFilter_GPU(srcType, dstType,
|
return createSeparableLinearFilter_GPU(srcType, dstType,
|
||||||
kx, ky, Point(-1,-1), 0, borderType );
|
kx, ky, Point(-1, -1), 0, borderType );
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@@ -1517,9 +1517,9 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy,
|
|||||||
|
|
||||||
void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
|
void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
|
||||||
{
|
{
|
||||||
if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
|
if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1576,18 +1576,18 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
|
|||||||
src.copyTo(dst);
|
src.copyTo(dst);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if((dst.cols!=dst.wholecols) || (dst.rows!=dst.wholerows))//has roi
|
if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
|
||||||
{
|
{
|
||||||
if((bordertype & cv::BORDER_ISOLATED) != 0)
|
if((bordertype & cv::BORDER_ISOLATED) != 0)
|
||||||
{
|
{
|
||||||
bordertype &= ~cv::BORDER_ISOLATED;
|
bordertype &= ~cv::BORDER_ISOLATED;
|
||||||
if((bordertype != cv::BORDER_CONSTANT) &&
|
if((bordertype != cv::BORDER_CONSTANT) &&
|
||||||
(bordertype != cv::BORDER_REPLICATE))
|
(bordertype != cv::BORDER_REPLICATE))
|
||||||
{
|
{
|
||||||
CV_Error(CV_StsBadArg,"unsupported border type");
|
CV_Error(CV_StsBadArg, "unsupported border type");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dst.create(src.size(), src.type());
|
dst.create(src.size(), src.type());
|
||||||
if( bordertype != BORDER_CONSTANT )
|
if( bordertype != BORDER_CONSTANT )
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -51,111 +51,114 @@
|
|||||||
#include "clAmdBlas.h"
|
#include "clAmdBlas.h"
|
||||||
|
|
||||||
#if !defined (HAVE_OPENCL)
|
#if !defined (HAVE_OPENCL)
|
||||||
void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
|
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
|
||||||
void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha,
|
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
|
||||||
const oclMat& src3, double beta, oclMat& dst, int flags)
|
const oclMat &src3, double beta, oclMat &dst, int flags)
|
||||||
{
|
{
|
||||||
CV_Assert(src1.cols == src2.rows &&
|
CV_Assert(src1.cols == src2.rows &&
|
||||||
(src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols));
|
(src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols));
|
||||||
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
|
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
|
||||||
if(!src3.empty())
|
if(!src3.empty())
|
||||||
{
|
{
|
||||||
src3.copyTo(dst);
|
src3.copyTo(dst);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
dst.create(src1.rows, src2.cols, src1.type());
|
dst.create(src1.rows, src2.cols, src1.type());
|
||||||
dst.setTo(Scalar::all(0));
|
dst.setTo(Scalar::all(0));
|
||||||
}
|
}
|
||||||
openCLSafeCall( clAmdBlasSetup() );
|
openCLSafeCall( clAmdBlasSetup() );
|
||||||
|
|
||||||
const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
|
|
||||||
const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
|
|
||||||
const clAmdBlasOrder order = clAmdBlasRowMajor;
|
|
||||||
|
|
||||||
const int M = src1.rows;
|
const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
|
||||||
const int N = src2.cols;
|
const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
|
||||||
const int K = src1.cols;
|
const clAmdBlasOrder order = clAmdBlasRowMajor;
|
||||||
int lda = src1.step;
|
|
||||||
int ldb = src2.step;
|
const int M = src1.rows;
|
||||||
int ldc = dst.step;
|
const int N = src2.cols;
|
||||||
int offa = src1.offset;
|
const int K = src1.cols;
|
||||||
int offb = src2.offset;
|
int lda = src1.step;
|
||||||
int offc = dst.offset;
|
int ldb = src2.step;
|
||||||
|
int ldc = dst.step;
|
||||||
|
int offa = src1.offset;
|
||||||
|
int offb = src2.offset;
|
||||||
|
int offc = dst.offset;
|
||||||
|
|
||||||
|
|
||||||
switch(src1.type())
|
switch(src1.type())
|
||||||
{
|
{
|
||||||
case CV_32FC1:
|
case CV_32FC1:
|
||||||
lda /= sizeof(float);
|
lda /= sizeof(float);
|
||||||
ldb /= sizeof(float);
|
ldb /= sizeof(float);
|
||||||
ldc /= sizeof(float);
|
ldc /= sizeof(float);
|
||||||
offa /= sizeof(float);
|
offa /= sizeof(float);
|
||||||
offb /= sizeof(float);
|
offb /= sizeof(float);
|
||||||
offc /= sizeof(float);
|
offc /= sizeof(float);
|
||||||
openCLSafeCall
|
openCLSafeCall
|
||||||
(
|
(
|
||||||
clAmdBlasSgemmEx(order, transA, transB, M, N, K,
|
clAmdBlasSgemmEx(order, transA, transB, M, N, K,
|
||||||
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||||
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
case CV_64FC1:
|
case CV_64FC1:
|
||||||
lda /= sizeof(double);
|
lda /= sizeof(double);
|
||||||
ldb /= sizeof(double);
|
ldb /= sizeof(double);
|
||||||
ldc /= sizeof(double);
|
ldc /= sizeof(double);
|
||||||
offa /= sizeof(double);
|
offa /= sizeof(double);
|
||||||
offb /= sizeof(double);
|
offb /= sizeof(double);
|
||||||
offc /= sizeof(double);
|
offc /= sizeof(double);
|
||||||
openCLSafeCall
|
openCLSafeCall
|
||||||
(
|
(
|
||||||
clAmdBlasDgemmEx(order, transA, transB, M, N, K,
|
clAmdBlasDgemmEx(order, transA, transB, M, N, K,
|
||||||
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||||
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
case CV_32FC2:
|
case CV_32FC2:
|
||||||
{
|
{
|
||||||
lda /= sizeof(std::complex<float>);
|
lda /= sizeof(std::complex<float>);
|
||||||
ldb /= sizeof(std::complex<float>);
|
ldb /= sizeof(std::complex<float>);
|
||||||
ldc /= sizeof(std::complex<float>);
|
ldc /= sizeof(std::complex<float>);
|
||||||
offa /= sizeof(std::complex<float>);
|
offa /= sizeof(std::complex<float>);
|
||||||
offb /= sizeof(std::complex<float>);
|
offb /= sizeof(std::complex<float>);
|
||||||
offc /= sizeof(std::complex<float>);
|
offc /= sizeof(std::complex<float>);
|
||||||
cl_float2 alpha_2 = {{alpha, 0}};
|
cl_float2 alpha_2 = {{alpha, 0}};
|
||||||
cl_float2 beta_2 = {{beta, 0}};
|
cl_float2 beta_2 = {{beta, 0}};
|
||||||
openCLSafeCall
|
openCLSafeCall
|
||||||
(
|
(
|
||||||
clAmdBlasCgemmEx(order, transA, transB, M, N, K,
|
clAmdBlasCgemmEx(order, transA, transB, M, N, K,
|
||||||
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||||
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CV_64FC2:
|
case CV_64FC2:
|
||||||
{
|
{
|
||||||
lda /= sizeof(std::complex<double>);
|
lda /= sizeof(std::complex<double>);
|
||||||
ldb /= sizeof(std::complex<double>);
|
ldb /= sizeof(std::complex<double>);
|
||||||
ldc /= sizeof(std::complex<double>);
|
ldc /= sizeof(std::complex<double>);
|
||||||
offa /= sizeof(std::complex<double>);
|
offa /= sizeof(std::complex<double>);
|
||||||
offb /= sizeof(std::complex<double>);
|
offb /= sizeof(std::complex<double>);
|
||||||
offc /= sizeof(std::complex<double>);
|
offc /= sizeof(std::complex<double>);
|
||||||
cl_double2 alpha_2 = {{alpha, 0}};
|
cl_double2 alpha_2 = {{alpha, 0}};
|
||||||
cl_double2 beta_2 = {{beta, 0}};
|
cl_double2 beta_2 = {{beta, 0}};
|
||||||
openCLSafeCall
|
openCLSafeCall
|
||||||
(
|
(
|
||||||
clAmdBlasZgemmEx(order, transA, transB, M, N, K,
|
clAmdBlasZgemmEx(order, transA, transB, M, N, K,
|
||||||
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||||
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
clAmdBlasTeardown();
|
clAmdBlasTeardown();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -52,6 +52,7 @@
|
|||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <string>
|
||||||
#ifdef EMU
|
#ifdef EMU
|
||||||
#include "runCL.h"
|
#include "runCL.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -299,7 +300,7 @@ const float icv_stage_threshold_bias = 0.0001f;
|
|||||||
double globaltime = 0;
|
double globaltime = 0;
|
||||||
|
|
||||||
|
|
||||||
CvHaarClassifierCascade*
|
CvHaarClassifierCascade *
|
||||||
gpuCreateHaarClassifierCascade( int stage_count )
|
gpuCreateHaarClassifierCascade( int stage_count )
|
||||||
{
|
{
|
||||||
CvHaarClassifierCascade *cascade = 0;
|
CvHaarClassifierCascade *cascade = 0;
|
||||||
@@ -331,7 +332,7 @@ gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade )
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* create more efficient internal representation of haar classifier cascade */
|
/* create more efficient internal representation of haar classifier cascade */
|
||||||
GpuHidHaarClassifierCascade*
|
GpuHidHaarClassifierCascade *
|
||||||
gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier)
|
gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier)
|
||||||
{
|
{
|
||||||
GpuHidHaarClassifierCascade *out = 0;
|
GpuHidHaarClassifierCascade *out = 0;
|
||||||
@@ -888,6 +889,13 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
|
bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
|
||||||
bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0;
|
bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0;
|
||||||
|
|
||||||
|
//the Intel HD Graphics is unsupported
|
||||||
|
if (gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos)
|
||||||
|
{
|
||||||
|
cout << " Intel HD GPU device unsupported " << endl;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
//double t = 0;
|
//double t = 0;
|
||||||
if( maxSize.height == 0 || maxSize.width == 0 )
|
if( maxSize.height == 0 || maxSize.width == 0 )
|
||||||
{
|
{
|
||||||
@@ -948,7 +956,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
vector<float> scalev;
|
vector<float> scalev;
|
||||||
for(factor = 1.f;; factor *= scaleFactor)
|
for(factor = 1.f;; factor *= scaleFactor)
|
||||||
{
|
{
|
||||||
CvSize winSize = { cvRound(winSize0.width *factor), cvRound(winSize0.height *factor) };
|
CvSize winSize = { cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) };
|
||||||
sz.width = cvRound( gimg.cols / factor ) + 1;
|
sz.width = cvRound( gimg.cols / factor ) + 1;
|
||||||
sz.height = cvRound( gimg.rows / factor ) + 1;
|
sz.height = cvRound( gimg.rows / factor ) + 1;
|
||||||
CvSize sz1 = { sz.width - winSize0.width - 1, sz.height - winSize0.height - 1 };
|
CvSize sz1 = { sz.width - winSize0.width - 1, sz.height - winSize0.height - 1 };
|
||||||
@@ -985,7 +993,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
|
|
||||||
size_t blocksize = 8;
|
size_t blocksize = 8;
|
||||||
size_t localThreads[3] = { blocksize, blocksize , 1 };
|
size_t localThreads[3] = { blocksize, blocksize , 1 };
|
||||||
size_t globalThreads[3] = { grp_per_CU * ((gsum.clCxt)->impl->maxComputeUnits) *localThreads[0],
|
size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->impl->maxComputeUnits) *localThreads[0],
|
||||||
localThreads[1], 1
|
localThreads[1], 1
|
||||||
};
|
};
|
||||||
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
||||||
@@ -1067,7 +1075,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
//classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status);
|
//classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status);
|
||||||
//status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL);
|
//status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL);
|
||||||
|
|
||||||
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,nodenum * sizeof(GpuHidHaarTreeNode));
|
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
|
||||||
//openCLVerifyCall(status);
|
//openCLVerifyCall(status);
|
||||||
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
|
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
|
||||||
nodenum * sizeof(GpuHidHaarTreeNode),
|
nodenum * sizeof(GpuHidHaarTreeNode),
|
||||||
@@ -1104,10 +1112,10 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
int argcount = 0;
|
int argcount = 0;
|
||||||
//int grpnumperline = ((m + localThreads[0] - 1) / localThreads[0]);
|
//int grpnumperline = ((m + localThreads[0] - 1) / localThreads[0]);
|
||||||
//int totalgrp = ((n + localThreads[1] - 1) / localThreads[1])*grpnumperline;
|
//int totalgrp = ((n + localThreads[1] - 1) / localThreads[1])*grpnumperline;
|
||||||
// openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
|
// openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
|
||||||
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_mem),(void*)&cascadebuffer));
|
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_mem),(void*)&cascadebuffer));
|
||||||
|
|
||||||
vector<pair<size_t,const void *> > args;
|
vector<pair<size_t, const void *> > args;
|
||||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
|
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
|
||||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
|
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
|
||||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
|
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
|
||||||
@@ -1124,40 +1132,40 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
|
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
|
||||||
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
|
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
|
||||||
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
|
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
|
||||||
/*
|
/*
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/
|
||||||
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&n));
|
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&n));
|
||||||
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&grpnumperline));
|
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&grpnumperline));
|
||||||
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&totalgrp));
|
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&totalgrp));
|
||||||
|
|
||||||
// openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
|
// openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
// openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
|
// openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
|
||||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
|
||||||
//t = (double)cvGetTickCount() - t;
|
//t = (double)cvGetTickCount() - t;
|
||||||
//printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
//printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||||
//t = (double)cvGetTickCount();
|
//t = (double)cvGetTickCount();
|
||||||
//openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, 0, 4 * sizeof(int)*outputsz, candidate, 0, NULL, NULL));
|
//openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, 0, 4 * sizeof(int)*outputsz, candidate, 0, NULL, NULL));
|
||||||
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
|
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
|
||||||
|
|
||||||
for(int i = 0; i < outputsz; i++)
|
for(int i = 0; i < outputsz; i++)
|
||||||
if(candidate[4*i+2] != 0)
|
if(candidate[4 * i + 2] != 0)
|
||||||
allCandidates.push_back(Rect(candidate[4*i], candidate[4*i+1], candidate[4*i+2], candidate[4*i+3]));
|
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3]));
|
||||||
// t = (double)cvGetTickCount() - t;
|
// t = (double)cvGetTickCount() - t;
|
||||||
//printf( "post time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
//printf( "post time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||||
//t = (double)cvGetTickCount();
|
//t = (double)cvGetTickCount();
|
||||||
@@ -1168,7 +1176,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
|
openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
|
||||||
openCLSafeCall(clReleaseMemObject(nodebuffer));
|
openCLSafeCall(clReleaseMemObject(nodebuffer));
|
||||||
openCLSafeCall(clReleaseMemObject(candidatebuffer));
|
openCLSafeCall(clReleaseMemObject(candidatebuffer));
|
||||||
// openCLSafeCall(clReleaseKernel(kernel));
|
// openCLSafeCall(clReleaseKernel(kernel));
|
||||||
//t = (double)cvGetTickCount() - t;
|
//t = (double)cvGetTickCount() - t;
|
||||||
//printf( "release time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
//printf( "release time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||||
}
|
}
|
||||||
@@ -1200,8 +1208,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
cvRound(factor * winsize0.height) < gimg.rows - 10;
|
cvRound(factor * winsize0.height) < gimg.rows - 10;
|
||||||
n_factors++, factor *= scaleFactor )
|
n_factors++, factor *= scaleFactor )
|
||||||
{
|
{
|
||||||
CvSize winSize = { cvRound( winsize0.width *factor ),
|
CvSize winSize = { cvRound( winsize0.width * factor ),
|
||||||
cvRound( winsize0.height *factor )
|
cvRound( winsize0.height * factor )
|
||||||
};
|
};
|
||||||
if( winSize.width < minSize.width || winSize.height < minSize.height )
|
if( winSize.width < minSize.width || winSize.height < minSize.height )
|
||||||
{
|
{
|
||||||
@@ -1232,13 +1240,13 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
|
int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
|
||||||
sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
|
sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
|
||||||
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
|
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
|
||||||
nodenum * sizeof(GpuHidHaarTreeNode));
|
nodenum * sizeof(GpuHidHaarTreeNode));
|
||||||
//openCLVerifyCall(status);
|
//openCLVerifyCall(status);
|
||||||
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
|
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
|
||||||
nodenum * sizeof(GpuHidHaarTreeNode),
|
nodenum * sizeof(GpuHidHaarTreeNode),
|
||||||
node, 0, NULL, NULL));
|
node, 0, NULL, NULL));
|
||||||
cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE,
|
cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE,
|
||||||
loopcount * nodenum * sizeof(GpuHidHaarTreeNode));
|
loopcount * nodenum * sizeof(GpuHidHaarTreeNode));
|
||||||
int startstage = 0;
|
int startstage = 0;
|
||||||
int endstage = gcascade->count;
|
int endstage = gcascade->count;
|
||||||
//cl_kernel kernel;
|
//cl_kernel kernel;
|
||||||
@@ -1270,25 +1278,25 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
int startnodenum = nodenum * i;
|
int startnodenum = nodenum * i;
|
||||||
int argcounts = 0;
|
int argcounts = 0;
|
||||||
float factor2 = (float)factor;
|
float factor2 = (float)factor;
|
||||||
/*
|
/*
|
||||||
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer));
|
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer));
|
||||||
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer));
|
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer));
|
||||||
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2));
|
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2));
|
||||||
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i]));
|
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i]));
|
||||||
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum));
|
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum));
|
||||||
*/
|
*/
|
||||||
|
|
||||||
vector<pair<size_t,const void *> > args1;
|
vector<pair<size_t, const void *> > args1;
|
||||||
args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
|
args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
|
||||||
args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
|
args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
|
||||||
args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
|
args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
|
||||||
args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
|
args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
|
||||||
args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
|
args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
|
||||||
|
|
||||||
size_t globalThreads2[3] = {nodenum,1,1};
|
size_t globalThreads2[3] = {nodenum, 1, 1};
|
||||||
size_t localThreads2[3] = {256,1,1};
|
size_t localThreads2[3] = {256, 1, 1};
|
||||||
|
|
||||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
|
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
|
||||||
|
|
||||||
//clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel2, 1, NULL, globalThreads2, 0, 0, NULL, NULL);
|
//clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel2, 1, NULL, globalThreads2, 0, 0, NULL, NULL);
|
||||||
//clFinish(gsum.clCxt->impl->clCmdQueue);
|
//clFinish(gsum.clCxt->impl->clCmdQueue);
|
||||||
@@ -1328,7 +1336,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&correctionbuffer));
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&correctionbuffer));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));*/
|
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));*/
|
||||||
|
|
||||||
vector<pair<size_t,const void *> > args;
|
vector<pair<size_t, const void *> > args;
|
||||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
|
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
|
||||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
|
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
|
||||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
|
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
|
||||||
@@ -1345,9 +1353,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
|
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
|
||||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
|
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
|
||||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
|
args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
|
||||||
|
|
||||||
|
|
||||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
|
||||||
//openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
|
//openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
|
||||||
//openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
|
//openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
|
||||||
|
|
||||||
@@ -1356,8 +1364,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
|
|
||||||
for(int i = 0; i < outputsz; i++)
|
for(int i = 0; i < outputsz; i++)
|
||||||
{
|
{
|
||||||
if(candidate[4*i+2] != 0)
|
if(candidate[4 * i + 2] != 0)
|
||||||
allCandidates.push_back(Rect(candidate[4*i], candidate[4*i+1], candidate[4*i+2], candidate[4*i+3]));
|
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3]));
|
||||||
}
|
}
|
||||||
|
|
||||||
free(scaleinfo);
|
free(scaleinfo);
|
||||||
@@ -1420,7 +1428,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
CvHaarClassifierCascade*
|
CvHaarClassifierCascade *
|
||||||
gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
|
gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@@ -1444,7 +1452,7 @@ gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
|
|||||||
assert( count > 0 );
|
assert( count > 0 );
|
||||||
cascade->stage_classifier[i].count = count;
|
cascade->stage_classifier[i].count = count;
|
||||||
cascade->stage_classifier[i].classifier =
|
cascade->stage_classifier[i].classifier =
|
||||||
(CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0]));
|
(CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0]));
|
||||||
|
|
||||||
for( j = 0; j < count; j++ )
|
for( j = 0; j < count; j++ )
|
||||||
{
|
{
|
||||||
@@ -1456,11 +1464,11 @@ gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
|
|||||||
stage += dl;
|
stage += dl;
|
||||||
|
|
||||||
classifier->haar_feature = (CvHaarFeature *) cvAlloc(
|
classifier->haar_feature = (CvHaarFeature *) cvAlloc(
|
||||||
classifier->count * ( sizeof( *classifier->haar_feature ) +
|
classifier->count * ( sizeof( *classifier->haar_feature ) +
|
||||||
sizeof( *classifier->threshold ) +
|
sizeof( *classifier->threshold ) +
|
||||||
sizeof( *classifier->left ) +
|
sizeof( *classifier->left ) +
|
||||||
sizeof( *classifier->right ) ) +
|
sizeof( *classifier->right ) ) +
|
||||||
(classifier->count + 1) * sizeof( *classifier->alpha ) );
|
(classifier->count + 1) * sizeof( *classifier->alpha ) );
|
||||||
classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
|
classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
|
||||||
classifier->left = (int *) (classifier->threshold + classifier->count);
|
classifier->left = (int *) (classifier->threshold + classifier->count);
|
||||||
classifier->right = (int *) (classifier->left + classifier->count);
|
classifier->right = (int *) (classifier->left + classifier->count);
|
||||||
@@ -1478,8 +1486,8 @@ gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
|
|||||||
CvRect r;
|
CvRect r;
|
||||||
int band = 0;
|
int band = 0;
|
||||||
sscanf( stage, "%d%d%d%d%d%f%n",
|
sscanf( stage, "%d%d%d%d%d%f%n",
|
||||||
&r.x, &r.y, &r.width, &r.height, &band,
|
&r.x, &r.y, &r.width, &r.height, &band,
|
||||||
&(classifier->haar_feature[l].rect[k].weight), &dl );
|
&(classifier->haar_feature[l].rect[k].weight), &dl );
|
||||||
stage += dl;
|
stage += dl;
|
||||||
classifier->haar_feature[l].rect[k].r = r;
|
classifier->haar_feature[l].rect[k].r = r;
|
||||||
}
|
}
|
||||||
@@ -1491,12 +1499,12 @@ gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
|
|||||||
for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ )
|
for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ )
|
||||||
{
|
{
|
||||||
memset( classifier->haar_feature[l].rect + k, 0,
|
memset( classifier->haar_feature[l].rect + k, 0,
|
||||||
sizeof(classifier->haar_feature[l].rect[k]) );
|
sizeof(classifier->haar_feature[l].rect[k]) );
|
||||||
}
|
}
|
||||||
|
|
||||||
sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]),
|
sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]),
|
||||||
&(classifier->left[l]),
|
&(classifier->left[l]),
|
||||||
&(classifier->right[l]), &dl );
|
&(classifier->right[l]), &dl );
|
||||||
stage += dl;
|
stage += dl;
|
||||||
}
|
}
|
||||||
for( l = 0; l <= classifier->count; l++ )
|
for( l = 0; l <= classifier->count; l++ )
|
||||||
@@ -1536,7 +1544,7 @@ gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
|
|||||||
#define _MAX_PATH 1024
|
#define _MAX_PATH 1024
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
CV_IMPL CvHaarClassifierCascade*
|
CV_IMPL CvHaarClassifierCascade *
|
||||||
gpuLoadHaarClassifierCascade( const char *directory, CvSize orig_window_size )
|
gpuLoadHaarClassifierCascade( const char *directory, CvSize orig_window_size )
|
||||||
{
|
{
|
||||||
const char **input_cascade = 0;
|
const char **input_cascade = 0;
|
||||||
@@ -1649,7 +1657,7 @@ gpuIsHaarClassifier( const void *struct_ptr )
|
|||||||
return CV_IS_HAAR_CLASSIFIER( struct_ptr );
|
return CV_IS_HAAR_CLASSIFIER( struct_ptr );
|
||||||
}
|
}
|
||||||
|
|
||||||
void*
|
void *
|
||||||
gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
||||||
{
|
{
|
||||||
CvHaarClassifierCascade *cascade = NULL;
|
CvHaarClassifierCascade *cascade = NULL;
|
||||||
@@ -1699,15 +1707,15 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
|
|
||||||
trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME );
|
trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME );
|
||||||
if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag )
|
if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag )
|
||||||
|| trees_fn->data.seq->total <= 0 )
|
|| trees_fn->data.seq->total <= 0 )
|
||||||
{
|
{
|
||||||
sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i );
|
sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
|
|
||||||
cascade->stage_classifier[i].classifier =
|
cascade->stage_classifier[i].classifier =
|
||||||
(CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total
|
(CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total
|
||||||
* sizeof( cascade->stage_classifier[i].classifier[0] ) );
|
* sizeof( cascade->stage_classifier[i].classifier[0] ) );
|
||||||
for( j = 0; j < trees_fn->data.seq->total; ++j )
|
for( j = 0; j < trees_fn->data.seq->total; ++j )
|
||||||
{
|
{
|
||||||
cascade->stage_classifier[i].classifier[j].haar_feature = NULL;
|
cascade->stage_classifier[i].classifier[j].haar_feature = NULL;
|
||||||
@@ -1727,17 +1735,17 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 )
|
if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 )
|
||||||
{
|
{
|
||||||
sprintf( buf, "Tree node is not a valid sequence."
|
sprintf( buf, "Tree node is not a valid sequence."
|
||||||
" (stage %d, tree %d)", i, j );
|
" (stage %d, tree %d)", i, j );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
|
|
||||||
classifier->count = tree_fn->data.seq->total;
|
classifier->count = tree_fn->data.seq->total;
|
||||||
classifier->haar_feature = (CvHaarFeature *) cvAlloc(
|
classifier->haar_feature = (CvHaarFeature *) cvAlloc(
|
||||||
classifier->count * ( sizeof( *classifier->haar_feature ) +
|
classifier->count * ( sizeof( *classifier->haar_feature ) +
|
||||||
sizeof( *classifier->threshold ) +
|
sizeof( *classifier->threshold ) +
|
||||||
sizeof( *classifier->left ) +
|
sizeof( *classifier->left ) +
|
||||||
sizeof( *classifier->right ) ) +
|
sizeof( *classifier->right ) ) +
|
||||||
(classifier->count + 1) * sizeof( *classifier->alpha ) );
|
(classifier->count + 1) * sizeof( *classifier->alpha ) );
|
||||||
classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
|
classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
|
||||||
classifier->left = (int *) (classifier->threshold + classifier->count);
|
classifier->left = (int *) (classifier->threshold + classifier->count);
|
||||||
classifier->right = (int *) (classifier->left + classifier->count);
|
classifier->right = (int *) (classifier->left + classifier->count);
|
||||||
@@ -1755,23 +1763,23 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( !CV_NODE_IS_MAP( node_fn->tag ) )
|
if( !CV_NODE_IS_MAP( node_fn->tag ) )
|
||||||
{
|
{
|
||||||
sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)",
|
sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)",
|
||||||
k, i, j );
|
k, i, j );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME );
|
feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME );
|
||||||
if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) )
|
if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) )
|
||||||
{
|
{
|
||||||
sprintf( buf, "Feature node is not a valid map. "
|
sprintf( buf, "Feature node is not a valid map. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME );
|
rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME );
|
||||||
if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag )
|
if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag )
|
||||||
|| rects_fn->data.seq->total < 1
|
|| rects_fn->data.seq->total < 1
|
||||||
|| rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX )
|
|| rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX )
|
||||||
{
|
{
|
||||||
sprintf( buf, "Rects node is not a valid sequence. "
|
sprintf( buf, "Rects node is not a valid sequence. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
cvStartReadSeq( rects_fn->data.seq, &rects_reader );
|
cvStartReadSeq( rects_fn->data.seq, &rects_reader );
|
||||||
@@ -1784,7 +1792,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 )
|
if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 )
|
||||||
{
|
{
|
||||||
sprintf( buf, "Rect %d is not a valid sequence. "
|
sprintf( buf, "Rect %d is not a valid sequence. "
|
||||||
"(stage %d, tree %d, node %d)", l, i, j, k );
|
"(stage %d, tree %d, node %d)", l, i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1792,7 +1800,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
|
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
|
||||||
{
|
{
|
||||||
sprintf( buf, "x coordinate must be non-negative integer. "
|
sprintf( buf, "x coordinate must be non-negative integer. "
|
||||||
"(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
|
"(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
r.x = fn->data.i;
|
r.x = fn->data.i;
|
||||||
@@ -1800,27 +1808,27 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
|
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
|
||||||
{
|
{
|
||||||
sprintf( buf, "y coordinate must be non-negative integer. "
|
sprintf( buf, "y coordinate must be non-negative integer. "
|
||||||
"(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
|
"(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
r.y = fn->data.i;
|
r.y = fn->data.i;
|
||||||
fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 );
|
fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 );
|
||||||
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
|
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
|
||||||
|| r.x + fn->data.i > cascade->orig_window_size.width )
|
|| r.x + fn->data.i > cascade->orig_window_size.width )
|
||||||
{
|
{
|
||||||
sprintf( buf, "width must be positive integer and "
|
sprintf( buf, "width must be positive integer and "
|
||||||
"(x + width) must not exceed window width. "
|
"(x + width) must not exceed window width. "
|
||||||
"(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
|
"(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
r.width = fn->data.i;
|
r.width = fn->data.i;
|
||||||
fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 );
|
fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 );
|
||||||
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
|
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
|
||||||
|| r.y + fn->data.i > cascade->orig_window_size.height )
|
|| r.y + fn->data.i > cascade->orig_window_size.height )
|
||||||
{
|
{
|
||||||
sprintf( buf, "height must be positive integer and "
|
sprintf( buf, "height must be positive integer and "
|
||||||
"(y + height) must not exceed window height. "
|
"(y + height) must not exceed window height. "
|
||||||
"(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
|
"(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
r.height = fn->data.i;
|
r.height = fn->data.i;
|
||||||
@@ -1828,7 +1836,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( !CV_NODE_IS_REAL( fn->tag ) )
|
if( !CV_NODE_IS_REAL( fn->tag ) )
|
||||||
{
|
{
|
||||||
sprintf( buf, "weight must be real number. "
|
sprintf( buf, "weight must be real number. "
|
||||||
"(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
|
"(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1847,7 +1855,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( !fn || !CV_NODE_IS_INT( fn->tag ) )
|
if( !fn || !CV_NODE_IS_INT( fn->tag ) )
|
||||||
{
|
{
|
||||||
sprintf( buf, "tilted must be 0 or 1. "
|
sprintf( buf, "tilted must be 0 or 1. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
classifier->haar_feature[k].tilted = ( fn->data.i != 0 );
|
classifier->haar_feature[k].tilted = ( fn->data.i != 0 );
|
||||||
@@ -1855,7 +1863,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( !fn || !CV_NODE_IS_REAL( fn->tag ) )
|
if( !fn || !CV_NODE_IS_REAL( fn->tag ) )
|
||||||
{
|
{
|
||||||
sprintf( buf, "threshold must be real number. "
|
sprintf( buf, "threshold must be real number. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
classifier->threshold[k] = (float) fn->data.f;
|
classifier->threshold[k] = (float) fn->data.f;
|
||||||
@@ -1863,10 +1871,10 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( fn )
|
if( fn )
|
||||||
{
|
{
|
||||||
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
|
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
|
||||||
|| fn->data.i >= tree_fn->data.seq->total )
|
|| fn->data.i >= tree_fn->data.seq->total )
|
||||||
{
|
{
|
||||||
sprintf( buf, "left node must be valid node number. "
|
sprintf( buf, "left node must be valid node number. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
/* left node */
|
/* left node */
|
||||||
@@ -1878,20 +1886,20 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( !fn )
|
if( !fn )
|
||||||
{
|
{
|
||||||
sprintf( buf, "left node or left value must be specified. "
|
sprintf( buf, "left node or left value must be specified. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
if( !CV_NODE_IS_REAL( fn->tag ) )
|
if( !CV_NODE_IS_REAL( fn->tag ) )
|
||||||
{
|
{
|
||||||
sprintf( buf, "left value must be real number. "
|
sprintf( buf, "left value must be real number. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
/* left value */
|
/* left value */
|
||||||
if( last_idx >= classifier->count + 1 )
|
if( last_idx >= classifier->count + 1 )
|
||||||
{
|
{
|
||||||
sprintf( buf, "Tree structure is broken: too many values. "
|
sprintf( buf, "Tree structure is broken: too many values. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
classifier->left[k] = -last_idx;
|
classifier->left[k] = -last_idx;
|
||||||
@@ -1901,10 +1909,10 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( fn )
|
if( fn )
|
||||||
{
|
{
|
||||||
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
|
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
|
||||||
|| fn->data.i >= tree_fn->data.seq->total )
|
|| fn->data.i >= tree_fn->data.seq->total )
|
||||||
{
|
{
|
||||||
sprintf( buf, "right node must be valid node number. "
|
sprintf( buf, "right node must be valid node number. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
/* right node */
|
/* right node */
|
||||||
@@ -1916,20 +1924,20 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( !fn )
|
if( !fn )
|
||||||
{
|
{
|
||||||
sprintf( buf, "right node or right value must be specified. "
|
sprintf( buf, "right node or right value must be specified. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
if( !CV_NODE_IS_REAL( fn->tag ) )
|
if( !CV_NODE_IS_REAL( fn->tag ) )
|
||||||
{
|
{
|
||||||
sprintf( buf, "right value must be real number. "
|
sprintf( buf, "right value must be real number. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
/* right value */
|
/* right value */
|
||||||
if( last_idx >= classifier->count + 1 )
|
if( last_idx >= classifier->count + 1 )
|
||||||
{
|
{
|
||||||
sprintf( buf, "Tree structure is broken: too many values. "
|
sprintf( buf, "Tree structure is broken: too many values. "
|
||||||
"(stage %d, tree %d, node %d)", i, j, k );
|
"(stage %d, tree %d, node %d)", i, j, k );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
classifier->right[k] = -last_idx;
|
classifier->right[k] = -last_idx;
|
||||||
@@ -1941,7 +1949,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
if( last_idx != classifier->count + 1 )
|
if( last_idx != classifier->count + 1 )
|
||||||
{
|
{
|
||||||
sprintf( buf, "Tree structure is broken: too few values. "
|
sprintf( buf, "Tree structure is broken: too few values. "
|
||||||
"(stage %d, tree %d)", i, j );
|
"(stage %d, tree %d)", i, j );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1961,7 +1969,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
|
|
||||||
fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME );
|
fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME );
|
||||||
if( !fn || !CV_NODE_IS_INT( fn->tag )
|
if( !fn || !CV_NODE_IS_INT( fn->tag )
|
||||||
|| fn->data.i < -1 || fn->data.i >= cascade->count )
|
|| fn->data.i < -1 || fn->data.i >= cascade->count )
|
||||||
{
|
{
|
||||||
sprintf( buf, "parent must be integer number. (stage %d)", i );
|
sprintf( buf, "parent must be integer number. (stage %d)", i );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
@@ -1969,7 +1977,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
parent = fn->data.i;
|
parent = fn->data.i;
|
||||||
fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME );
|
fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME );
|
||||||
if( !fn || !CV_NODE_IS_INT( fn->tag )
|
if( !fn || !CV_NODE_IS_INT( fn->tag )
|
||||||
|| fn->data.i < -1 || fn->data.i >= cascade->count )
|
|| fn->data.i < -1 || fn->data.i >= cascade->count )
|
||||||
{
|
{
|
||||||
sprintf( buf, "next must be integer number. (stage %d)", i );
|
sprintf( buf, "next must be integer number. (stage %d)", i );
|
||||||
CV_Error( CV_StsError, buf );
|
CV_Error( CV_StsError, buf );
|
||||||
@@ -1993,7 +2001,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
|
|||||||
|
|
||||||
void
|
void
|
||||||
gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr,
|
gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr,
|
||||||
CvAttrList attributes )
|
CvAttrList attributes )
|
||||||
{
|
{
|
||||||
int i, j, k, l;
|
int i, j, k, l;
|
||||||
char buf[256];
|
char buf[256];
|
||||||
@@ -2066,7 +2074,7 @@ CvAttrList attributes )
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME,
|
cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME,
|
||||||
tree->alpha[-tree->left[k]] );
|
tree->alpha[-tree->left[k]] );
|
||||||
}
|
}
|
||||||
|
|
||||||
if( tree->right[k] > 0 )
|
if( tree->right[k] > 0 )
|
||||||
@@ -2076,7 +2084,7 @@ CvAttrList attributes )
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME,
|
cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME,
|
||||||
tree->alpha[-tree->right[k]] );
|
tree->alpha[-tree->right[k]] );
|
||||||
}
|
}
|
||||||
|
|
||||||
cvEndWriteStruct( fs ); /* split */
|
cvEndWriteStruct( fs ); /* split */
|
||||||
@@ -2098,14 +2106,14 @@ CvAttrList attributes )
|
|||||||
cvEndWriteStruct( fs ); /* root */
|
cvEndWriteStruct( fs ); /* root */
|
||||||
}
|
}
|
||||||
|
|
||||||
void*
|
void *
|
||||||
gpuCloneHaarClassifier( const void *struct_ptr )
|
gpuCloneHaarClassifier( const void *struct_ptr )
|
||||||
{
|
{
|
||||||
CvHaarClassifierCascade *cascade = NULL;
|
CvHaarClassifierCascade *cascade = NULL;
|
||||||
|
|
||||||
int i, j, k, n;
|
int i, j, k, n;
|
||||||
const CvHaarClassifierCascade *cascade_src =
|
const CvHaarClassifierCascade *cascade_src =
|
||||||
(const CvHaarClassifierCascade *) struct_ptr;
|
(const CvHaarClassifierCascade *) struct_ptr;
|
||||||
|
|
||||||
n = cascade_src->count;
|
n = cascade_src->count;
|
||||||
cascade = gpuCreateHaarClassifierCascade(n);
|
cascade = gpuCreateHaarClassifierCascade(n);
|
||||||
@@ -2120,8 +2128,8 @@ gpuCloneHaarClassifier( const void *struct_ptr )
|
|||||||
|
|
||||||
cascade->stage_classifier[i].count = 0;
|
cascade->stage_classifier[i].count = 0;
|
||||||
cascade->stage_classifier[i].classifier =
|
cascade->stage_classifier[i].classifier =
|
||||||
(CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count
|
(CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count
|
||||||
* sizeof( cascade->stage_classifier[i].classifier[0] ) );
|
* sizeof( cascade->stage_classifier[i].classifier[0] ) );
|
||||||
|
|
||||||
cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count;
|
cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count;
|
||||||
|
|
||||||
@@ -2131,17 +2139,17 @@ gpuCloneHaarClassifier( const void *struct_ptr )
|
|||||||
for( j = 0; j < cascade->stage_classifier[i].count; ++j )
|
for( j = 0; j < cascade->stage_classifier[i].count; ++j )
|
||||||
{
|
{
|
||||||
const CvHaarClassifier *classifier_src =
|
const CvHaarClassifier *classifier_src =
|
||||||
&cascade_src->stage_classifier[i].classifier[j];
|
&cascade_src->stage_classifier[i].classifier[j];
|
||||||
CvHaarClassifier *classifier =
|
CvHaarClassifier *classifier =
|
||||||
&cascade->stage_classifier[i].classifier[j];
|
&cascade->stage_classifier[i].classifier[j];
|
||||||
|
|
||||||
classifier->count = classifier_src->count;
|
classifier->count = classifier_src->count;
|
||||||
classifier->haar_feature = (CvHaarFeature *) cvAlloc(
|
classifier->haar_feature = (CvHaarFeature *) cvAlloc(
|
||||||
classifier->count * ( sizeof( *classifier->haar_feature ) +
|
classifier->count * ( sizeof( *classifier->haar_feature ) +
|
||||||
sizeof( *classifier->threshold ) +
|
sizeof( *classifier->threshold ) +
|
||||||
sizeof( *classifier->left ) +
|
sizeof( *classifier->left ) +
|
||||||
sizeof( *classifier->right ) ) +
|
sizeof( *classifier->right ) ) +
|
||||||
(classifier->count + 1) * sizeof( *classifier->alpha ) );
|
(classifier->count + 1) * sizeof( *classifier->alpha ) );
|
||||||
classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
|
classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
|
||||||
classifier->left = (int *) (classifier->threshold + classifier->count);
|
classifier->left = (int *) (classifier->threshold + classifier->count);
|
||||||
classifier->right = (int *) (classifier->left + classifier->count);
|
classifier->right = (int *) (classifier->left + classifier->count);
|
||||||
@@ -2155,7 +2163,7 @@ gpuCloneHaarClassifier( const void *struct_ptr )
|
|||||||
classifier->alpha[k] = classifier_src->alpha[k];
|
classifier->alpha[k] = classifier_src->alpha[k];
|
||||||
}
|
}
|
||||||
classifier->alpha[classifier->count] =
|
classifier->alpha[classifier->count] =
|
||||||
classifier_src->alpha[classifier->count];
|
classifier_src->alpha[classifier->count];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2164,9 +2172,9 @@ gpuCloneHaarClassifier( const void *struct_ptr )
|
|||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier,
|
CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier,
|
||||||
(CvReleaseFunc)gpuReleaseHaarClassifierCascade,
|
(CvReleaseFunc)gpuReleaseHaarClassifierCascade,
|
||||||
gpuReadHaarClassifier, gpuWriteHaarClassifier,
|
gpuReadHaarClassifier, gpuWriteHaarClassifier,
|
||||||
gpuCloneHaarClassifier );
|
gpuCloneHaarClassifier );
|
||||||
|
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
@@ -2185,14 +2193,14 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
void HaarClassifierCascade::detectMultiScale( const Mat &image,
|
void HaarClassifierCascade::detectMultiScale( const Mat &image,
|
||||||
Vector<Rect>& objects, double scaleFactor,
|
Vector<Rect> &objects, double scaleFactor,
|
||||||
int minNeighbors, int flags,
|
int minNeighbors, int flags,
|
||||||
Size minSize )
|
Size minSize )
|
||||||
{
|
{
|
||||||
MemStorage storage(cvCreateMemStorage(0));
|
MemStorage storage(cvCreateMemStorage(0));
|
||||||
CvMat _image = image;
|
CvMat _image = image;
|
||||||
CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor,
|
CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor,
|
||||||
minNeighbors, flags, minSize );
|
minNeighbors, flags, minSize );
|
||||||
Seq<Rect>(_objects).copyTo(objects);
|
Seq<Rect>(_objects).copyTo(objects);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2202,7 +2210,7 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum,
|
void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum,
|
||||||
const Mat &tilted, double scale )
|
const Mat &tilted, double scale )
|
||||||
{
|
{
|
||||||
CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted;
|
CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted;
|
||||||
gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale );
|
gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale );
|
||||||
@@ -2473,8 +2481,8 @@ else
|
|||||||
|
|
||||||
CV_INLINE
|
CV_INLINE
|
||||||
double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier,
|
double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier,
|
||||||
double variance_norm_factor,
|
double variance_norm_factor,
|
||||||
size_t p_offset )
|
size_t p_offset )
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
@@ -2500,7 +2508,7 @@ size_t p_offset )
|
|||||||
|
|
||||||
CV_IMPL int
|
CV_IMPL int
|
||||||
gpuRunHaarClassifierCascade( const CvHaarClassifierCascade *_cascade,
|
gpuRunHaarClassifierCascade( const CvHaarClassifierCascade *_cascade,
|
||||||
CvPoint pt, int start_stage )
|
CvPoint pt, int start_stage )
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
int result = -1;
|
int result = -1;
|
||||||
@@ -2586,9 +2594,9 @@ namespace cv
|
|||||||
struct gpuHaarDetectObjects_ScaleImage_Invoker
|
struct gpuHaarDetectObjects_ScaleImage_Invoker
|
||||||
{
|
{
|
||||||
gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade,
|
gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade,
|
||||||
int _stripSize, double _factor,
|
int _stripSize, double _factor,
|
||||||
const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1,
|
const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1,
|
||||||
Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec )
|
Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec )
|
||||||
{
|
{
|
||||||
cascade = _cascade;
|
cascade = _cascade;
|
||||||
stripSize = _stripSize;
|
stripSize = _stripSize;
|
||||||
@@ -2614,7 +2622,7 @@ namespace cv
|
|||||||
{
|
{
|
||||||
if( gpuRunHaarClassifierCascade( cascade, cvPoint(x, y), 0 ) > 0 )
|
if( gpuRunHaarClassifierCascade( cascade, cvPoint(x, y), 0 ) > 0 )
|
||||||
vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor),
|
vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor),
|
||||||
winSize.width, winSize.height));
|
winSize.width, winSize.height));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2630,9 +2638,9 @@ namespace cv
|
|||||||
struct gpuHaarDetectObjects_ScaleCascade_Invoker
|
struct gpuHaarDetectObjects_ScaleCascade_Invoker
|
||||||
{
|
{
|
||||||
gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade,
|
gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade,
|
||||||
Size _winsize, const Range &_xrange, double _ystep,
|
Size _winsize, const Range &_xrange, double _ystep,
|
||||||
size_t _sumstep, const int **_p, const int **_pq,
|
size_t _sumstep, const int **_p, const int **_pq,
|
||||||
ConcurrentRectVector &_vec )
|
ConcurrentRectVector &_vec )
|
||||||
{
|
{
|
||||||
cascade = _cascade;
|
cascade = _cascade;
|
||||||
winsize = _winsize;
|
winsize = _winsize;
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -77,31 +77,31 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
void openCLMallocPitch(Context * /*clCxt*/, void ** /*dev_ptr*/, size_t * /*pitch*/,
|
void openCLMallocPitch(Context * /*clCxt*/, void ** /*dev_ptr*/, size_t * /*pitch*/,
|
||||||
size_t /*widthInBytes*/, size_t /*height*/)
|
size_t /*widthInBytes*/, size_t /*height*/)
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
void openCLMemcpy2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/,
|
void openCLMemcpy2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/,
|
||||||
const void * /*src*/, size_t /*spitch*/,
|
const void * /*src*/, size_t /*spitch*/,
|
||||||
size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/)
|
size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/)
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
void openCLCopyBuffer2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/,
|
void openCLCopyBuffer2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/,
|
||||||
const void * /*src*/, size_t /*spitch*/,
|
const void * /*src*/, size_t /*spitch*/,
|
||||||
size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/)
|
size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/)
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_mem openCLCreateBuffer(Context *,size_t, size_t)
|
cl_mem openCLCreateBuffer(Context *, size_t, size_t)
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
void openCLReadBuffer(Context *, cl_mem, void*, size_t)
|
void openCLReadBuffer(Context *, cl_mem, void *, size_t)
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
}
|
}
|
||||||
@@ -112,19 +112,19 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
cl_kernel openCLGetKernelFromSource(const Context * /*clCxt*/,
|
cl_kernel openCLGetKernelFromSource(const Context * /*clCxt*/,
|
||||||
const char ** /*fileName*/, string /*kernelName*/)
|
const char ** /*fileName*/, string /*kernelName*/)
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
void openCLVerifyKernel(const Context * /*clCxt*/, cl_kernel /*kernel*/, size_t * /*blockSize*/,
|
void openCLVerifyKernel(const Context * /*clCxt*/, cl_kernel /*kernel*/, size_t * /*blockSize*/,
|
||||||
size_t * /*globalThreads*/, size_t * /*localThreads*/)
|
size_t * /*globalThreads*/, size_t * /*localThreads*/)
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
|
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
|
||||||
const size_t size)
|
const size_t size)
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
}
|
}
|
||||||
@@ -226,7 +226,7 @@ namespace cv
|
|||||||
int double_support;
|
int double_support;
|
||||||
Impl()
|
Impl()
|
||||||
{
|
{
|
||||||
memset(extra_options,0,512);
|
memset(extra_options, 0, 512);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -240,23 +240,23 @@ namespace cv
|
|||||||
cl_device_type _devicetype;
|
cl_device_type _devicetype;
|
||||||
switch(devicetype)
|
switch(devicetype)
|
||||||
{
|
{
|
||||||
case CVCL_DEVICE_TYPE_DEFAULT:
|
case CVCL_DEVICE_TYPE_DEFAULT:
|
||||||
_devicetype = CL_DEVICE_TYPE_DEFAULT;
|
_devicetype = CL_DEVICE_TYPE_DEFAULT;
|
||||||
break;
|
break;
|
||||||
case CVCL_DEVICE_TYPE_CPU:
|
case CVCL_DEVICE_TYPE_CPU:
|
||||||
_devicetype = CL_DEVICE_TYPE_CPU;
|
_devicetype = CL_DEVICE_TYPE_CPU;
|
||||||
break;
|
break;
|
||||||
case CVCL_DEVICE_TYPE_GPU:
|
case CVCL_DEVICE_TYPE_GPU:
|
||||||
_devicetype = CL_DEVICE_TYPE_GPU;
|
_devicetype = CL_DEVICE_TYPE_GPU;
|
||||||
break;
|
break;
|
||||||
case CVCL_DEVICE_TYPE_ACCELERATOR:
|
case CVCL_DEVICE_TYPE_ACCELERATOR:
|
||||||
_devicetype = CL_DEVICE_TYPE_ACCELERATOR;
|
_devicetype = CL_DEVICE_TYPE_ACCELERATOR;
|
||||||
break;
|
break;
|
||||||
case CVCL_DEVICE_TYPE_ALL:
|
case CVCL_DEVICE_TYPE_ALL:
|
||||||
_devicetype = CL_DEVICE_TYPE_ALL;
|
_devicetype = CL_DEVICE_TYPE_ALL;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_GpuApiCallError,"Unkown device type");
|
CV_Error(CV_GpuApiCallError, "Unkown device type");
|
||||||
}
|
}
|
||||||
int devcienums = 0;
|
int devcienums = 0;
|
||||||
// Platform info
|
// Platform info
|
||||||
@@ -288,6 +288,7 @@ namespace cv
|
|||||||
ocltmpinfo.impl->devices.push_back(devices[j]);
|
ocltmpinfo.impl->devices.push_back(devices[j]);
|
||||||
openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 256, deviceName, NULL));
|
openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 256, deviceName, NULL));
|
||||||
ocltmpinfo.impl->devName.push_back(std::string(deviceName));
|
ocltmpinfo.impl->devName.push_back(std::string(deviceName));
|
||||||
|
ocltmpinfo.DeviceName.push_back(std::string(deviceName));
|
||||||
}
|
}
|
||||||
delete[] devices;
|
delete[] devices;
|
||||||
oclinfo.push_back(ocltmpinfo);
|
oclinfo.push_back(ocltmpinfo);
|
||||||
@@ -314,19 +315,19 @@ namespace cv
|
|||||||
openCLVerifyCall(status);
|
openCLVerifyCall(status);
|
||||||
//create the command queue using the first device of the list
|
//create the command queue using the first device of the list
|
||||||
oclinfo.impl->clCmdQueue = clCreateCommandQueue(oclinfo.impl->oclcontext, oclinfo.impl->devices[devnum],
|
oclinfo.impl->clCmdQueue = clCreateCommandQueue(oclinfo.impl->oclcontext, oclinfo.impl->devices[devnum],
|
||||||
CL_QUEUE_PROFILING_ENABLE, &status);
|
CL_QUEUE_PROFILING_ENABLE, &status);
|
||||||
openCLVerifyCall(status);
|
openCLVerifyCall(status);
|
||||||
|
|
||||||
//get device information
|
//get device information
|
||||||
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE,
|
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE,
|
||||||
sizeof(size_t), (void *)&oclinfo.impl->maxWorkGroupSize, NULL));
|
sizeof(size_t), (void *)&oclinfo.impl->maxWorkGroupSize, NULL));
|
||||||
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
|
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
|
||||||
sizeof(cl_uint), (void *)&oclinfo.impl->maxDimensions, NULL));
|
sizeof(cl_uint), (void *)&oclinfo.impl->maxDimensions, NULL));
|
||||||
oclinfo.impl->maxWorkItemSizes = new size_t[oclinfo.impl->maxDimensions];
|
oclinfo.impl->maxWorkItemSizes = new size_t[oclinfo.impl->maxDimensions];
|
||||||
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
||||||
sizeof(size_t)*oclinfo.impl->maxDimensions, (void *)oclinfo.impl->maxWorkItemSizes, NULL));
|
sizeof(size_t)*oclinfo.impl->maxDimensions, (void *)oclinfo.impl->maxWorkItemSizes, NULL));
|
||||||
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS,
|
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS,
|
||||||
sizeof(cl_uint), (void *)&oclinfo.impl->maxComputeUnits, NULL));
|
sizeof(cl_uint), (void *)&oclinfo.impl->maxComputeUnits, NULL));
|
||||||
//initialize extra options for compilation. Currently only fp64 is included.
|
//initialize extra options for compilation. Currently only fp64 is included.
|
||||||
//Assume 4KB is enough to store all possible extensions.
|
//Assume 4KB is enough to store all possible extensions.
|
||||||
|
|
||||||
@@ -334,9 +335,9 @@ namespace cv
|
|||||||
char extends_set[EXT_LEN];
|
char extends_set[EXT_LEN];
|
||||||
size_t extends_size;
|
size_t extends_size;
|
||||||
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_EXTENSIONS,
|
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_EXTENSIONS,
|
||||||
EXT_LEN, (void *)extends_set, &extends_size));
|
EXT_LEN, (void *)extends_set, &extends_size));
|
||||||
CV_Assert(extends_size < EXT_LEN);
|
CV_Assert(extends_size < EXT_LEN);
|
||||||
extends_set[EXT_LEN-1] = 0;
|
extends_set[EXT_LEN - 1] = 0;
|
||||||
//oclinfo.extra_options = NULL;
|
//oclinfo.extra_options = NULL;
|
||||||
int fp64_khr = string(extends_set).find("cl_khr_fp64");
|
int fp64_khr = string(extends_set).find("cl_khr_fp64");
|
||||||
|
|
||||||
@@ -347,86 +348,90 @@ namespace cv
|
|||||||
}
|
}
|
||||||
Context::setContext(oclinfo);
|
Context::setContext(oclinfo);
|
||||||
}
|
}
|
||||||
void* getoclContext()
|
void *getoclContext()
|
||||||
{
|
|
||||||
return &(Context::getContext()->impl->clContext);
|
{
|
||||||
}
|
|
||||||
void* getoclCommandQueue()
|
return &(Context::getContext()->impl->clContext);
|
||||||
{
|
|
||||||
return &(Context::getContext()->impl->clCmdQueue);
|
}
|
||||||
}
|
|
||||||
|
void *getoclCommandQueue()
|
||||||
|
{
|
||||||
|
return &(Context::getContext()->impl->clCmdQueue);
|
||||||
|
}
|
||||||
void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size)
|
void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size)
|
||||||
{
|
{
|
||||||
cl_int status;
|
cl_int status;
|
||||||
status = clEnqueueReadBuffer(clCxt->impl->clCmdQueue, dst_buffer, CL_TRUE, 0,
|
status = clEnqueueReadBuffer(clCxt->impl->clCmdQueue, dst_buffer, CL_TRUE, 0,
|
||||||
size, host_buffer, 0, NULL, NULL);
|
size, host_buffer, 0, NULL, NULL);
|
||||||
openCLVerifyCall(status);
|
openCLVerifyCall(status);
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_mem openCLCreateBuffer(Context *clCxt, size_t flag , size_t size)
|
cl_mem openCLCreateBuffer(Context *clCxt, size_t flag , size_t size)
|
||||||
{
|
{
|
||||||
cl_int status;
|
cl_int status;
|
||||||
cl_mem buffer = clCreateBuffer(clCxt->impl->clContext,(cl_mem_flags)flag, size, NULL, &status);
|
cl_mem buffer = clCreateBuffer(clCxt->impl->clContext, (cl_mem_flags)flag, size, NULL, &status);
|
||||||
openCLVerifyCall(status);
|
openCLVerifyCall(status);
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
|
void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
|
||||||
size_t widthInBytes, size_t height)
|
size_t widthInBytes, size_t height)
|
||||||
{
|
{
|
||||||
cl_int status;
|
cl_int status;
|
||||||
|
|
||||||
*dev_ptr = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE,
|
*dev_ptr = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE,
|
||||||
widthInBytes * height, 0, &status);
|
widthInBytes * height, 0, &status);
|
||||||
openCLVerifyCall(status);
|
openCLVerifyCall(status);
|
||||||
*pitch = widthInBytes;
|
*pitch = widthInBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
|
void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
|
||||||
const void *src, size_t spitch,
|
const void *src, size_t spitch,
|
||||||
size_t width, size_t height, enum openCLMemcpyKind kind, int channels)
|
size_t width, size_t height, enum openCLMemcpyKind kind, int channels)
|
||||||
{
|
{
|
||||||
size_t buffer_origin[3] = {0, 0, 0};
|
size_t buffer_origin[3] = {0, 0, 0};
|
||||||
size_t host_origin[3] = {0, 0, 0};
|
size_t host_origin[3] = {0, 0, 0};
|
||||||
size_t region[3] = {width, height, 1};
|
size_t region[3] = {width, height, 1};
|
||||||
if(kind == clMemcpyHostToDevice)
|
if(kind == clMemcpyHostToDevice)
|
||||||
{
|
{
|
||||||
if(dpitch == width || channels==3 || height == 1)
|
if(dpitch == width || channels == 3 || height == 1)
|
||||||
{
|
{
|
||||||
openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE,
|
openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE,
|
||||||
0, width*height, src, 0, NULL, NULL));
|
0, width * height, src, 0, NULL, NULL));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
openCLSafeCall(clEnqueueWriteBufferRect(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE,
|
openCLSafeCall(clEnqueueWriteBufferRect(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE,
|
||||||
buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0));
|
buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(kind == clMemcpyDeviceToHost)
|
else if(kind == clMemcpyDeviceToHost)
|
||||||
{
|
{
|
||||||
if(spitch == width || channels==3 || height == 1)
|
if(spitch == width || channels == 3 || height == 1)
|
||||||
{
|
{
|
||||||
openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE,
|
openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE,
|
||||||
0, width*height, dst, 0, NULL, NULL));
|
0, width * height, dst, 0, NULL, NULL));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
openCLSafeCall(clEnqueueReadBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE,
|
openCLSafeCall(clEnqueueReadBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE,
|
||||||
buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0));
|
buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
||||||
const void *src, size_t spitch,
|
const void *src, size_t spitch,
|
||||||
size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind)
|
size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind)
|
||||||
{
|
{
|
||||||
size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
|
size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
|
||||||
size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
|
size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
|
||||||
size_t region[3] = {width, height, 1};
|
size_t region[3] = {width, height, 1};
|
||||||
|
|
||||||
openCLSafeCall(clEnqueueCopyBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, (cl_mem)dst, src_origin, dst_origin,
|
openCLSafeCall(clEnqueueCopyBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, (cl_mem)dst, src_origin, dst_origin,
|
||||||
region, spitch, 0, dpitch, 0, 0, 0, 0));
|
region, spitch, 0, dpitch, 0, 0, 0, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
void openCLFree(void *devPtr)
|
void openCLFree(void *devPtr)
|
||||||
@@ -438,11 +443,11 @@ namespace cv
|
|||||||
return openCLGetKernelFromSource(clCxt, source, kernelName, NULL);
|
return openCLGetKernelFromSource(clCxt, source, kernelName, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void setBinpath(const char *path)
|
void setBinpath(const char *path)
|
||||||
{
|
{
|
||||||
Context *clcxt = Context::getContext();
|
Context *clcxt = Context::getContext();
|
||||||
clcxt->impl->Binpath = path;
|
clcxt->impl->Binpath = path;
|
||||||
}
|
}
|
||||||
int savetofile(const Context *clcxt, cl_program &program, const char *fileName)
|
int savetofile(const Context *clcxt, cl_program &program, const char *fileName)
|
||||||
{
|
{
|
||||||
@@ -453,16 +458,16 @@ namespace cv
|
|||||||
size_t *binarySizes = (size_t *)malloc( sizeof(size_t) * numDevices );
|
size_t *binarySizes = (size_t *)malloc( sizeof(size_t) * numDevices );
|
||||||
|
|
||||||
openCLSafeCall(clGetProgramInfo(program,
|
openCLSafeCall(clGetProgramInfo(program,
|
||||||
CL_PROGRAM_BINARY_SIZES,
|
CL_PROGRAM_BINARY_SIZES,
|
||||||
sizeof(size_t) * numDevices,
|
sizeof(size_t) * numDevices,
|
||||||
binarySizes, NULL));
|
binarySizes, NULL));
|
||||||
|
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
//copy over all of the generated binaries.
|
//copy over all of the generated binaries.
|
||||||
char **binaries = (char **)malloc( sizeof(char *) * numDevices );
|
char **binaries = (char **)malloc( sizeof(char *) * numDevices );
|
||||||
if(binaries == NULL)
|
if(binaries == NULL)
|
||||||
{
|
{
|
||||||
CV_Error(CV_StsNoMem,"Failed to allocate host memory.(binaries)\r\n");
|
CV_Error(CV_StsNoMem, "Failed to allocate host memory.(binaries)\r\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
for(i = 0; i < numDevices; i++)
|
for(i = 0; i < numDevices; i++)
|
||||||
@@ -472,7 +477,7 @@ namespace cv
|
|||||||
binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]);
|
binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]);
|
||||||
if(binaries[i] == NULL)
|
if(binaries[i] == NULL)
|
||||||
{
|
{
|
||||||
CV_Error(CV_StsNoMem,"Failed to allocate host memory.(binaries[i])\r\n");
|
CV_Error(CV_StsNoMem, "Failed to allocate host memory.(binaries[i])\r\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -481,10 +486,10 @@ namespace cv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
openCLSafeCall(clGetProgramInfo(program,
|
openCLSafeCall(clGetProgramInfo(program,
|
||||||
CL_PROGRAM_BINARIES,
|
CL_PROGRAM_BINARIES,
|
||||||
sizeof(char *) * numDevices,
|
sizeof(char *) * numDevices,
|
||||||
binaries,
|
binaries,
|
||||||
NULL));
|
NULL));
|
||||||
|
|
||||||
//dump out each binary into its own separate file.
|
//dump out each binary into its own separate file.
|
||||||
for(i = 0; i < numDevices; i++)
|
for(i = 0; i < numDevices; i++)
|
||||||
@@ -493,10 +498,10 @@ namespace cv
|
|||||||
{
|
{
|
||||||
char deviceName[1024];
|
char deviceName[1024];
|
||||||
openCLSafeCall(clGetDeviceInfo(devices[i],
|
openCLSafeCall(clGetDeviceInfo(devices[i],
|
||||||
CL_DEVICE_NAME,
|
CL_DEVICE_NAME,
|
||||||
sizeof(deviceName),
|
sizeof(deviceName),
|
||||||
deviceName,
|
deviceName,
|
||||||
NULL));
|
NULL));
|
||||||
|
|
||||||
printf( "%s binary kernel: %s\n", deviceName, fileName);
|
printf( "%s binary kernel: %s\n", deviceName, fileName);
|
||||||
FILE *fp = fopen(fileName, "wb+");
|
FILE *fp = fopen(fileName, "wb+");
|
||||||
@@ -516,7 +521,7 @@ namespace cv
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
printf("Skipping %s since there is no binary data to write!\n",
|
printf("Skipping %s since there is no binary data to write!\n",
|
||||||
fileName);
|
fileName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(binarySizes);
|
free(binarySizes);
|
||||||
@@ -526,24 +531,24 @@ namespace cv
|
|||||||
|
|
||||||
|
|
||||||
cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName,
|
cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName,
|
||||||
const char *build_options)
|
const char *build_options)
|
||||||
{
|
{
|
||||||
cl_kernel kernel;
|
cl_kernel kernel;
|
||||||
cl_program program ;
|
cl_program program ;
|
||||||
cl_int status = 0;
|
cl_int status = 0;
|
||||||
stringstream src_sign;
|
stringstream src_sign;
|
||||||
string srcsign;
|
string srcsign;
|
||||||
string filename;
|
string filename;
|
||||||
CV_Assert(programCache != NULL);
|
CV_Assert(programCache != NULL);
|
||||||
|
|
||||||
if(NULL != build_options)
|
if(NULL != build_options)
|
||||||
{
|
{
|
||||||
src_sign << (int64)(*source) << clCxt->impl->clContext << "_" << build_options;
|
src_sign << (int64)(*source) << clCxt->impl->clContext << "_" << build_options;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
src_sign << (int64)(*source) << clCxt->impl->clContext;
|
src_sign << (int64)(*source) << clCxt->impl->clContext;
|
||||||
}
|
}
|
||||||
srcsign = src_sign.str();
|
srcsign = src_sign.str();
|
||||||
|
|
||||||
program = NULL;
|
program = NULL;
|
||||||
@@ -554,31 +559,31 @@ namespace cv
|
|||||||
//config build programs
|
//config build programs
|
||||||
char all_build_options[1024];
|
char all_build_options[1024];
|
||||||
memset(all_build_options, 0, 1024);
|
memset(all_build_options, 0, 1024);
|
||||||
char zeromem[512]={0};
|
char zeromem[512] = {0};
|
||||||
if(0!=memcmp(clCxt -> impl->extra_options, zeromem,512))
|
if(0 != memcmp(clCxt -> impl->extra_options, zeromem, 512))
|
||||||
strcat(all_build_options, clCxt -> impl->extra_options);
|
strcat(all_build_options, clCxt -> impl->extra_options);
|
||||||
strcat(all_build_options, " ");
|
strcat(all_build_options, " ");
|
||||||
if(build_options != NULL)
|
if(build_options != NULL)
|
||||||
strcat(all_build_options, build_options);
|
strcat(all_build_options, build_options);
|
||||||
if(all_build_options != NULL)
|
if(all_build_options != NULL)
|
||||||
{
|
{
|
||||||
filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + all_build_options + ".clb";
|
filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + all_build_options + ".clb";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + ".clb";
|
filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + ".clb";
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
fp = fopen(filename.c_str(), "rb");
|
fp = fopen(filename.c_str(), "rb");
|
||||||
if(fp == NULL || clCxt->impl->Binpath.size() == 0) //we should genetate a binary file for the first time.
|
if(fp == NULL || clCxt->impl->Binpath.size() == 0) //we should genetate a binary file for the first time.
|
||||||
{
|
{
|
||||||
program = clCreateProgramWithSource(
|
program = clCreateProgramWithSource(
|
||||||
clCxt->impl->clContext, 1, source, NULL, &status);
|
clCxt->impl->clContext, 1, source, NULL, &status);
|
||||||
openCLVerifyCall(status);
|
openCLVerifyCall(status);
|
||||||
status = clBuildProgram(program, 1, &(clCxt->impl->devices[0]), all_build_options, NULL, NULL);
|
status = clBuildProgram(program, 1, &(clCxt->impl->devices[0]), all_build_options, NULL, NULL);
|
||||||
if(status == CL_SUCCESS && clCxt->impl->Binpath.size())
|
if(status == CL_SUCCESS && clCxt->impl->Binpath.size())
|
||||||
savetofile(clCxt, program, filename.c_str());
|
savetofile(clCxt, program, filename.c_str());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -590,12 +595,12 @@ namespace cv
|
|||||||
fclose(fp);
|
fclose(fp);
|
||||||
cl_int status = 0;
|
cl_int status = 0;
|
||||||
program = clCreateProgramWithBinary(clCxt->impl->clContext,
|
program = clCreateProgramWithBinary(clCxt->impl->clContext,
|
||||||
1,
|
1,
|
||||||
&(clCxt->impl->devices[0]),
|
&(clCxt->impl->devices[0]),
|
||||||
(const size_t *)&binarySize,
|
(const size_t *)&binarySize,
|
||||||
(const unsigned char **)&binary,
|
(const unsigned char **)&binary,
|
||||||
NULL,
|
NULL,
|
||||||
&status);
|
&status);
|
||||||
openCLVerifyCall(status);
|
openCLVerifyCall(status);
|
||||||
status = clBuildProgram(program, 1, &(clCxt->impl->devices[0]), all_build_options, NULL, NULL);
|
status = clBuildProgram(program, 1, &(clCxt->impl->devices[0]), all_build_options, NULL, NULL);
|
||||||
}
|
}
|
||||||
@@ -608,15 +613,15 @@ namespace cv
|
|||||||
char *buildLog = NULL;
|
char *buildLog = NULL;
|
||||||
size_t buildLogSize = 0;
|
size_t buildLogSize = 0;
|
||||||
logStatus = clGetProgramBuildInfo(program,
|
logStatus = clGetProgramBuildInfo(program,
|
||||||
clCxt->impl->devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize,
|
clCxt->impl->devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize,
|
||||||
buildLog, &buildLogSize);
|
buildLog, &buildLogSize);
|
||||||
if(logStatus != CL_SUCCESS)
|
if(logStatus != CL_SUCCESS)
|
||||||
cout << "Failed to build the program and get the build info." << endl;
|
cout << "Failed to build the program and get the build info." << endl;
|
||||||
buildLog = new char[buildLogSize];
|
buildLog = new char[buildLogSize];
|
||||||
CV_DbgAssert(!!buildLog);
|
CV_DbgAssert(!!buildLog);
|
||||||
memset(buildLog, 0, buildLogSize);
|
memset(buildLog, 0, buildLogSize);
|
||||||
openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices[0],
|
openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices[0],
|
||||||
CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL));
|
CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL));
|
||||||
cout << "\n\t\t\tBUILD LOG\n";
|
cout << "\n\t\t\tBUILD LOG\n";
|
||||||
cout << buildLog << endl;
|
cout << buildLog << endl;
|
||||||
delete buildLog;
|
delete buildLog;
|
||||||
@@ -626,8 +631,8 @@ namespace cv
|
|||||||
//Cache the binary for future use if build_options is null
|
//Cache the binary for future use if build_options is null
|
||||||
if( (programCache->cacheSize += 1) < programCache->MAX_PROG_CACHE_SIZE)
|
if( (programCache->cacheSize += 1) < programCache->MAX_PROG_CACHE_SIZE)
|
||||||
programCache->addProgram(srcsign, program);
|
programCache->addProgram(srcsign, program);
|
||||||
else
|
else
|
||||||
cout << "Warning: code cache has been full.\n";
|
cout << "Warning: code cache has been full.\n";
|
||||||
}
|
}
|
||||||
kernel = clCreateKernel(program, kernelName.c_str(), &status);
|
kernel = clCreateKernel(program, kernelName.c_str(), &status);
|
||||||
openCLVerifyCall(status);
|
openCLVerifyCall(status);
|
||||||
@@ -635,16 +640,16 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *blockSize,
|
void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *blockSize,
|
||||||
size_t *globalThreads, size_t *localThreads)
|
size_t *globalThreads, size_t *localThreads)
|
||||||
{
|
{
|
||||||
size_t kernelWorkGroupSize;
|
size_t kernelWorkGroupSize;
|
||||||
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[0],
|
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[0],
|
||||||
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
|
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
|
||||||
CV_DbgAssert( (localThreads[0] <= clCxt->impl->maxWorkItemSizes[0]) &&
|
CV_DbgAssert( (localThreads[0] <= clCxt->impl->maxWorkItemSizes[0]) &&
|
||||||
(localThreads[1] <= clCxt->impl->maxWorkItemSizes[1]) &&
|
(localThreads[1] <= clCxt->impl->maxWorkItemSizes[1]) &&
|
||||||
(localThreads[2] <= clCxt->impl->maxWorkItemSizes[2]) &&
|
(localThreads[2] <= clCxt->impl->maxWorkItemSizes[2]) &&
|
||||||
((localThreads[0] * localThreads[1] * localThreads[2]) <= kernelWorkGroupSize) &&
|
((localThreads[0] * localThreads[1] * localThreads[2]) <= kernelWorkGroupSize) &&
|
||||||
(localThreads[0] * localThreads[1] * localThreads[2]) <= clCxt->impl->maxWorkGroupSize);
|
(localThreads[0] * localThreads[1] * localThreads[2]) <= clCxt->impl->maxWorkGroupSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef PRINT_KERNEL_RUN_TIME
|
#ifdef PRINT_KERNEL_RUN_TIME
|
||||||
@@ -652,8 +657,8 @@ namespace cv
|
|||||||
static double total_kernel_time = 0;
|
static double total_kernel_time = 0;
|
||||||
#endif
|
#endif
|
||||||
void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
||||||
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
|
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
|
||||||
int depth, const char *build_options)
|
int depth, const char *build_options)
|
||||||
{
|
{
|
||||||
//construct kernel name
|
//construct kernel name
|
||||||
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
|
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
|
||||||
@@ -667,13 +672,13 @@ namespace cv
|
|||||||
|
|
||||||
cl_kernel kernel;
|
cl_kernel kernel;
|
||||||
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
|
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
|
||||||
|
|
||||||
if ( localThreads != NULL)
|
if ( localThreads != NULL)
|
||||||
{
|
{
|
||||||
globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
|
globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
|
||||||
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
|
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
|
||||||
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
|
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
|
||||||
|
|
||||||
size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
|
size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
|
||||||
cv::ocl::openCLVerifyKernel(clCxt, kernel, &blockSize, globalThreads, localThreads);
|
cv::ocl::openCLVerifyKernel(clCxt, kernel, &blockSize, globalThreads, localThreads);
|
||||||
}
|
}
|
||||||
@@ -682,11 +687,11 @@ namespace cv
|
|||||||
|
|
||||||
#ifndef PRINT_KERNEL_RUN_TIME
|
#ifndef PRINT_KERNEL_RUN_TIME
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
|
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
|
||||||
localThreads, 0, NULL, NULL));
|
localThreads, 0, NULL, NULL));
|
||||||
#else
|
#else
|
||||||
cl_event event = NULL;
|
cl_event event = NULL;
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
|
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
|
||||||
localThreads, 0, NULL, &event));
|
localThreads, 0, NULL, &event));
|
||||||
|
|
||||||
cl_ulong start_time, end_time, queue_time;
|
cl_ulong start_time, end_time, queue_time;
|
||||||
double execute_time = 0;
|
double execute_time = 0;
|
||||||
@@ -694,13 +699,13 @@ namespace cv
|
|||||||
|
|
||||||
openCLSafeCall(clWaitForEvents(1, &event));
|
openCLSafeCall(clWaitForEvents(1, &event));
|
||||||
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
|
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
|
||||||
sizeof(cl_ulong), &start_time, 0));
|
sizeof(cl_ulong), &start_time, 0));
|
||||||
|
|
||||||
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
|
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
|
||||||
sizeof(cl_ulong), &end_time, 0));
|
sizeof(cl_ulong), &end_time, 0));
|
||||||
|
|
||||||
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
|
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
|
||||||
sizeof(cl_ulong), &queue_time, 0));
|
sizeof(cl_ulong), &queue_time, 0));
|
||||||
|
|
||||||
execute_time = (double)(end_time - start_time) / (1000 * 1000);
|
execute_time = (double)(end_time - start_time) / (1000 * 1000);
|
||||||
total_time = (double)(end_time - queue_time) / (1000 * 1000);
|
total_time = (double)(end_time - queue_time) / (1000 * 1000);
|
||||||
@@ -719,20 +724,20 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName,
|
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName,
|
||||||
size_t globalThreads[3], size_t localThreads[3],
|
size_t globalThreads[3], size_t localThreads[3],
|
||||||
vector< pair<size_t, const void *> > &args, int channels, int depth)
|
vector< pair<size_t, const void *> > &args, int channels, int depth)
|
||||||
{
|
{
|
||||||
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args,
|
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args,
|
||||||
channels, depth, NULL);
|
channels, depth, NULL);
|
||||||
}
|
}
|
||||||
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName,
|
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName,
|
||||||
size_t globalThreads[3], size_t localThreads[3],
|
size_t globalThreads[3], size_t localThreads[3],
|
||||||
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
|
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
|
||||||
|
|
||||||
{
|
{
|
||||||
#ifndef PRINT_KERNEL_RUN_TIME
|
#ifndef PRINT_KERNEL_RUN_TIME
|
||||||
openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
|
openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
|
||||||
build_options);
|
build_options);
|
||||||
#else
|
#else
|
||||||
string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"};
|
string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"};
|
||||||
cout << endl;
|
cout << endl;
|
||||||
@@ -752,7 +757,7 @@ namespace cv
|
|||||||
int i = 0;
|
int i = 0;
|
||||||
for(i = 0; i < RUN_TIMES; i++)
|
for(i = 0; i < RUN_TIMES; i++)
|
||||||
openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
|
openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
|
||||||
build_options);
|
build_options);
|
||||||
|
|
||||||
cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
|
cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
|
||||||
cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
|
cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
|
||||||
@@ -760,7 +765,7 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
|
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
|
||||||
const size_t size)
|
const size_t size)
|
||||||
{
|
{
|
||||||
int status;
|
int status;
|
||||||
cl_mem con_struct;
|
cl_mem con_struct;
|
||||||
@@ -769,7 +774,7 @@ namespace cv
|
|||||||
openCLSafeCall(status);
|
openCLSafeCall(status);
|
||||||
|
|
||||||
openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size,
|
openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size,
|
||||||
value, 0, 0, 0));
|
value, 0, 0, 0));
|
||||||
|
|
||||||
return con_struct;
|
return con_struct;
|
||||||
|
|
||||||
@@ -801,7 +806,7 @@ namespace cv
|
|||||||
clcxt->impl->clContext = oclinfo.impl->oclcontext;
|
clcxt->impl->clContext = oclinfo.impl->oclcontext;
|
||||||
clcxt->impl->clCmdQueue = oclinfo.impl->clCmdQueue;
|
clcxt->impl->clCmdQueue = oclinfo.impl->clCmdQueue;
|
||||||
clcxt->impl->devices = &oclinfo.impl->devices[oclinfo.impl->devnum];
|
clcxt->impl->devices = &oclinfo.impl->devices[oclinfo.impl->devnum];
|
||||||
clcxt->impl->devName = oclinfo.impl->devName[oclinfo.impl->devnum];
|
clcxt->impl->devName = oclinfo.impl->devName[oclinfo.impl->devnum];
|
||||||
clcxt->impl->maxDimensions = oclinfo.impl->maxDimensions;
|
clcxt->impl->maxDimensions = oclinfo.impl->maxDimensions;
|
||||||
clcxt->impl->maxWorkGroupSize = oclinfo.impl->maxWorkGroupSize;
|
clcxt->impl->maxWorkGroupSize = oclinfo.impl->maxWorkGroupSize;
|
||||||
clcxt->impl->maxWorkItemSizes = oclinfo.impl->maxWorkItemSizes;
|
clcxt->impl->maxWorkItemSizes = oclinfo.impl->maxWorkItemSizes;
|
||||||
@@ -873,6 +878,7 @@ namespace cv
|
|||||||
//}
|
//}
|
||||||
impl->devices.clear();
|
impl->devices.clear();
|
||||||
impl->devName.clear();
|
impl->devName.clear();
|
||||||
|
DeviceName.clear();
|
||||||
}
|
}
|
||||||
Info::~Info()
|
Info::~Info()
|
||||||
{
|
{
|
||||||
@@ -895,6 +901,7 @@ namespace cv
|
|||||||
{
|
{
|
||||||
impl->devices.push_back(m.impl->devices[i]);
|
impl->devices.push_back(m.impl->devices[i]);
|
||||||
impl->devName.push_back(m.impl->devName[i]);
|
impl->devName.push_back(m.impl->devName[i]);
|
||||||
|
DeviceName.push_back(m.DeviceName[i]);
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|||||||
315
modules/ocl/src/interpolate_frames.cpp
Normal file
315
modules/ocl/src/interpolate_frames.cpp
Normal file
@@ -0,0 +1,315 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Comuter Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular urpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include <iomanip>
|
||||||
|
#include "precomp.hpp"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::ocl;
|
||||||
|
|
||||||
|
|
||||||
|
#if !defined (HAVE_OPENCL)
|
||||||
|
void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1,
|
||||||
|
const oclMat &fu, const oclMat &fv,
|
||||||
|
const oclMat &bu, const oclMat &bv,
|
||||||
|
float pos, oclMat &newFrame, oclMat &buf)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
namespace ocl
|
||||||
|
{
|
||||||
|
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||||
|
extern const char *interpolate_frames;
|
||||||
|
|
||||||
|
namespace interpolate
|
||||||
|
{
|
||||||
|
//The following are ported from NPP_staging.cu
|
||||||
|
// As it is not valid to do pointer offset operations on host for default oclMat's native cl_mem pointer,
|
||||||
|
// we may have to do this on kernel
|
||||||
|
void memsetKernel(float val, oclMat &img, int height, int offset);
|
||||||
|
void normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset);
|
||||||
|
void forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale,
|
||||||
|
int b_offset, int d_offset); // buffer, dst offset
|
||||||
|
|
||||||
|
//OpenCL conversion of nppiStVectorWarp_PSF2x2_32f_C1
|
||||||
|
void vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v,
|
||||||
|
oclMat &buffer, int buf_offset, float timeScale, int dst_offset);
|
||||||
|
//OpenCL conversion of BlendFrames
|
||||||
|
void blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer,
|
||||||
|
float pos, oclMat &newFrame, cl_mem &, cl_mem &);
|
||||||
|
|
||||||
|
// bind a buffer to an image
|
||||||
|
void bindImgTex(const oclMat &img, cl_mem &tex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1,
|
||||||
|
const oclMat &fu, const oclMat &fv,
|
||||||
|
const oclMat &bu, const oclMat &bv,
|
||||||
|
float pos, oclMat &newFrame, oclMat &buf)
|
||||||
|
{
|
||||||
|
CV_Assert(frame0.type() == CV_32FC1);
|
||||||
|
CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
|
||||||
|
CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type());
|
||||||
|
CV_Assert(fv.size() == frame0.size() && fv.type() == frame0.type());
|
||||||
|
CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type());
|
||||||
|
CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type());
|
||||||
|
|
||||||
|
newFrame.create(frame0.size(), frame0.type());
|
||||||
|
|
||||||
|
buf.create(6 * frame0.rows, frame0.cols, CV_32FC1);
|
||||||
|
buf.setTo(Scalar::all(0));
|
||||||
|
|
||||||
|
size_t step = frame0.step;
|
||||||
|
|
||||||
|
CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step);
|
||||||
|
cl_mem tex_src0 = 0, tex_src1 = 0;
|
||||||
|
|
||||||
|
// warp flow
|
||||||
|
using namespace interpolate;
|
||||||
|
|
||||||
|
bindImgTex(frame0, tex_src0);
|
||||||
|
bindImgTex(frame1, tex_src1);
|
||||||
|
|
||||||
|
// CUDA Offsets
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
cov0 = 0,
|
||||||
|
cov1,
|
||||||
|
fwdU,
|
||||||
|
fwdV,
|
||||||
|
bwdU,
|
||||||
|
bwdV
|
||||||
|
};
|
||||||
|
|
||||||
|
vectorWarp(fu, fu, fv, buf, cov0, pos, fwdU);
|
||||||
|
vectorWarp(fv, fu, fv, buf, cov0, pos, fwdV);
|
||||||
|
vectorWarp(bu, bu, bv, buf, cov1, 1.0f - pos, bwdU);
|
||||||
|
vectorWarp(bv, bu, bv, buf, cov1, 1.0f - pos, bwdU);
|
||||||
|
|
||||||
|
blendFrames(frame0, frame1, buf, pos, newFrame, tex_src0, tex_src1);
|
||||||
|
|
||||||
|
openCLFree(tex_src0);
|
||||||
|
openCLFree(tex_src1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void interpolate::memsetKernel(float val, oclMat &img, int height, int offset)
|
||||||
|
{
|
||||||
|
Context *clCxt = Context::getContext();
|
||||||
|
string kernelName = "memsetKernel";
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
int step = img.step / sizeof(float);
|
||||||
|
offset = step * height * offset;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&val));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&img.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&height));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&offset));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {img.cols, height, 1};
|
||||||
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
void interpolate::normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset)
|
||||||
|
{
|
||||||
|
Context *clCxt = Context::getContext();
|
||||||
|
string kernelName = "normalizeKernel";
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
int step = buffer.step / sizeof(float);
|
||||||
|
factor_offset = step * height * factor_offset;
|
||||||
|
dst_offset = step * height * dst_offset;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buffer.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&height));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&factor_offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {buffer.cols, height, 1};
|
||||||
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void interpolate::forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale,
|
||||||
|
int b_offset, int d_offset)
|
||||||
|
{
|
||||||
|
Context *clCxt = Context::getContext();
|
||||||
|
string kernelName = "forwardWarpKernel";
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
int f_step = u.step / sizeof(float); // flow step
|
||||||
|
int b_step = buffer.step / sizeof(float);
|
||||||
|
|
||||||
|
b_offset = b_step * src.rows * b_offset;
|
||||||
|
d_offset = b_step * src.rows * d_offset;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&u.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&v.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&f_step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&b_step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&b_offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&d_offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&time_scale));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {src.cols, src.rows, 1};
|
||||||
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void interpolate::vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v,
|
||||||
|
oclMat &buffer, int b_offset, float timeScale, int d_offset)
|
||||||
|
{
|
||||||
|
memsetKernel(0, buffer, src.rows, b_offset);
|
||||||
|
forwardWarpKernel(src, buffer, u, v, timeScale, b_offset, d_offset);
|
||||||
|
normalizeKernel(buffer, src.rows, b_offset, d_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void interpolate::blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer, float pos, oclMat &newFrame, cl_mem &tex_src0, cl_mem &tex_src1)
|
||||||
|
{
|
||||||
|
int step = buffer.step / sizeof(float);
|
||||||
|
|
||||||
|
Context *clCxt = Context::getContext();
|
||||||
|
string kernelName = "blendFramesKernel";
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&tex_src0));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&tex_src1));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&newFrame.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&frame0.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&frame0.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&pos));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {frame0.cols, frame0.rows, 1};
|
||||||
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)
|
||||||
|
{
|
||||||
|
cl_image_format format;
|
||||||
|
int err;
|
||||||
|
int depth = img.depth();
|
||||||
|
int channels = img.channels();
|
||||||
|
|
||||||
|
switch(depth)
|
||||||
|
{
|
||||||
|
case CV_8U:
|
||||||
|
format.image_channel_data_type = CL_UNSIGNED_INT8;
|
||||||
|
break;
|
||||||
|
case CV_32S:
|
||||||
|
format.image_channel_data_type = CL_UNSIGNED_INT32;
|
||||||
|
break;
|
||||||
|
case CV_32F:
|
||||||
|
format.image_channel_data_type = CL_FLOAT;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw std::exception();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
switch(channels)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
format.image_channel_order = CL_R;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
format.image_channel_order = CL_RGB;
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
format.image_channel_order = CL_RGBA;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw std::exception();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(texture)
|
||||||
|
{
|
||||||
|
openCLFree(texture);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if CL_VERSION_1_2
|
||||||
|
cl_image_desc desc;
|
||||||
|
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||||
|
desc.image_width = img.step / img.elemSize();
|
||||||
|
desc.image_height = img.rows;
|
||||||
|
desc.image_depth = 0;
|
||||||
|
desc.image_array_size = 1;
|
||||||
|
desc.image_row_pitch = 0;
|
||||||
|
desc.image_slice_pitch = 0;
|
||||||
|
desc.buffer = NULL;
|
||||||
|
desc.num_mip_levels = 0;
|
||||||
|
desc.num_samples = 0;
|
||||||
|
texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
|
||||||
|
#else
|
||||||
|
texture = clCreateImage2D(
|
||||||
|
Context::getContext()->impl->clContext,
|
||||||
|
CL_MEM_READ_WRITE,
|
||||||
|
&format,
|
||||||
|
img.step / img.elemSize(),
|
||||||
|
img.rows,
|
||||||
|
0,
|
||||||
|
NULL,
|
||||||
|
&err);
|
||||||
|
#endif
|
||||||
|
size_t origin[] = { 0, 0, 0 };
|
||||||
|
size_t region[] = { img.step / img.elemSize(), img.rows, 1 };
|
||||||
|
clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);
|
||||||
|
openCLSafeCall(err);
|
||||||
|
}
|
||||||
|
#endif//(HAVE_OPENCL)
|
||||||
|
|
||||||
@@ -70,9 +70,22 @@ __kernel void arithm_absdiff_D0 (__global uchar *src1, int src1_step, int src1_o
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = abs_diff(src1_data, src2_data);
|
uchar4 tmp_data = abs_diff(src1_data, src2_data);
|
||||||
@@ -242,9 +255,15 @@ __kernel void arithm_s_absdiff_C1_D0 (__global uchar *src1, int src1_step, int
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
|
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4_sat(abs_diff(convert_int4_sat(src1_data), src2_data));
|
uchar4 tmp_data = convert_uchar4_sat(abs_diff(convert_int4_sat(src1_data), src2_data));
|
||||||
|
|||||||
@@ -71,10 +71,22 @@ __kernel void arithm_add_D0 (__global uchar *src1, int src1_step, int src1_offse
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
|
short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
|
||||||
uchar4 tmp_data = convert_uchar4_sat(tmp);
|
uchar4 tmp_data = convert_uchar4_sat(tmp);
|
||||||
@@ -248,11 +260,31 @@ __kernel void arithm_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, i
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
int mask_index_fix = mask_index < 0 ? 0 : mask_index;
|
||||||
uchar4 mask_data = vload4(0, mask + mask_index);
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
uchar4 mask_data = vload4(0, mask + mask_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(mask_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (mask_index == -2) ? mask_data.zwxy:mask_data.yzwx;
|
||||||
|
mask_data.xyzw = (mask_index == -1) ? mask_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
||||||
short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
|
short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
|
||||||
uchar4 tmp_data = convert_uchar4_sat(tmp);
|
uchar4 tmp_data = convert_uchar4_sat(tmp);
|
||||||
|
|||||||
@@ -65,10 +65,16 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
|
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
||||||
int4 tmp = convert_int4_sat(src1_data) + src2_data;
|
int4 tmp = convert_int4_sat(src1_data) + src2_data;
|
||||||
uchar4 tmp_data = convert_uchar4_sat(tmp);
|
uchar4 tmp_data = convert_uchar4_sat(tmp);
|
||||||
|
|||||||
@@ -68,10 +68,23 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
int mask_index_fix = mask_index < 0 ? 0 : mask_index;
|
||||||
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
|
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
|
||||||
uchar4 mask_data = vload4(0, mask + mask_index);
|
uchar4 mask_data = vload4(0, mask + mask_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(mask_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (mask_index == -2) ? mask_data.zwxy:mask_data.yzwx;
|
||||||
|
mask_data.xyzw = (mask_index == -1) ? mask_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
||||||
int4 tmp = convert_int4_sat(src1_data) + src2_data;
|
int4 tmp = convert_int4_sat(src1_data) + src2_data;
|
||||||
|
|||||||
@@ -71,9 +71,22 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of
|
|||||||
int dst_end_1 = mad24(rows - y - 1, dst_step, dst_offset + dst_step1);
|
int dst_end_1 = mad24(rows - y - 1, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index_0 = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index_0 = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
int dst_index_1 = mad24(rows - y - 1, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index_1 = mad24(rows - y - 1, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src_index_0 < 0 ? 0 : src_index_0;
|
||||||
uchar4 src_data_0 = vload4(0, src + src_index_0);
|
int src2_index_fix = src_index_1 < 0 ? 0 : src_index_1;
|
||||||
uchar4 src_data_1 = vload4(0, src + src_index_1);
|
uchar4 src_data_0 = vload4(0, src + src1_index_fix);
|
||||||
|
uchar4 src_data_1 = vload4(0, src + src2_index_fix);
|
||||||
|
if(src_index_0 < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src_index_0 == -2) ? src_data_0.zwxy:src_data_0.yzwx;
|
||||||
|
src_data_0.xyzw = (src_index_0 == -1) ? src_data_0.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src_index_1 < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src_index_1 == -2) ? src_data_1.zwxy:src_data_1.yzwx;
|
||||||
|
src_data_1.xyzw = (src_index_1 == -1) ? src_data_1.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 dst_data_0 = *((__global uchar4 *)(dst + dst_index_0));
|
uchar4 dst_data_0 = *((__global uchar4 *)(dst + dst_index_0));
|
||||||
uchar4 dst_data_1 = *((__global uchar4 *)(dst + dst_index_1));
|
uchar4 dst_data_1 = *((__global uchar4 *)(dst + dst_index_1));
|
||||||
|
|||||||
237
modules/ocl/src/kernels/build_warps.cl
Normal file
237
modules/ocl/src/kernels/build_warps.cl
Normal file
@@ -0,0 +1,237 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
__kernel
|
||||||
|
void buildWarpPlaneMaps
|
||||||
|
(
|
||||||
|
__global float * map_x,
|
||||||
|
__global float * map_y,
|
||||||
|
__constant float * KRT,
|
||||||
|
int tl_u,
|
||||||
|
int tl_v,
|
||||||
|
int cols,
|
||||||
|
int rows,
|
||||||
|
int step_x,
|
||||||
|
int step_y,
|
||||||
|
float scale
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int du = get_global_id(0);
|
||||||
|
int dv = get_global_id(1);
|
||||||
|
step_x /= sizeof(float);
|
||||||
|
step_y /= sizeof(float);
|
||||||
|
|
||||||
|
__constant float * ck_rinv = KRT;
|
||||||
|
__constant float * ct = KRT + 9;
|
||||||
|
|
||||||
|
if (du < cols && dv < rows)
|
||||||
|
{
|
||||||
|
float u = tl_u + du;
|
||||||
|
float v = tl_v + dv;
|
||||||
|
float x, y;
|
||||||
|
|
||||||
|
float x_ = u / scale - ct[0];
|
||||||
|
float y_ = v / scale - ct[1];
|
||||||
|
|
||||||
|
float z;
|
||||||
|
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]);
|
||||||
|
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]);
|
||||||
|
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]);
|
||||||
|
|
||||||
|
x /= z;
|
||||||
|
y /= z;
|
||||||
|
|
||||||
|
map_x[dv * step_x + du] = x;
|
||||||
|
map_y[dv * step_y + du] = y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel
|
||||||
|
void buildWarpCylindricalMaps
|
||||||
|
(
|
||||||
|
__global float * map_x,
|
||||||
|
__global float * map_y,
|
||||||
|
__constant float * ck_rinv,
|
||||||
|
int tl_u,
|
||||||
|
int tl_v,
|
||||||
|
int cols,
|
||||||
|
int rows,
|
||||||
|
int step_x,
|
||||||
|
int step_y,
|
||||||
|
float scale
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int du = get_global_id(0);
|
||||||
|
int dv = get_global_id(1);
|
||||||
|
step_x /= sizeof(float);
|
||||||
|
step_y /= sizeof(float);
|
||||||
|
|
||||||
|
if (du < cols && dv < rows)
|
||||||
|
{
|
||||||
|
float u = tl_u + du;
|
||||||
|
float v = tl_v + dv;
|
||||||
|
float x, y;
|
||||||
|
|
||||||
|
u /= scale;
|
||||||
|
float x_ = sin(u);
|
||||||
|
float y_ = v / scale;
|
||||||
|
float z_ = cos(u);
|
||||||
|
|
||||||
|
float z;
|
||||||
|
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
|
||||||
|
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
|
||||||
|
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
|
||||||
|
|
||||||
|
if (z > 0) { x /= z; y /= z; }
|
||||||
|
else x = y = -1;
|
||||||
|
|
||||||
|
map_x[dv * step_x + du] = x;
|
||||||
|
map_y[dv * step_y + du] = y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel
|
||||||
|
void buildWarpSphericalMaps
|
||||||
|
(
|
||||||
|
__global float * map_x,
|
||||||
|
__global float * map_y,
|
||||||
|
__constant float * ck_rinv,
|
||||||
|
int tl_u,
|
||||||
|
int tl_v,
|
||||||
|
int cols,
|
||||||
|
int rows,
|
||||||
|
int step_x,
|
||||||
|
int step_y,
|
||||||
|
float scale
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int du = get_global_id(0);
|
||||||
|
int dv = get_global_id(1);
|
||||||
|
step_x /= sizeof(float);
|
||||||
|
step_y /= sizeof(float);
|
||||||
|
|
||||||
|
if (du < cols && dv < rows)
|
||||||
|
{
|
||||||
|
float u = tl_u + du;
|
||||||
|
float v = tl_v + dv;
|
||||||
|
float x, y;
|
||||||
|
|
||||||
|
v /= scale;
|
||||||
|
u /= scale;
|
||||||
|
|
||||||
|
float sinv = sin(v);
|
||||||
|
float x_ = sinv * sin(u);
|
||||||
|
float y_ = - cos(v);
|
||||||
|
float z_ = sinv * cos(u);
|
||||||
|
|
||||||
|
float z;
|
||||||
|
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
|
||||||
|
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
|
||||||
|
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
|
||||||
|
|
||||||
|
if (z > 0) { x /= z; y /= z; }
|
||||||
|
else x = y = -1;
|
||||||
|
|
||||||
|
map_x[dv * step_x + du] = x;
|
||||||
|
map_y[dv * step_y + du] = y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel
|
||||||
|
void buildWarpAffineMaps
|
||||||
|
(
|
||||||
|
__global float * xmap,
|
||||||
|
__global float * ymap,
|
||||||
|
__constant float * c_warpMat,
|
||||||
|
int cols,
|
||||||
|
int rows,
|
||||||
|
int step_x,
|
||||||
|
int step_y
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int x = get_global_id(0);
|
||||||
|
int y = get_global_id(1);
|
||||||
|
step_x /= sizeof(float);
|
||||||
|
step_y /= sizeof(float);
|
||||||
|
|
||||||
|
if (x < cols && y < rows)
|
||||||
|
{
|
||||||
|
const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
|
||||||
|
const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
|
||||||
|
|
||||||
|
map_x[y * step_x + x] = xcoo;
|
||||||
|
map_y[y * step_y + x] = ycoo;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel
|
||||||
|
void buildWarpPerspectiveMaps
|
||||||
|
(
|
||||||
|
__global float * xmap,
|
||||||
|
__global float * ymap,
|
||||||
|
__constant float * c_warpMat,
|
||||||
|
int cols,
|
||||||
|
int rows,
|
||||||
|
int step_x,
|
||||||
|
int step_y
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int x = get_global_id(0);
|
||||||
|
int y = get_global_id(1);
|
||||||
|
step_x /= sizeof(float);
|
||||||
|
step_y /= sizeof(float);
|
||||||
|
|
||||||
|
if (x < cols && y < rows)
|
||||||
|
{
|
||||||
|
const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
|
||||||
|
|
||||||
|
const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
|
||||||
|
const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
|
||||||
|
|
||||||
|
map_x[y * step_x + x] = xcoo;
|
||||||
|
map_y[y * step_y + x] = ycoo;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@@ -254,7 +254,8 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch
|
|||||||
//ss = convert_uint4(src[cur_addr]);
|
//ss = convert_uint4(src[cur_addr]);
|
||||||
|
|
||||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||||
ss = convert_uint4(src[(startY+i)*(src_step>>2) + cur_col]);
|
if(con)
|
||||||
|
ss = convert_uint4(src[(startY+i)*(src_step>>2) + cur_col]);
|
||||||
|
|
||||||
data[i] = con ? ss : 0;
|
data[i] = con ? ss : 0;
|
||||||
}
|
}
|
||||||
@@ -269,6 +270,7 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch
|
|||||||
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
||||||
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
||||||
|
|
||||||
|
|
||||||
data[i] = convert_uint4(src[selected_row * (src_step>>2) + selected_col]);
|
data[i] = convert_uint4(src[selected_row * (src_step>>2) + selected_col]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -334,11 +336,12 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float
|
|||||||
for(int i=0; i < ksY+1; i++)
|
for(int i=0; i < ksY+1; i++)
|
||||||
{
|
{
|
||||||
con = startX+col >= 0 && startX+col < src_whole_cols && startY+i >= 0 && startY+i < src_whole_rows;
|
con = startX+col >= 0 && startX+col < src_whole_cols && startY+i >= 0 && startY+i < src_whole_rows;
|
||||||
// int cur_addr = clamp((startY+i)*(src_step>>2)+(startX+col),0,end_addr);
|
//int cur_addr = clamp((startY+i)*(src_step>>2)+(startX+col),0,end_addr);
|
||||||
// ss = src[cur_addr];
|
//ss = src[cur_addr];
|
||||||
|
|
||||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||||
ss = src[(startY+i)*(src_step>>2) + cur_col];
|
//ss = src[(startY+i)*(src_step>>2) + cur_col];
|
||||||
|
ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>2) + cur_col]:0;
|
||||||
|
|
||||||
data[i] = con ? ss : 0.f;
|
data[i] = con ? ss : 0.f;
|
||||||
}
|
}
|
||||||
@@ -422,7 +425,8 @@ __kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global floa
|
|||||||
//ss = src[cur_addr];
|
//ss = src[cur_addr];
|
||||||
|
|
||||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||||
ss = src[(startY+i)*(src_step>>4) + cur_col];
|
//ss = src[(startY+i)*(src_step>>4) + cur_col];
|
||||||
|
ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>4) + cur_col]:0;
|
||||||
|
|
||||||
data[i] = con ? ss : (float4)(0.0,0.0,0.0,0.0);
|
data[i] = con ? ss : (float4)(0.0,0.0,0.0,0.0);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,84 +31,8 @@
|
|||||||
// and on any theory of liability, whether in contract, strict liability,
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
// or tort (including negligence or otherwise) arising in any way out of
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
// the use of this software, even if advised of the possibility of such damage.
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
//
|
|
||||||
//
|
|
||||||
|
|
||||||
|
__kernel void bilateral_C1_D0(__global uchar *dst,
|
||||||
//#pragma OPENCL EXTENSION cl_amd_printf :enable
|
|
||||||
__kernel
|
|
||||||
void bilateral4(__global uchar4 *dst,
|
|
||||||
__global uchar4 *src,
|
|
||||||
int rows,
|
|
||||||
int cols,
|
|
||||||
int channels,
|
|
||||||
int radius,
|
|
||||||
int wholerows,
|
|
||||||
int wholecols,
|
|
||||||
int src_step,
|
|
||||||
int dst_step,
|
|
||||||
int src_offset,
|
|
||||||
int dst_offset,
|
|
||||||
__constant float *sigClr,
|
|
||||||
__constant float *sigSpc)
|
|
||||||
{
|
|
||||||
uint lidx = get_local_id(0);
|
|
||||||
uint lidy = get_local_id(1);
|
|
||||||
|
|
||||||
uint gdx = get_global_id(0);
|
|
||||||
uint gdy = get_global_id(1);
|
|
||||||
|
|
||||||
uint gidx = gdx >=cols?cols-1:gdx;
|
|
||||||
uint gidy = gdy >=rows?rows-1:gdy;
|
|
||||||
|
|
||||||
uchar4 p,q,tmp;
|
|
||||||
|
|
||||||
float4 pf = 0,pq = 0,pd = 0;
|
|
||||||
float wt =0;
|
|
||||||
|
|
||||||
int r = radius;
|
|
||||||
int ij = 0;
|
|
||||||
int ct = 0;
|
|
||||||
|
|
||||||
uint index_src = src_offset/4 + gidy*src_step/4 + gidx;
|
|
||||||
uint index_dst = dst_offset/4 + gidy*dst_step/4 + gidx;
|
|
||||||
|
|
||||||
p = src[index_src];
|
|
||||||
|
|
||||||
uint gx,gy;
|
|
||||||
uint src_index,dst_index;
|
|
||||||
|
|
||||||
for(int ii = -r;ii<r+1;ii++)
|
|
||||||
{
|
|
||||||
for(int jj =-r;jj<r+1;jj++)
|
|
||||||
{
|
|
||||||
ij = ii*ii+jj*jj;
|
|
||||||
if(ij > mul24(radius,radius)) continue;
|
|
||||||
gx = gidx + jj;
|
|
||||||
gy = gidy + ii;
|
|
||||||
|
|
||||||
src_index = src_offset/4 + gy * src_step/4 + gx;
|
|
||||||
q = src[src_index];
|
|
||||||
|
|
||||||
|
|
||||||
ct = abs(p.x-q.x)+abs(p.y-q.y)+abs(p.z-q.z);
|
|
||||||
wt =sigClr[ct]*sigSpc[(ii+radius)*(2*radius+1)+jj+radius];
|
|
||||||
|
|
||||||
pf.x += q.x*wt;
|
|
||||||
pf.y += q.y*wt;
|
|
||||||
pf.z += q.z*wt;
|
|
||||||
// pf.w += q.w*wt;
|
|
||||||
|
|
||||||
pq += wt;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pd = pf/pq;
|
|
||||||
dst[index_dst] = convert_uchar4_rte(pd);
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void bilateral(__global uchar *dst,
|
|
||||||
__global const uchar *src,
|
__global const uchar *src,
|
||||||
const int dst_rows,
|
const int dst_rows,
|
||||||
const int dst_cols,
|
const int dst_cols,
|
||||||
@@ -128,8 +52,8 @@ __kernel void bilateral(__global uchar *dst,
|
|||||||
if((gidy<dst_rows) && (gidx<dst_cols))
|
if((gidy<dst_rows) && (gidx<dst_cols))
|
||||||
{
|
{
|
||||||
int src_addr = mad24(gidy+radius,src_step,gidx+radius);
|
int src_addr = mad24(gidy+radius,src_step,gidx+radius);
|
||||||
int dst_addr = mad24(gidy,src_step,gidx+dst_offset);
|
int dst_addr = mad24(gidy,dst_step,gidx+dst_offset);
|
||||||
float sum = 0, wsum = 0;
|
float sum = 0.f, wsum = 0.f;
|
||||||
|
|
||||||
int val0 = (int)src[src_addr];
|
int val0 = (int)src[src_addr];
|
||||||
for(int k = 0; k < maxk; k++ )
|
for(int k = 0; k < maxk; k++ )
|
||||||
@@ -142,4 +66,73 @@ __kernel void bilateral(__global uchar *dst,
|
|||||||
dst[dst_addr] = convert_uchar_rtz(sum/wsum+0.5f);
|
dst[dst_addr] = convert_uchar_rtz(sum/wsum+0.5f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
__kernel void bilateral2_C1_D0(__global uchar *dst,
|
||||||
|
__global const uchar *src,
|
||||||
|
const int dst_rows,
|
||||||
|
const int dst_cols,
|
||||||
|
const int maxk,
|
||||||
|
const int radius,
|
||||||
|
const int dst_step,
|
||||||
|
const int dst_offset,
|
||||||
|
const int src_step,
|
||||||
|
const int src_rows,
|
||||||
|
const int src_cols,
|
||||||
|
__constant float *color_weight,
|
||||||
|
__constant float *space_weight,
|
||||||
|
__constant int *space_ofs)
|
||||||
|
{
|
||||||
|
int gidx = get_global_id(0)<<2;
|
||||||
|
int gidy = get_global_id(1);
|
||||||
|
if((gidy<dst_rows) && (gidx<dst_cols))
|
||||||
|
{
|
||||||
|
int src_addr = mad24(gidy+radius,src_step,gidx+radius);
|
||||||
|
int dst_addr = mad24(gidy,dst_step,gidx+dst_offset);
|
||||||
|
float4 sum = (float4)(0.f), wsum = (float4)(0.f);
|
||||||
|
|
||||||
|
int4 val0 = convert_int4(vload4(0,src+src_addr));
|
||||||
|
for(int k = 0; k < maxk; k++ )
|
||||||
|
{
|
||||||
|
int4 val = convert_int4(vload4(0,src+src_addr + space_ofs[k]));
|
||||||
|
float4 w = (float4)(space_weight[k])*(float4)(color_weight[abs(val.x - val0.x)],color_weight[abs(val.y - val0.y)],color_weight[abs(val.z - val0.z)],color_weight[abs(val.w - val0.w)]);
|
||||||
|
sum += convert_float4(val)*w;
|
||||||
|
wsum += w;
|
||||||
|
}
|
||||||
|
*(__global uchar4*)(dst+dst_addr) = convert_uchar4_rtz(sum/wsum+0.5f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
__kernel void bilateral_C4_D0(__global uchar4 *dst,
|
||||||
|
__global const uchar4 *src,
|
||||||
|
const int dst_rows,
|
||||||
|
const int dst_cols,
|
||||||
|
const int maxk,
|
||||||
|
const int radius,
|
||||||
|
const int dst_step,
|
||||||
|
const int dst_offset,
|
||||||
|
const int src_step,
|
||||||
|
const int src_rows,
|
||||||
|
const int src_cols,
|
||||||
|
__constant float *color_weight,
|
||||||
|
__constant float *space_weight,
|
||||||
|
__constant int *space_ofs)
|
||||||
|
{
|
||||||
|
int gidx = get_global_id(0);
|
||||||
|
int gidy = get_global_id(1);
|
||||||
|
if((gidy<dst_rows) && (gidx<dst_cols))
|
||||||
|
{
|
||||||
|
int src_addr = mad24(gidy+radius,src_step,gidx+radius);
|
||||||
|
int dst_addr = mad24(gidy,dst_step,gidx+dst_offset);
|
||||||
|
float4 sum = (float4)0.f;
|
||||||
|
float wsum = 0.f;
|
||||||
|
|
||||||
|
int4 val0 = convert_int4(src[src_addr]);
|
||||||
|
for(int k = 0; k < maxk; k++ )
|
||||||
|
{
|
||||||
|
int4 val = convert_int4(src[src_addr + space_ofs[k]]);
|
||||||
|
float w = space_weight[k]*color_weight[abs(val.x - val0.x)+abs(val.y - val0.y)+abs(val.z - val0.z)];
|
||||||
|
sum += convert_float4(val)*(float4)w;
|
||||||
|
wsum += w;
|
||||||
|
}
|
||||||
|
wsum=1.f/wsum;
|
||||||
|
dst[dst_addr] = convert_uchar4_rtz(sum*(float4)wsum+(float4)0.5f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -144,16 +144,18 @@ __kernel void __attribute__((reqd_work_group_size(1,HISTOGRAM256_BIN_COUNT,1)))c
|
|||||||
int rowIndex = mad24(gy, gn, gx);
|
int rowIndex = mad24(gy, gn, gx);
|
||||||
// rowIndex &= (PARTIAL_HISTOGRAM256_COUNT - 1);
|
// rowIndex &= (PARTIAL_HISTOGRAM256_COUNT - 1);
|
||||||
|
|
||||||
__local int subhist[HISTOGRAM256_LOCAL_MEM_SIZE + 1];
|
__local int subhist[HISTOGRAM256_LOCAL_MEM_SIZE];
|
||||||
subhist[lidy] = 0;
|
subhist[lidy] = 0;
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
gidx = ((gidx>=left_col) ? (gidx+cols) : gidx);
|
gidx = ((gidx>=left_col) ? (gidx+cols) : gidx);
|
||||||
int src_index = src_offset + mad24(gidy, src_step, gidx);
|
if(gidy<rows)
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
{
|
||||||
int p = (int)src[src_index];
|
int src_index = src_offset + mad24(gidy, src_step, gidx);
|
||||||
p = gidy >= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p;
|
int p = (int)src[src_index];
|
||||||
atomic_inc(subhist + p);
|
// p = gidy >= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p;
|
||||||
|
atomic_inc(subhist + p);
|
||||||
|
}
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
globalHist[mad24(rowIndex, hist_step, lidy)] += subhist[lidy];
|
globalHist[mad24(rowIndex, hist_step, lidy)] += subhist[lidy];
|
||||||
|
|||||||
252
modules/ocl/src/kernels/interpolate_frames.cl
Normal file
252
modules/ocl/src/kernels/interpolate_frames.cl
Normal file
@@ -0,0 +1,252 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
||||||
|
|
||||||
|
// Image read mode
|
||||||
|
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
|
||||||
|
|
||||||
|
// atomic add for 32bit floating point
|
||||||
|
inline void atomic_addf(volatile __global float *source, const float operand) {
|
||||||
|
union {
|
||||||
|
unsigned int intVal;
|
||||||
|
float floatVal;
|
||||||
|
} newVal;
|
||||||
|
union {
|
||||||
|
unsigned int intVal;
|
||||||
|
float floatVal;
|
||||||
|
} prevVal;
|
||||||
|
do {
|
||||||
|
prevVal.floatVal = *source;
|
||||||
|
newVal.floatVal = prevVal.floatVal + operand;
|
||||||
|
} while (atomic_cmpxchg((volatile __global unsigned int *)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void memsetKernel(
|
||||||
|
float val,
|
||||||
|
__global float * image,
|
||||||
|
int width,
|
||||||
|
int height,
|
||||||
|
int step, // in element
|
||||||
|
int offset
|
||||||
|
)
|
||||||
|
{
|
||||||
|
if(get_global_id(0) >= width || get_global_id(1) >= height)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
image += offset;
|
||||||
|
image[get_global_id(0) + get_global_id(1) * step] = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void normalizeKernel(
|
||||||
|
__global float * buffer,
|
||||||
|
int width,
|
||||||
|
int height,
|
||||||
|
int step,
|
||||||
|
int f_offset,
|
||||||
|
int d_offset
|
||||||
|
)
|
||||||
|
{
|
||||||
|
__global float * factors = buffer + f_offset;
|
||||||
|
__global float * dst = buffer + d_offset;
|
||||||
|
|
||||||
|
int j = get_global_id(0);
|
||||||
|
int i = get_global_id(1);
|
||||||
|
|
||||||
|
if(j >= width || i >= height)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
float scale = factors[step * i + j];
|
||||||
|
float invScale = (scale == 0.0f) ? 1.0f : (1.0f / scale);
|
||||||
|
|
||||||
|
dst[step * i + j] *= invScale;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void forwardWarpKernel(
|
||||||
|
__global const float * src,
|
||||||
|
__global float * buffer,
|
||||||
|
__global const float * u,
|
||||||
|
__global const float * v,
|
||||||
|
const int w,
|
||||||
|
const int h,
|
||||||
|
const int flow_stride,
|
||||||
|
const int image_stride,
|
||||||
|
const int factor_offset,
|
||||||
|
const int dst_offset,
|
||||||
|
const float time_scale
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int j = get_global_id(0);
|
||||||
|
int i = get_global_id(1);
|
||||||
|
|
||||||
|
if (i >= h || j >= w) return;
|
||||||
|
|
||||||
|
volatile __global float * normalization_factor = (volatile __global float *) buffer + factor_offset;
|
||||||
|
volatile __global float * dst = (volatile __global float *)buffer + dst_offset;
|
||||||
|
|
||||||
|
int flow_row_offset = i * flow_stride;
|
||||||
|
int image_row_offset = i * image_stride;
|
||||||
|
|
||||||
|
//bottom left corner of a target pixel
|
||||||
|
float cx = u[flow_row_offset + j] * time_scale + (float)j + 1.0f;
|
||||||
|
float cy = v[flow_row_offset + j] * time_scale + (float)i + 1.0f;
|
||||||
|
// pixel containing bottom left corner
|
||||||
|
float px;
|
||||||
|
float py;
|
||||||
|
float dx = modf(cx, &px);
|
||||||
|
float dy = modf(cy, &py);
|
||||||
|
// target pixel integer coords
|
||||||
|
int tx;
|
||||||
|
int ty;
|
||||||
|
tx = (int) px;
|
||||||
|
ty = (int) py;
|
||||||
|
float value = src[image_row_offset + j];
|
||||||
|
float weight;
|
||||||
|
// fill pixel containing bottom right corner
|
||||||
|
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||||
|
{
|
||||||
|
weight = dx * dy;
|
||||||
|
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
||||||
|
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
||||||
|
}
|
||||||
|
|
||||||
|
// fill pixel containing bottom left corner
|
||||||
|
tx -= 1;
|
||||||
|
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||||
|
{
|
||||||
|
weight = (1.0f - dx) * dy;
|
||||||
|
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
||||||
|
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
||||||
|
}
|
||||||
|
|
||||||
|
// fill pixel containing upper left corner
|
||||||
|
ty -= 1;
|
||||||
|
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||||
|
{
|
||||||
|
weight = (1.0f - dx) * (1.0f - dy);
|
||||||
|
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
||||||
|
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
||||||
|
}
|
||||||
|
|
||||||
|
// fill pixel containing upper right corner
|
||||||
|
tx += 1;
|
||||||
|
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||||
|
{
|
||||||
|
weight = dx * (1.0f - dy);
|
||||||
|
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
||||||
|
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// define buffer offsets
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
O0_OS = 0,
|
||||||
|
O1_OS,
|
||||||
|
U_OS,
|
||||||
|
V_OS,
|
||||||
|
UR_OS,
|
||||||
|
VR_OS
|
||||||
|
};
|
||||||
|
|
||||||
|
__kernel void blendFramesKernel(
|
||||||
|
image2d_t tex_src0,
|
||||||
|
image2d_t tex_src1,
|
||||||
|
__global float * buffer,
|
||||||
|
__global float * out,
|
||||||
|
int w,
|
||||||
|
int h,
|
||||||
|
int step,
|
||||||
|
float theta
|
||||||
|
)
|
||||||
|
{
|
||||||
|
__global float * u = buffer + h * step * U_OS;
|
||||||
|
__global float * v = buffer + h * step * V_OS;
|
||||||
|
__global float * ur = buffer + h * step * UR_OS;
|
||||||
|
__global float * vr = buffer + h * step * VR_OS;
|
||||||
|
__global float * o0 = buffer + h * step * O0_OS;
|
||||||
|
__global float * o1 = buffer + h * step * O1_OS;
|
||||||
|
|
||||||
|
int ix = get_global_id(0);
|
||||||
|
int iy = get_global_id(1);
|
||||||
|
|
||||||
|
if(ix >= w || iy >= h) return;
|
||||||
|
|
||||||
|
int pos = ix + step * iy;
|
||||||
|
|
||||||
|
float _u = u[pos];
|
||||||
|
float _v = v[pos];
|
||||||
|
|
||||||
|
float _ur = ur[pos];
|
||||||
|
float _vr = vr[pos];
|
||||||
|
|
||||||
|
float x = (float)ix + 0.5f;
|
||||||
|
float y = (float)iy + 0.5f;
|
||||||
|
bool b0 = o0[pos] > 1e-4f;
|
||||||
|
bool b1 = o1[pos] > 1e-4f;
|
||||||
|
|
||||||
|
float2 coord0 = (float2)(x - _u * theta, y - _v * theta);
|
||||||
|
float2 coord1 = (float2)(x + _u * (1.0f - theta), y + _v * (1.0f - theta));
|
||||||
|
|
||||||
|
if (b0 && b1)
|
||||||
|
{
|
||||||
|
// pixel is visible on both frames
|
||||||
|
out[pos] = read_imagef(tex_src0, sampler, coord0).x * (1.0f - theta) +
|
||||||
|
read_imagef(tex_src1, sampler, coord1).x * theta;
|
||||||
|
}
|
||||||
|
else if (b0)
|
||||||
|
{
|
||||||
|
// visible on the first frame only
|
||||||
|
out[pos] = read_imagef(tex_src0, sampler, coord0).x;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// visible on the second frame only
|
||||||
|
out[pos] = read_imagef(tex_src1, sampler, coord1).x;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -52,7 +52,10 @@ using namespace cv::ocl;
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#if !defined (HAVE_OPENCL)
|
#if !defined (HAVE_OPENCL)
|
||||||
void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
|
void cv::ocl::matchTemplate(const oclMat &, const oclMat &, oclMat &)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
//helper routines
|
//helper routines
|
||||||
namespace cv
|
namespace cv
|
||||||
@@ -64,443 +67,430 @@ namespace cv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cv { namespace ocl
|
namespace cv
|
||||||
{
|
{
|
||||||
void matchTemplate_SQDIFF(
|
namespace ocl
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
|
||||||
|
|
||||||
void matchTemplate_SQDIFF_NORMED(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
|
||||||
|
|
||||||
void matchTemplate_CCORR(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
|
||||||
|
|
||||||
void matchTemplate_CCORR_NORMED(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
|
||||||
|
|
||||||
void matchTemplate_CCOFF(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
|
||||||
|
|
||||||
void matchTemplate_CCOFF_NORMED(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
|
||||||
|
|
||||||
|
|
||||||
void matchTemplateNaive_SQDIFF(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn);
|
|
||||||
|
|
||||||
void matchTemplateNaive_CCORR(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn);
|
|
||||||
|
|
||||||
// Evaluates optimal template's area threshold. If
|
|
||||||
// template's area is less than the threshold, we use naive match
|
|
||||||
// template version, otherwise FFT-based (if available)
|
|
||||||
int getTemplateThreshold(int method, int depth)
|
|
||||||
{
|
{
|
||||||
switch (method)
|
void matchTemplate_SQDIFF(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
|
||||||
|
|
||||||
|
void matchTemplate_SQDIFF_NORMED(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
|
||||||
|
|
||||||
|
void matchTemplate_CCORR(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
|
||||||
|
|
||||||
|
void matchTemplate_CCORR_NORMED(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
|
||||||
|
|
||||||
|
void matchTemplate_CCOFF(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
|
||||||
|
|
||||||
|
void matchTemplate_CCOFF_NORMED(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
|
||||||
|
|
||||||
|
|
||||||
|
void matchTemplateNaive_SQDIFF(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, int cn);
|
||||||
|
|
||||||
|
void matchTemplateNaive_CCORR(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, int cn);
|
||||||
|
|
||||||
|
// Evaluates optimal template's area threshold. If
|
||||||
|
// template's area is less than the threshold, we use naive match
|
||||||
|
// template version, otherwise FFT-based (if available)
|
||||||
|
int getTemplateThreshold(int method, int depth)
|
||||||
{
|
{
|
||||||
case CV_TM_CCORR:
|
switch (method)
|
||||||
if (depth == CV_32F) return 250;
|
|
||||||
if (depth == CV_8U) return 300;
|
|
||||||
break;
|
|
||||||
case CV_TM_SQDIFF:
|
|
||||||
if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
|
|
||||||
if (depth == CV_8U) return 300;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// SQDIFF
|
|
||||||
void matchTemplate_SQDIFF(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &)
|
|
||||||
{
|
|
||||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
|
||||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
|
||||||
{
|
|
||||||
matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// TODO
|
|
||||||
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void matchTemplate_SQDIFF_NORMED(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
|
||||||
{
|
|
||||||
matchTemplate_CCORR(image,templ,result,buf);
|
|
||||||
buf.image_sums.resize(1);
|
|
||||||
|
|
||||||
|
|
||||||
integral(image.reshape(1), buf.image_sums[0]);
|
|
||||||
|
|
||||||
#if SQRSUM_FIXED
|
|
||||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
|
||||||
#else
|
|
||||||
Mat sqr_mat = templ.reshape(1);
|
|
||||||
unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
Context *clCxt = image.clCxt;
|
|
||||||
string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
|
|
||||||
vector< pair<size_t, const void *> > args;
|
|
||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
|
||||||
|
|
||||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
|
||||||
size_t localThreads[3] = {32, 8, 1};
|
|
||||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
|
|
||||||
}
|
|
||||||
|
|
||||||
void matchTemplateNaive_SQDIFF(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, int)
|
|
||||||
{
|
|
||||||
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
|
||||||
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
|
|
||||||
);
|
|
||||||
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
|
|
||||||
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
|
||||||
|
|
||||||
Context *clCxt = image.clCxt;
|
|
||||||
string kernelName = "matchTemplate_Naive_SQDIFF";
|
|
||||||
|
|
||||||
vector< pair<size_t, const void *> > args;
|
|
||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
|
||||||
|
|
||||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
|
||||||
size_t localThreads[3] = {32, 8, 1};
|
|
||||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// CCORR
|
|
||||||
void matchTemplate_CCORR(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
|
||||||
{
|
|
||||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
|
||||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
|
||||||
{
|
|
||||||
matchTemplateNaive_CCORR(image, templ, result, image.channels());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
|
|
||||||
if(image.depth() == CV_8U && templ.depth() == CV_8U)
|
|
||||||
{
|
{
|
||||||
image.convertTo(buf.imagef, CV_32F);
|
case CV_TM_CCORR:
|
||||||
templ.convertTo(buf.templf, CV_32F);
|
if (depth == CV_32F) return 250;
|
||||||
|
if (depth == CV_8U) return 300;
|
||||||
|
break;
|
||||||
|
case CV_TM_SQDIFF:
|
||||||
|
if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
|
||||||
|
if (depth == CV_8U) return 300;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
CV_Assert(image.channels() == 1);
|
CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
|
||||||
oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
|
return 0;
|
||||||
filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0));
|
|
||||||
result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1));
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void matchTemplate_CCORR_NORMED(
|
//////////////////////////////////////////////////////////////////////
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
// SQDIFF
|
||||||
{
|
void matchTemplate_SQDIFF(
|
||||||
matchTemplate_CCORR(image,templ,result,buf);
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &)
|
||||||
buf.image_sums.resize(1);
|
|
||||||
buf.image_sqsums.resize(1);
|
|
||||||
|
|
||||||
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
|
||||||
#if SQRSUM_FIXED
|
|
||||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
|
||||||
#else
|
|
||||||
oclMat templ_c1 = templ.reshape(1);
|
|
||||||
multiply(templ_c1, templ_c1, templ_c1);
|
|
||||||
unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
|
|
||||||
#endif
|
|
||||||
Context *clCxt = image.clCxt;
|
|
||||||
string kernelName = "normalizeKernel";
|
|
||||||
vector< pair<size_t, const void *> > args;
|
|
||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
|
||||||
|
|
||||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
|
||||||
size_t localThreads[3] = {32, 8, 1};
|
|
||||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
|
|
||||||
}
|
|
||||||
|
|
||||||
void matchTemplateNaive_CCORR(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, int)
|
|
||||||
{
|
|
||||||
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
|
||||||
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
|
|
||||||
);
|
|
||||||
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
|
|
||||||
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
|
||||||
|
|
||||||
Context *clCxt = image.clCxt;
|
|
||||||
string kernelName = "matchTemplate_Naive_CCORR";
|
|
||||||
|
|
||||||
vector< pair<size_t, const void *> > args;
|
|
||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
|
||||||
|
|
||||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
|
||||||
size_t localThreads[3] = {32, 8, 1};
|
|
||||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
|
||||||
}
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// CCOFF
|
|
||||||
void matchTemplate_CCOFF(
|
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
|
||||||
{
|
|
||||||
CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
|
|
||||||
|
|
||||||
matchTemplate_CCORR(image,templ,result,buf);
|
|
||||||
|
|
||||||
Context *clCxt = image.clCxt;
|
|
||||||
string kernelName;
|
|
||||||
|
|
||||||
kernelName = "matchTemplate_Prepared_CCOFF";
|
|
||||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
|
||||||
size_t localThreads[3] = {32, 8, 1};
|
|
||||||
|
|
||||||
vector< pair<size_t, const void *> > args;
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
|
||||||
// to be continued in the following section
|
|
||||||
if(image.channels() == 1)
|
|
||||||
{
|
{
|
||||||
|
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||||
|
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
||||||
|
{
|
||||||
|
matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// TODO
|
||||||
|
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void matchTemplate_SQDIFF_NORMED(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
|
||||||
|
{
|
||||||
|
matchTemplate_CCORR(image, templ, result, buf);
|
||||||
buf.image_sums.resize(1);
|
buf.image_sums.resize(1);
|
||||||
integral(image, buf.image_sums[0]);
|
|
||||||
|
|
||||||
float templ_sum = 0;
|
|
||||||
templ_sum = (float)sum(templ)[0] / templ.size().area();
|
integral(image.reshape(1), buf.image_sums[0]);
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
|
Context *clCxt = image.clCxt;
|
||||||
|
string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
void matchTemplateNaive_SQDIFF(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, int)
|
||||||
{
|
{
|
||||||
Vec4f templ_sum = Vec4f::all(0);
|
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
||||||
split(image,buf.images);
|
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
|
||||||
templ_sum = sum(templ) / templ.size().area();
|
);
|
||||||
buf.image_sums.resize(buf.images.size());
|
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.oclchannels() == 4) && result.channels() == 1);
|
||||||
|
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
||||||
|
|
||||||
|
Context *clCxt = image.clCxt;
|
||||||
|
string kernelName = "matchTemplate_Naive_SQDIFF";
|
||||||
|
|
||||||
for(int i = 0; i < image.channels(); i ++)
|
vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// CCORR
|
||||||
|
void matchTemplate_CCORR(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
|
||||||
|
{
|
||||||
|
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||||
|
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
||||||
{
|
{
|
||||||
integral(buf.images[i], buf.image_sums[i]);
|
matchTemplateNaive_CCORR(image, templ, result, image.channels());
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
switch(image.channels())
|
else
|
||||||
{
|
{
|
||||||
case 4:
|
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
if(image.depth() == CV_8U && templ.depth() == CV_8U)
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
|
{
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
|
image.convertTo(buf.imagef, CV_32F);
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
|
templ.convertTo(buf.templf, CV_32F);
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
}
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
CV_Assert(image.channels() == 1);
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
|
oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
|
filter2D(buf.imagef, o_result, CV_32F, buf.templf, Point(0, 0));
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
|
result = o_result(Rect(0, 0, image.rows - templ.rows + 1, image.cols - templ.cols + 1));
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
|
||||||
}
|
|
||||||
|
|
||||||
void matchTemplate_CCOFF_NORMED(
|
void matchTemplate_CCORR_NORMED(
|
||||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
|
||||||
{
|
|
||||||
image.convertTo(buf.imagef, CV_32F);
|
|
||||||
templ.convertTo(buf.templf, CV_32F);
|
|
||||||
|
|
||||||
matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
|
|
||||||
float scale = 1.f/templ.size().area();
|
|
||||||
|
|
||||||
Context *clCxt = image.clCxt;
|
|
||||||
string kernelName;
|
|
||||||
|
|
||||||
kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
|
|
||||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
|
||||||
size_t localThreads[3] = {32, 8, 1};
|
|
||||||
|
|
||||||
vector< pair<size_t, const void *> > args;
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&scale) );
|
|
||||||
// to be continued in the following section
|
|
||||||
if(image.channels() == 1)
|
|
||||||
{
|
{
|
||||||
|
matchTemplate_CCORR(image, templ, result, buf);
|
||||||
buf.image_sums.resize(1);
|
buf.image_sums.resize(1);
|
||||||
buf.image_sqsums.resize(1);
|
buf.image_sqsums.resize(1);
|
||||||
integral(image, buf.image_sums[0], buf.image_sqsums[0]);
|
|
||||||
float templ_sum = 0;
|
|
||||||
float templ_sqsum = 0;
|
|
||||||
templ_sum = (float)sum(templ)[0];
|
|
||||||
#if SQRSUM_FIXED
|
|
||||||
templ_sqsum = sqrSum(templ)[0];
|
|
||||||
#else
|
|
||||||
oclMat templ_sqr = templ;
|
|
||||||
multiply(templ,templ, templ_sqr);
|
|
||||||
templ_sqsum = saturate_cast<float>(sum(templ_sqr)[0]);
|
|
||||||
#endif //SQRSUM_FIXED
|
|
||||||
templ_sqsum -= scale * templ_sum * templ_sum;
|
|
||||||
templ_sum *= scale;
|
|
||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
|
Context *clCxt = image.clCxt;
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
|
string kernelName = "normalizeKernel";
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
|
vector< pair<size_t, const void *> > args;
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) );
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
void matchTemplateNaive_CCORR(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, int)
|
||||||
{
|
{
|
||||||
Vec4f templ_sum = Vec4f::all(0);
|
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
||||||
Vec4f templ_sqsum = Vec4f::all(0);
|
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
|
||||||
|
);
|
||||||
|
CV_Assert(image.channels() == templ.channels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.channels() == 1);
|
||||||
|
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
||||||
|
|
||||||
split(image,buf.images);
|
Context *clCxt = image.clCxt;
|
||||||
templ_sum = sum(templ);
|
string kernelName = "matchTemplate_Naive_CCORR";
|
||||||
#if SQRSUM_FIXED
|
|
||||||
templ_sqsum = sqrSum(templ);
|
|
||||||
#else
|
|
||||||
oclMat templ_sqr = templ;
|
|
||||||
multiply(templ,templ, templ_sqr);
|
|
||||||
templ_sqsum = sum(templ_sqr);
|
|
||||||
#endif //SQRSUM_FIXED
|
|
||||||
templ_sqsum -= scale * templ_sum * templ_sum;
|
|
||||||
|
|
||||||
float templ_sqsum_sum = 0;
|
vector< pair<size_t, const void *> > args;
|
||||||
for(int i = 0; i < image.channels(); i ++)
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
|
||||||
|
}
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// CCOFF
|
||||||
|
void matchTemplate_CCOFF(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
|
||||||
|
{
|
||||||
|
CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
|
||||||
|
|
||||||
|
matchTemplate_CCORR(image, templ, result, buf);
|
||||||
|
|
||||||
|
Context *clCxt = image.clCxt;
|
||||||
|
string kernelName;
|
||||||
|
|
||||||
|
kernelName = "matchTemplate_Prepared_CCOFF";
|
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||||
|
// to be continued in the following section
|
||||||
|
if(image.channels() == 1)
|
||||||
{
|
{
|
||||||
templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
|
buf.image_sums.resize(1);
|
||||||
}
|
integral(image, buf.image_sums[0]);
|
||||||
templ_sum *= scale;
|
|
||||||
buf.image_sums.resize(buf.images.size());
|
float templ_sum = 0;
|
||||||
buf.image_sqsums.resize(buf.images.size());
|
templ_sum = (float)sum(templ)[0] / templ.size().area();
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum) );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Vec4f templ_sum = Vec4f::all(0);
|
||||||
|
split(image, buf.images);
|
||||||
|
templ_sum = sum(templ) / templ.size().area();
|
||||||
|
buf.image_sums.resize(buf.images.size());
|
||||||
|
|
||||||
|
|
||||||
|
for(int i = 0; i < image.channels(); i ++)
|
||||||
|
{
|
||||||
|
integral(buf.images[i], buf.image_sums[i]);
|
||||||
|
}
|
||||||
|
switch(image.oclchannels())
|
||||||
|
{
|
||||||
|
case 4:
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
|
||||||
|
}
|
||||||
|
|
||||||
|
void matchTemplate_CCOFF_NORMED(
|
||||||
|
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
|
||||||
|
{
|
||||||
|
image.convertTo(buf.imagef, CV_32F);
|
||||||
|
templ.convertTo(buf.templf, CV_32F);
|
||||||
|
|
||||||
|
matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
|
||||||
|
float scale = 1.f / templ.size().area();
|
||||||
|
|
||||||
|
Context *clCxt = image.clCxt;
|
||||||
|
string kernelName;
|
||||||
|
|
||||||
|
kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
|
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||||
|
size_t localThreads[3] = {32, 8, 1};
|
||||||
|
|
||||||
|
vector< pair<size_t, const void *> > args;
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&scale) );
|
||||||
|
// to be continued in the following section
|
||||||
|
if(image.channels() == 1)
|
||||||
|
{
|
||||||
|
buf.image_sums.resize(1);
|
||||||
|
buf.image_sqsums.resize(1);
|
||||||
|
integral(image, buf.image_sums[0], buf.image_sqsums[0]);
|
||||||
|
float templ_sum = 0;
|
||||||
|
float templ_sqsum = 0;
|
||||||
|
templ_sum = (float)sum(templ)[0];
|
||||||
|
|
||||||
|
templ_sqsum = sqrSum(templ)[0];
|
||||||
|
|
||||||
|
templ_sqsum -= scale * templ_sum * templ_sum;
|
||||||
|
templ_sum *= scale;
|
||||||
|
|
||||||
for(int i = 0; i < image.channels(); i ++)
|
|
||||||
{
|
|
||||||
integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
switch(image.channels())
|
|
||||||
{
|
|
||||||
case 4:
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum) );
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum) );
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
|
|
||||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) );
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Vec4f templ_sum = Vec4f::all(0);
|
||||||
|
Vec4f templ_sqsum = Vec4f::all(0);
|
||||||
|
|
||||||
|
split(image, buf.images);
|
||||||
|
templ_sum = sum(templ);
|
||||||
|
|
||||||
|
templ_sqsum = sqrSum(templ);
|
||||||
|
|
||||||
|
templ_sqsum -= scale * templ_sum * templ_sum;
|
||||||
|
|
||||||
|
float templ_sqsum_sum = 0;
|
||||||
|
for(int i = 0; i < image.oclchannels(); i ++)
|
||||||
|
{
|
||||||
|
templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
|
||||||
|
}
|
||||||
|
templ_sum *= scale;
|
||||||
|
buf.image_sums.resize(buf.images.size());
|
||||||
|
buf.image_sqsums.resize(buf.images.size());
|
||||||
|
|
||||||
|
for(int i = 0; i < image.oclchannels(); i ++)
|
||||||
|
{
|
||||||
|
integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
switch(image.oclchannels())
|
||||||
|
{
|
||||||
|
case 4:
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
|
||||||
}
|
}
|
||||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
|
||||||
}
|
|
||||||
|
|
||||||
}/*ocl*/} /*cv*/
|
}/*ocl*/
|
||||||
|
} /*cv*/
|
||||||
|
|
||||||
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method)
|
void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method)
|
||||||
{
|
{
|
||||||
MatchTemplateBuf buf;
|
MatchTemplateBuf buf;
|
||||||
matchTemplate(image,templ, result, method,buf);
|
matchTemplate(image, templ, result, method, buf);
|
||||||
}
|
}
|
||||||
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf)
|
void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf)
|
||||||
{
|
{
|
||||||
CV_Assert(image.type() == templ.type());
|
CV_Assert(image.type() == templ.type());
|
||||||
CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
|
CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
|
||||||
|
|
||||||
typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&);
|
typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &);
|
||||||
|
|
||||||
const Caller callers[] = {
|
const Caller callers[] =
|
||||||
::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
|
{
|
||||||
::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
|
::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
|
||||||
|
::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
|
||||||
::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
|
::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -63,8 +63,8 @@ namespace cv
|
|||||||
|
|
||||||
// provide additional methods for the user to interact with the command queue after a task is fired
|
// provide additional methods for the user to interact with the command queue after a task is fired
|
||||||
void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
||||||
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
|
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
|
||||||
int depth, char *build_options, FLUSH_MODE finish_mode)
|
int depth, char *build_options, FLUSH_MODE finish_mode)
|
||||||
{
|
{
|
||||||
//construct kernel name
|
//construct kernel name
|
||||||
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
|
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
|
||||||
@@ -80,7 +80,7 @@ namespace cv
|
|||||||
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
|
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
|
||||||
|
|
||||||
if ( localThreads != NULL)
|
if ( localThreads != NULL)
|
||||||
{
|
{
|
||||||
globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
|
globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
|
||||||
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
|
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
|
||||||
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
|
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
|
||||||
@@ -92,7 +92,7 @@ namespace cv
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
||||||
|
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
|
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
|
||||||
localThreads, 0, NULL, NULL));
|
localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
switch(finish_mode)
|
switch(finish_mode)
|
||||||
{
|
{
|
||||||
@@ -109,19 +109,19 @@ namespace cv
|
|||||||
}
|
}
|
||||||
|
|
||||||
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
|
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
|
||||||
size_t globalThreads[3], size_t localThreads[3],
|
size_t globalThreads[3], size_t localThreads[3],
|
||||||
vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode)
|
vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode)
|
||||||
{
|
{
|
||||||
openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args,
|
openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args,
|
||||||
channels, depth, NULL, finish_mode);
|
channels, depth, NULL, finish_mode);
|
||||||
}
|
}
|
||||||
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
|
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
|
||||||
size_t globalThreads[3], size_t localThreads[3],
|
size_t globalThreads[3], size_t localThreads[3],
|
||||||
vector< pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
|
vector< pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
|
||||||
|
|
||||||
{
|
{
|
||||||
openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
|
openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
|
||||||
build_options, finish_mode);
|
build_options, finish_mode);
|
||||||
}
|
}
|
||||||
}//namespace ocl
|
}//namespace ocl
|
||||||
|
|
||||||
|
|||||||
@@ -63,10 +63,10 @@ namespace cv
|
|||||||
DISABLE
|
DISABLE
|
||||||
};
|
};
|
||||||
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
||||||
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
|
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
|
||||||
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
||||||
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
|
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
|
||||||
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
|
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
|
||||||
}//namespace ocl
|
}//namespace ocl
|
||||||
|
|
||||||
}//namespace cv
|
}//namespace cv
|
||||||
|
|||||||
@@ -97,13 +97,13 @@ namespace cv
|
|||||||
size_t widthInBytes, size_t height);
|
size_t widthInBytes, size_t height);
|
||||||
void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
|
void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
|
||||||
const void *src, size_t spitch,
|
const void *src, size_t spitch,
|
||||||
size_t width, size_t height, enum openCLMemcpyKind kind, int channels=-1);
|
size_t width, size_t height, enum openCLMemcpyKind kind, int channels = -1);
|
||||||
void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
||||||
const void *src, size_t spitch,
|
const void *src, size_t spitch,
|
||||||
size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind);
|
size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind);
|
||||||
void openCLFree(void *devPtr);
|
void openCLFree(void *devPtr);
|
||||||
cl_mem openCLCreateBuffer(Context *clCxt,size_t flag, size_t size);
|
cl_mem openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
|
||||||
void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void* host_buffer, size_t size);
|
void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
|
||||||
cl_kernel openCLGetKernelFromSource(const Context *clCxt,
|
cl_kernel openCLGetKernelFromSource(const Context *clCxt,
|
||||||
const char **source, string kernelName);
|
const char **source, string kernelName);
|
||||||
cl_kernel openCLGetKernelFromSource(const Context *clCxt,
|
cl_kernel openCLGetKernelFromSource(const Context *clCxt,
|
||||||
@@ -113,8 +113,8 @@ namespace cv
|
|||||||
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, vector< std::pair<size_t, const void *> > &args,
|
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, vector< std::pair<size_t, const void *> > &args,
|
||||||
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
|
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
|
||||||
void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName,
|
void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName,
|
||||||
size_t globalThreads[3], size_t localThreads[3],
|
size_t globalThreads[3], size_t localThreads[3],
|
||||||
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
|
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
|
||||||
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
||||||
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth);
|
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth);
|
||||||
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
||||||
@@ -128,14 +128,14 @@ namespace cv
|
|||||||
|
|
||||||
//void openCLMemcpy2DWithNoPadding(cl_command_queue command_queue, cl_mem buffer, size_t size, size_t offset, void *ptr,
|
//void openCLMemcpy2DWithNoPadding(cl_command_queue command_queue, cl_mem buffer, size_t size, size_t offset, void *ptr,
|
||||||
// enum openCLMemcpyKind kind, cl_bool blocking_write);
|
// enum openCLMemcpyKind kind, cl_bool blocking_write);
|
||||||
int savetofile(const Context *clcxt, cl_program &program, const char *fileName);
|
int savetofile(const Context *clcxt, cl_program &program, const char *fileName);
|
||||||
struct Context::Impl
|
struct Context::Impl
|
||||||
{
|
{
|
||||||
//Information of the OpenCL context
|
//Information of the OpenCL context
|
||||||
cl_context clContext;
|
cl_context clContext;
|
||||||
cl_command_queue clCmdQueue;
|
cl_command_queue clCmdQueue;
|
||||||
cl_device_id *devices;
|
cl_device_id *devices;
|
||||||
string devName;
|
string devName;
|
||||||
cl_uint maxDimensions;
|
cl_uint maxDimensions;
|
||||||
size_t maxWorkGroupSize;
|
size_t maxWorkGroupSize;
|
||||||
size_t *maxWorkItemSizes;
|
size_t *maxWorkItemSizes;
|
||||||
@@ -143,8 +143,8 @@ namespace cv
|
|||||||
int double_support;
|
int double_support;
|
||||||
//extra options to recognize vendor specific fp64 extensions
|
//extra options to recognize vendor specific fp64 extensions
|
||||||
char *extra_options;
|
char *extra_options;
|
||||||
string Binpath;
|
string Binpath;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
// @Authors
|
// @Authors
|
||||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||||
// Yao Wang, yao@multicorewareinc.com
|
// Yao Wang, yao@multicorewareinc.com
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
@@ -100,19 +100,17 @@ void pyrdown_run(const oclMat &src, const oclMat &dst)
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
|
openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
|
||||||
}
|
}
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// pyrDown
|
// pyrDown
|
||||||
|
|
||||||
void cv::ocl::pyrDown(const oclMat& src, oclMat& dst)
|
void cv::ocl::pyrDown(const oclMat &src, oclMat &dst)
|
||||||
{
|
{
|
||||||
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
|
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
|
||||||
|
|
||||||
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
|
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
|
||||||
|
|
||||||
dst.download_channels=src.download_channels;
|
|
||||||
|
|
||||||
pyrdown_run(src, dst);
|
pyrdown_run(src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -48,8 +48,8 @@ using namespace cv::ocl;
|
|||||||
|
|
||||||
#if !defined (HAVE_OPENCL)
|
#if !defined (HAVE_OPENCL)
|
||||||
|
|
||||||
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat&, const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat*) { }
|
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &, const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat *) { }
|
||||||
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat*) { }
|
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat *) { }
|
||||||
|
|
||||||
#else /* !defined (HAVE_OPENCL) */
|
#else /* !defined (HAVE_OPENCL) */
|
||||||
|
|
||||||
@@ -83,7 +83,7 @@ struct int2
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
void calcPatchSize(cv::Size winSize, int cn, dim3& block, dim3& patch, bool isDeviceArch11)
|
void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
|
||||||
{
|
{
|
||||||
winSize.width *= cn;
|
winSize.width *= cn;
|
||||||
|
|
||||||
@@ -144,7 +144,7 @@ void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta)
|
|||||||
args.push_back( make_pair( sizeof(cl_float) , (void *)&alpha_f ));
|
args.push_back( make_pair( sizeof(cl_float) , (void *)&alpha_f ));
|
||||||
args.push_back( make_pair( sizeof(cl_float) , (void *)&beta_f ));
|
args.push_back( make_pair( sizeof(cl_float) , (void *)&beta_f ));
|
||||||
openCLExecuteKernel2(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
|
openCLExecuteKernel2(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
|
||||||
localThreads, args, dst.channels(), dst.depth(), CLFLUSH);
|
localThreads, args, dst.oclchannels(), dst.depth(), CLFLUSH);
|
||||||
}
|
}
|
||||||
void convertTo( const oclMat &src, oclMat &m, int rtype, double alpha = 1, double beta = 0 );
|
void convertTo( const oclMat &src, oclMat &m, int rtype, double alpha = 1, double beta = 0 );
|
||||||
void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta )
|
void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta )
|
||||||
@@ -157,7 +157,7 @@ void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double
|
|||||||
if( rtype < 0 )
|
if( rtype < 0 )
|
||||||
rtype = src.type();
|
rtype = src.type();
|
||||||
else
|
else
|
||||||
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels());
|
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels());
|
||||||
|
|
||||||
int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
|
int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
|
||||||
if( sdepth == ddepth && noScale )
|
if( sdepth == ddepth && noScale )
|
||||||
@@ -198,177 +198,177 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string
|
|||||||
{
|
{
|
||||||
globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
||||||
}
|
}
|
||||||
char compile_option[32];
|
char compile_option[32];
|
||||||
union sc
|
union sc
|
||||||
{
|
{
|
||||||
cl_uchar4 uval;
|
cl_uchar4 uval;
|
||||||
cl_char4 cval;
|
cl_char4 cval;
|
||||||
cl_ushort4 usval;
|
cl_ushort4 usval;
|
||||||
cl_short4 shval;
|
cl_short4 shval;
|
||||||
cl_int4 ival;
|
cl_int4 ival;
|
||||||
cl_float4 fval;
|
cl_float4 fval;
|
||||||
cl_double4 dval;
|
cl_double4 dval;
|
||||||
}val;
|
} val;
|
||||||
switch(dst.depth())
|
switch(dst.depth())
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
|
val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
|
||||||
val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
|
val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
|
||||||
val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
|
val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
|
||||||
val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
|
val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
|
||||||
switch(dst.channels())
|
switch(dst.oclchannels())
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
sprintf(compile_option, "-D GENTYPE=uchar");
|
sprintf(compile_option, "-D GENTYPE=uchar");
|
||||||
args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
|
args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
sprintf(compile_option, "-D GENTYPE=uchar4");
|
sprintf(compile_option, "-D GENTYPE=uchar4");
|
||||||
args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
|
args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
|
val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
|
||||||
val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
|
val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
|
||||||
val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
|
val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
|
||||||
val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
|
val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
|
||||||
switch(dst.channels())
|
switch(dst.oclchannels())
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
sprintf(compile_option, "-D GENTYPE=char");
|
sprintf(compile_option, "-D GENTYPE=char");
|
||||||
args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
|
args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
sprintf(compile_option, "-D GENTYPE=char4");
|
sprintf(compile_option, "-D GENTYPE=char4");
|
||||||
args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
|
args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
|
val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
|
||||||
val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
|
val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
|
||||||
val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
|
val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
|
||||||
val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
|
val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
|
||||||
switch(dst.channels())
|
switch(dst.oclchannels())
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
sprintf(compile_option, "-D GENTYPE=ushort");
|
sprintf(compile_option, "-D GENTYPE=ushort");
|
||||||
args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
|
args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
sprintf(compile_option, "-D GENTYPE=ushort4");
|
sprintf(compile_option, "-D GENTYPE=ushort4");
|
||||||
args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
|
args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
|
val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
|
||||||
val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
|
val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
|
||||||
val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
|
val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
|
||||||
val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
|
val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
|
||||||
switch(dst.channels())
|
switch(dst.oclchannels())
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
sprintf(compile_option, "-D GENTYPE=short");
|
sprintf(compile_option, "-D GENTYPE=short");
|
||||||
args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
|
args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
sprintf(compile_option, "-D GENTYPE=short4");
|
sprintf(compile_option, "-D GENTYPE=short4");
|
||||||
args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
|
args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
|
val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
|
||||||
val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
|
val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
|
||||||
val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
|
val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
|
||||||
val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
|
val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
|
||||||
switch(dst.channels())
|
switch(dst.oclchannels())
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
sprintf(compile_option, "-D GENTYPE=int");
|
sprintf(compile_option, "-D GENTYPE=int");
|
||||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
sprintf(compile_option, "-D GENTYPE=int2");
|
sprintf(compile_option, "-D GENTYPE=int2");
|
||||||
cl_int2 i2val;
|
cl_int2 i2val;
|
||||||
i2val.s[0] = val.ival.s[0];
|
i2val.s[0] = val.ival.s[0];
|
||||||
i2val.s[1] = val.ival.s[1];
|
i2val.s[1] = val.ival.s[1];
|
||||||
args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
|
args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
sprintf(compile_option, "-D GENTYPE=int4");
|
sprintf(compile_option, "-D GENTYPE=int4");
|
||||||
args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
|
args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
val.fval.s[0] = (float)scalar.val[0];
|
val.fval.s[0] = (float)scalar.val[0];
|
||||||
val.fval.s[1] = (float)scalar.val[1];
|
val.fval.s[1] = (float)scalar.val[1];
|
||||||
val.fval.s[2] = (float)scalar.val[2];
|
val.fval.s[2] = (float)scalar.val[2];
|
||||||
val.fval.s[3] = (float)scalar.val[3];
|
val.fval.s[3] = (float)scalar.val[3];
|
||||||
switch(dst.channels())
|
switch(dst.oclchannels())
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
sprintf(compile_option, "-D GENTYPE=float");
|
sprintf(compile_option, "-D GENTYPE=float");
|
||||||
args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
|
args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
sprintf(compile_option, "-D GENTYPE=float4");
|
sprintf(compile_option, "-D GENTYPE=float4");
|
||||||
args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
|
args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 6:
|
case 6:
|
||||||
val.dval.s[0] = scalar.val[0];
|
val.dval.s[0] = scalar.val[0];
|
||||||
val.dval.s[1] = scalar.val[1];
|
val.dval.s[1] = scalar.val[1];
|
||||||
val.dval.s[2] = scalar.val[2];
|
val.dval.s[2] = scalar.val[2];
|
||||||
val.dval.s[3] = scalar.val[3];
|
val.dval.s[3] = scalar.val[3];
|
||||||
switch(dst.channels())
|
switch(dst.oclchannels())
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
sprintf(compile_option, "-D GENTYPE=double");
|
sprintf(compile_option, "-D GENTYPE=double");
|
||||||
args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
|
args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
sprintf(compile_option, "-D GENTYPE=double4");
|
sprintf(compile_option, "-D GENTYPE=double4");
|
||||||
args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
|
args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat,"unknown depth");
|
CV_Error(CV_StsUnsupportedFormat, "unknown depth");
|
||||||
}
|
}
|
||||||
#if CL_VERSION_1_2
|
#if CL_VERSION_1_2
|
||||||
if(dst.offset==0 && dst.cols==dst.wholecols)
|
if(dst.offset == 0 && dst.cols == dst.wholecols)
|
||||||
{
|
{
|
||||||
clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue,(cl_mem)dst.data,args[0].second,args[0].first,0,dst.step*dst.rows,0,NULL,NULL);
|
clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
|
||||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
|
||||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
|
||||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
|
||||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
|
||||||
openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads,
|
openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads,
|
||||||
localThreads, args, -1, -1,compile_option, CLFLUSH);
|
localThreads, args, -1, -1, compile_option, CLFLUSH);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
|
||||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
|
||||||
@@ -376,7 +376,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string
|
|||||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
|
||||||
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
|
||||||
openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads,
|
openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads,
|
||||||
localThreads, args, -1, -1,compile_option, CLFLUSH);
|
localThreads, args, -1, -1, compile_option, CLFLUSH);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -385,30 +385,30 @@ oclMat &setTo(oclMat &src, const Scalar &scalar)
|
|||||||
CV_Assert( src.depth() >= 0 && src.depth() <= 6 );
|
CV_Assert( src.depth() >= 0 && src.depth() <= 6 );
|
||||||
CV_DbgAssert( !src.empty());
|
CV_DbgAssert( !src.empty());
|
||||||
|
|
||||||
if(src.type()==CV_8UC1)
|
if(src.type() == CV_8UC1)
|
||||||
{
|
{
|
||||||
set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask_C1_D0");
|
set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask_C1_D0");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask");
|
set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask");
|
||||||
}
|
}
|
||||||
|
|
||||||
return src;
|
return src;
|
||||||
}
|
}
|
||||||
|
|
||||||
void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
|
void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
|
||||||
{
|
{
|
||||||
if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
|
if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
//dst.create(src1.size(), src1.type());
|
//dst.create(src1.size(), src1.type());
|
||||||
//CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
|
//CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
|
||||||
// src1.rows == src2.rows && src2.rows == dst.rows);
|
// src1.rows == src2.rows && src2.rows == dst.rows);
|
||||||
CV_Assert(src1.cols == dst.cols &&
|
CV_Assert(src1.cols == dst.cols &&
|
||||||
src1.rows == dst.rows);
|
src1.rows == dst.rows);
|
||||||
|
|
||||||
CV_Assert(src1.type() == dst.type());
|
CV_Assert(src1.type() == dst.type());
|
||||||
@@ -429,11 +429,11 @@ void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const ch
|
|||||||
//int cols = divUp(dst.cols * channels + offset_cols, vector_length);
|
//int cols = divUp(dst.cols * channels + offset_cols, vector_length);
|
||||||
|
|
||||||
size_t localThreads[3] = { 16, 16, 1 };
|
size_t localThreads[3] = { 16, 16, 1 };
|
||||||
//size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
|
//size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
|
||||||
// divUp(dst.rows, localThreads[1]) * localThreads[1],
|
// divUp(dst.rows, localThreads[1]) * localThreads[1],
|
||||||
// 1
|
// 1
|
||||||
// };
|
// };
|
||||||
size_t globalThreads[3] = { src1.cols,
|
size_t globalThreads[3] = { src1.cols,
|
||||||
src1.rows,
|
src1.rows,
|
||||||
1
|
1
|
||||||
};
|
};
|
||||||
@@ -455,8 +455,8 @@ void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const ch
|
|||||||
|
|
||||||
//if(_scalar != NULL)
|
//if(_scalar != NULL)
|
||||||
//{
|
//{
|
||||||
float scalar1 = *((float *)_scalar);
|
float scalar1 = *((float *)_scalar);
|
||||||
args.push_back( make_pair( sizeof(float), (float *)&scalar1 ));
|
args.push_back( make_pair( sizeof(float), (float *)&scalar1 ));
|
||||||
//}
|
//}
|
||||||
|
|
||||||
openCLExecuteKernel2(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, src1.depth(), CLFLUSH);
|
openCLExecuteKernel2(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, src1.depth(), CLFLUSH);
|
||||||
@@ -489,10 +489,10 @@ void pyrdown_run_cus(const oclMat &src, const oclMat &dst)
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
|
||||||
|
|
||||||
openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth(), CLFLUSH);
|
openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth(), CLFLUSH);
|
||||||
}
|
}
|
||||||
|
|
||||||
void pyrDown_cus(const oclMat& src, oclMat& dst)
|
void pyrDown_cus(const oclMat &src, oclMat &dst)
|
||||||
{
|
{
|
||||||
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
|
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
|
||||||
|
|
||||||
@@ -549,7 +549,7 @@ void pyrDown_cus(const oclMat& src, oclMat& dst)
|
|||||||
//
|
//
|
||||||
//void callT(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
|
//void callT(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
|
||||||
//{
|
//{
|
||||||
// if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) ||
|
// if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) ||
|
||||||
// !isAligned(dst.data, 4 * sizeof(double)) || !isAligned(dst.step, 4 * sizeof(double)))
|
// !isAligned(dst.data, 4 * sizeof(double)) || !isAligned(dst.step, 4 * sizeof(double)))
|
||||||
// {
|
// {
|
||||||
// callF(src, dst, op, mask);
|
// callF(src, dst, op, mask);
|
||||||
@@ -606,94 +606,94 @@ void pyrDown_cus(const oclMat& src, oclMat& dst)
|
|||||||
// //}
|
// //}
|
||||||
//}
|
//}
|
||||||
|
|
||||||
cl_mem bindTexture(const oclMat& mat, int depth, int channels)
|
cl_mem bindTexture(const oclMat &mat, int depth, int channels)
|
||||||
{
|
{
|
||||||
cl_mem texture;
|
cl_mem texture;
|
||||||
cl_image_format format;
|
cl_image_format format;
|
||||||
int err;
|
int err;
|
||||||
if(depth == 0)
|
if(depth == 0)
|
||||||
{
|
{
|
||||||
format.image_channel_data_type = CL_UNSIGNED_INT8;
|
format.image_channel_data_type = CL_UNSIGNED_INT8;
|
||||||
}
|
}
|
||||||
else if(depth == 5)
|
else if(depth == 5)
|
||||||
{
|
{
|
||||||
format.image_channel_data_type = CL_FLOAT;
|
format.image_channel_data_type = CL_FLOAT;
|
||||||
}
|
}
|
||||||
if(channels == 1)
|
if(channels == 1)
|
||||||
{
|
{
|
||||||
format.image_channel_order = CL_R;
|
format.image_channel_order = CL_R;
|
||||||
}
|
}
|
||||||
else if(channels == 3)
|
else if(channels == 3)
|
||||||
{
|
{
|
||||||
format.image_channel_order = CL_RGB;
|
format.image_channel_order = CL_RGB;
|
||||||
}
|
}
|
||||||
else if(channels == 4)
|
else if(channels == 4)
|
||||||
{
|
{
|
||||||
format.image_channel_order = CL_RGBA;
|
format.image_channel_order = CL_RGBA;
|
||||||
}
|
}
|
||||||
#if CL_VERSION_1_2
|
#if CL_VERSION_1_2
|
||||||
cl_image_desc desc;
|
cl_image_desc desc;
|
||||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||||
desc.image_width = mat.step / mat.elemSize();
|
desc.image_width = mat.step / mat.elemSize();
|
||||||
desc.image_height = mat.rows;
|
desc.image_height = mat.rows;
|
||||||
desc.image_depth = NULL;
|
desc.image_depth = NULL;
|
||||||
desc.image_array_size = 1;
|
desc.image_array_size = 1;
|
||||||
desc.image_row_pitch = 0;
|
desc.image_row_pitch = 0;
|
||||||
desc.image_slice_pitch= 0;
|
desc.image_slice_pitch = 0;
|
||||||
desc.buffer = NULL;
|
desc.buffer = NULL;
|
||||||
desc.num_mip_levels = 0;
|
desc.num_mip_levels = 0;
|
||||||
desc.num_samples = 0;
|
desc.num_samples = 0;
|
||||||
texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
|
texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
|
||||||
#else
|
#else
|
||||||
texture = clCreateImage2D(
|
texture = clCreateImage2D(
|
||||||
mat.clCxt->impl->clContext,
|
mat.clCxt->impl->clContext,
|
||||||
CL_MEM_READ_WRITE,
|
CL_MEM_READ_WRITE,
|
||||||
&format,
|
&format,
|
||||||
mat.step / mat.elemSize(),
|
mat.step / mat.elemSize(),
|
||||||
mat.rows,
|
mat.rows,
|
||||||
0,
|
0,
|
||||||
NULL,
|
NULL,
|
||||||
&err);
|
&err);
|
||||||
#endif
|
#endif
|
||||||
size_t origin[] = { 0, 0, 0 };
|
size_t origin[] = { 0, 0, 0 };
|
||||||
size_t region[] = { mat.step / mat.elemSize(), mat.rows, 1 };
|
size_t region[] = { mat.step / mat.elemSize(), mat.rows, 1 };
|
||||||
clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0);
|
clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0);
|
||||||
openCLSafeCall(err);
|
openCLSafeCall(err);
|
||||||
|
|
||||||
return texture;
|
return texture;
|
||||||
}
|
}
|
||||||
|
|
||||||
void releaseTexture(cl_mem texture)
|
void releaseTexture(cl_mem texture)
|
||||||
{
|
{
|
||||||
openCLFree(texture);
|
openCLFree(texture);
|
||||||
}
|
}
|
||||||
|
|
||||||
void lkSparse_run(oclMat& I, oclMat& J,
|
void lkSparse_run(oclMat &I, oclMat &J,
|
||||||
const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err, bool GET_MIN_EIGENVALS, int ptcount,
|
const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err, bool GET_MIN_EIGENVALS, int ptcount,
|
||||||
int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
|
int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
|
||||||
{
|
{
|
||||||
Context *clCxt = I.clCxt;
|
Context *clCxt = I.clCxt;
|
||||||
|
|
||||||
string kernelName = "lkSparse";
|
string kernelName = "lkSparse";
|
||||||
|
|
||||||
size_t localThreads[3] = { 8, 32, 1 };
|
size_t localThreads[3] = { 8, 32, 1 };
|
||||||
size_t globalThreads[3] = { 8 * ptcount, 32, 1};
|
size_t globalThreads[3] = { 8 * ptcount, 32, 1};
|
||||||
|
|
||||||
int cn = I.channels();
|
int cn = I.oclchannels();
|
||||||
|
|
||||||
bool calcErr;
|
bool calcErr;
|
||||||
if (err)
|
if (err)
|
||||||
{
|
{
|
||||||
calcErr = true;
|
calcErr = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
calcErr = false;
|
calcErr = false;
|
||||||
}
|
}
|
||||||
calcErr = true;
|
calcErr = true;
|
||||||
|
|
||||||
cl_mem ITex = bindTexture(I, I.depth(), cn);
|
cl_mem ITex = bindTexture(I, I.depth(), cn);
|
||||||
cl_mem JTex = bindTexture(J, J.depth(), cn);
|
cl_mem JTex = bindTexture(J, J.depth(), cn);
|
||||||
|
|
||||||
vector<pair<size_t , const void *> > args;
|
vector<pair<size_t , const void *> > args;
|
||||||
|
|
||||||
@@ -718,13 +718,13 @@ void lkSparse_run(oclMat& I, oclMat& J,
|
|||||||
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
|
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
|
||||||
args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS ));
|
args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS ));
|
||||||
|
|
||||||
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth(), CLFLUSH);
|
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
|
||||||
|
|
||||||
releaseTexture(ITex);
|
releaseTexture(ITex);
|
||||||
releaseTexture(JTex);
|
releaseTexture(JTex);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err)
|
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err)
|
||||||
{
|
{
|
||||||
if (prevPts.empty())
|
if (prevPts.empty())
|
||||||
{
|
{
|
||||||
@@ -738,10 +738,10 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next
|
|||||||
|
|
||||||
iters = std::min(std::max(iters, 0), 100);
|
iters = std::min(std::max(iters, 0), 100);
|
||||||
|
|
||||||
const int cn = prevImg.channels();
|
const int cn = prevImg.oclchannels();
|
||||||
|
|
||||||
dim3 block, patch;
|
dim3 block, patch;
|
||||||
calcPatchSize(winSize, cn, block, patch, isDeviceArch11_);
|
calcPatchSize(winSize, cn, block, patch, isDeviceArch11_);
|
||||||
|
|
||||||
CV_Assert(derivLambda >= 0);
|
CV_Assert(derivLambda >= 0);
|
||||||
CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2);
|
CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2);
|
||||||
@@ -756,9 +756,9 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next
|
|||||||
|
|
||||||
oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
|
oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
|
||||||
oclMat temp2 = nextPts.reshape(1);
|
oclMat temp2 = nextPts.reshape(1);
|
||||||
//oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
|
//oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
|
||||||
multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f);
|
multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f);
|
||||||
//::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
|
//::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
|
||||||
|
|
||||||
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
|
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
|
||||||
//status.setTo(Scalar::all(1));
|
//status.setTo(Scalar::all(1));
|
||||||
@@ -781,12 +781,12 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//oclMat buf_;
|
//oclMat buf_;
|
||||||
// cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
|
// cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
|
||||||
// buf_.convertTo(prevPyr_[0], CV_32F);
|
// buf_.convertTo(prevPyr_[0], CV_32F);
|
||||||
|
|
||||||
// cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
|
// cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
|
||||||
// buf_.convertTo(nextPyr_[0], CV_32F);
|
// buf_.convertTo(nextPyr_[0], CV_32F);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int level = 1; level <= maxLevel; ++level)
|
for (int level = 1; level <= maxLevel; ++level)
|
||||||
@@ -799,16 +799,16 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next
|
|||||||
|
|
||||||
for (int level = maxLevel; level >= 0; level--)
|
for (int level = maxLevel; level >= 0; level--)
|
||||||
{
|
{
|
||||||
lkSparse_run(prevPyr_[level], nextPyr_[level],
|
lkSparse_run(prevPyr_[level], nextPyr_[level],
|
||||||
prevPts, nextPts, status, level == 0 && err ? err : 0, getMinEigenVals, prevPts.cols,
|
prevPts, nextPts, status, level == 0 && err ? err : 0, getMinEigenVals, prevPts.cols,
|
||||||
level, /*block, */patch, winSize, iters);
|
level, /*block, */patch, winSize, iters);
|
||||||
}
|
}
|
||||||
|
|
||||||
clFinish(prevImg.clCxt->impl->clCmdQueue);
|
clFinish(prevImg.clCxt->impl->clCmdQueue);
|
||||||
}
|
}
|
||||||
|
|
||||||
void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v,
|
void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
|
||||||
oclMat& prevU, oclMat& prevV, oclMat* err, Size winSize, int iters)
|
oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters)
|
||||||
{
|
{
|
||||||
Context *clCxt = I.clCxt;
|
Context *clCxt = I.clCxt;
|
||||||
|
|
||||||
@@ -817,22 +817,22 @@ void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v,
|
|||||||
size_t localThreads[3] = { 16, 16, 1 };
|
size_t localThreads[3] = { 16, 16, 1 };
|
||||||
size_t globalThreads[3] = { I.cols, I.rows, 1};
|
size_t globalThreads[3] = { I.cols, I.rows, 1};
|
||||||
|
|
||||||
int cn = I.channels();
|
int cn = I.oclchannels();
|
||||||
|
|
||||||
bool calcErr;
|
bool calcErr;
|
||||||
if (err)
|
if (err)
|
||||||
{
|
{
|
||||||
calcErr = true;
|
calcErr = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
calcErr = false;
|
calcErr = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_mem ITex = bindTexture(I, I.depth(), cn);
|
cl_mem ITex = bindTexture(I, I.depth(), cn);
|
||||||
cl_mem JTex = bindTexture(J, J.depth(), cn);
|
cl_mem JTex = bindTexture(J, J.depth(), cn);
|
||||||
|
|
||||||
//int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2};
|
//int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2};
|
||||||
//const int patchWidth = 16 + 2 * halfWin.x;
|
//const int patchWidth = 16 + 2 * halfWin.x;
|
||||||
//const int patchHeight = 16 + 2 * halfWin.y;
|
//const int patchHeight = 16 + 2 * halfWin.y;
|
||||||
//size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int);
|
//size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int);
|
||||||
@@ -854,18 +854,18 @@ void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v,
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
|
||||||
//args.push_back( make_pair( sizeof(cl_mem), (void *)&(*err).data ));
|
//args.push_back( make_pair( sizeof(cl_mem), (void *)&(*err).data ));
|
||||||
//args.push_back( make_pair( sizeof(cl_int), (void *)&(*err).step ));
|
//args.push_back( make_pair( sizeof(cl_int), (void *)&(*err).step ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
|
||||||
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
|
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
|
||||||
|
|
||||||
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth(), CLFLUSH);
|
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
|
||||||
|
|
||||||
releaseTexture(ITex);
|
releaseTexture(ITex);
|
||||||
releaseTexture(JTex);
|
releaseTexture(JTex);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err)
|
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err)
|
||||||
{
|
{
|
||||||
CV_Assert(prevImg.type() == CV_8UC1);
|
CV_Assert(prevImg.type() == CV_8UC1);
|
||||||
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
|
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
|
||||||
@@ -894,7 +894,7 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextI
|
|||||||
uPyr_[1].setTo(Scalar::all(0));
|
uPyr_[1].setTo(Scalar::all(0));
|
||||||
vPyr_[1].setTo(Scalar::all(0));
|
vPyr_[1].setTo(Scalar::all(0));
|
||||||
|
|
||||||
Size winSize2i(winSize.width, winSize.height);
|
Size winSize2i(winSize.width, winSize.height);
|
||||||
|
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
|
|
||||||
@@ -903,7 +903,7 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextI
|
|||||||
int idx2 = (idx + 1) & 1;
|
int idx2 = (idx + 1) & 1;
|
||||||
|
|
||||||
lkDense_run(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
|
lkDense_run(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
|
||||||
level == 0 ? err : 0, winSize2i, iters);
|
level == 0 ? err : 0, winSize2i, iters);
|
||||||
|
|
||||||
if (level > 0)
|
if (level > 0)
|
||||||
idx = idx2;
|
idx = idx2;
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
// @Authors
|
// @Authors
|
||||||
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||||
// Yao Wang, yao@multicorewareinc.com
|
// Yao Wang, yao@multicorewareinc.com
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
@@ -55,36 +55,43 @@ using namespace cv::ocl;
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#ifndef HAVE_OPENCL
|
#ifndef HAVE_OPENCL
|
||||||
void cv::ocl::pyrUp(const oclMat&, GpuMat&, oclMat&) { throw_nogpu(); }
|
void cv::ocl::pyrUp(const oclMat &, GpuMat &, oclMat &)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
namespace cv { namespace ocl
|
namespace cv
|
||||||
{
|
{
|
||||||
extern const char *pyr_up;
|
namespace ocl
|
||||||
void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst)
|
{
|
||||||
{
|
extern const char *pyr_up;
|
||||||
dst.create(src.rows * 2, src.cols * 2, src.type());
|
void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst)
|
||||||
dst.download_channels=src.download_channels;
|
{
|
||||||
Context *clCxt = src.clCxt;
|
dst.create(src.rows * 2, src.cols * 2, src.type());
|
||||||
|
|
||||||
const std::string kernelName = "pyrUp";
|
Context *clCxt = src.clCxt;
|
||||||
|
|
||||||
std::vector< pair<size_t, const void *> > args;
|
const std::string kernelName = "pyrUp";
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
std::vector< pair<size_t, const void *> > args;
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
|
||||||
size_t globalThreads[3] = {dst.cols, dst.rows, 1};
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
|
||||||
|
size_t globalThreads[3] = {dst.cols, dst.rows, 1};
|
||||||
openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
}
|
|
||||||
}};
|
|
||||||
|
openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
#endif // HAVE_OPENCL
|
#endif // HAVE_OPENCL
|
||||||
@@ -114,7 +114,7 @@ namespace cv
|
|||||||
void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst)
|
void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst)
|
||||||
{
|
{
|
||||||
Context *clCxt = mat_dst.clCxt;
|
Context *clCxt = mat_dst.clCxt;
|
||||||
int channels = mat_dst.channels();
|
int channels = mat_dst.oclchannels();
|
||||||
int depth = mat_dst.depth();
|
int depth = mat_dst.depth();
|
||||||
|
|
||||||
string kernelName = "merge_vector";
|
string kernelName = "merge_vector";
|
||||||
@@ -125,11 +125,11 @@ namespace cv
|
|||||||
{4, 4, 2, 2, 1, 1, 1}
|
{4, 4, 2, 2, 1, 1, 1}
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t index = indexes[channels-1][mat_dst.depth()];
|
size_t index = indexes[channels - 1][mat_dst.depth()];
|
||||||
int cols = divUp(mat_dst.cols, index);
|
int cols = divUp(mat_dst.cols, index);
|
||||||
size_t localThreads[3] = { 64, 4, 1 };
|
size_t localThreads[3] = { 64, 4, 1 };
|
||||||
size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
|
size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
|
||||||
divUp(mat_dst.rows, localThreads[1]) * localThreads[1],
|
divUp(mat_dst.rows, localThreads[1]) *localThreads[1],
|
||||||
1
|
1
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -158,14 +158,14 @@ namespace cv
|
|||||||
|
|
||||||
void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
|
void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
|
||||||
{
|
{
|
||||||
if(mat_dst.clCxt -> impl -> double_support ==0 && mat_dst.type() == CV_64F)
|
if(mat_dst.clCxt -> impl -> double_support == 0 && mat_dst.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Context *clCxt = mat_dst.clCxt;
|
Context *clCxt = mat_dst.clCxt;
|
||||||
int channels = mat_dst.channels();
|
int channels = mat_dst.oclchannels();
|
||||||
int depth = mat_dst.depth();
|
int depth = mat_dst.depth();
|
||||||
|
|
||||||
string kernelName = "merge_vector";
|
string kernelName = "merge_vector";
|
||||||
@@ -176,15 +176,15 @@ namespace cv
|
|||||||
{1, 1, 1, 1, 1, 1, 1}
|
{1, 1, 1, 1, 1, 1, 1}
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t vector_length = vector_lengths[channels-1][depth];
|
size_t vector_length = vector_lengths[channels - 1][depth];
|
||||||
int offset_cols = (mat_dst.offset / mat_dst.elemSize()) & (vector_length - 1);
|
int offset_cols = (mat_dst.offset / mat_dst.elemSize()) & (vector_length - 1);
|
||||||
int cols = divUp(mat_dst.cols + offset_cols, vector_length);
|
int cols = divUp(mat_dst.cols + offset_cols, vector_length);
|
||||||
|
|
||||||
size_t localThreads[3] = { 64, 4, 1 };
|
size_t localThreads[3] = { 64, 4, 1 };
|
||||||
size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
|
size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
|
||||||
divUp(mat_dst.rows, localThreads[1]) * localThreads[1],
|
divUp(mat_dst.rows, localThreads[1]) *localThreads[1],
|
||||||
1
|
1
|
||||||
};
|
};
|
||||||
|
|
||||||
int dst_step1 = mat_dst.cols * mat_dst.elemSize();
|
int dst_step1 = mat_dst.cols * mat_dst.elemSize();
|
||||||
vector<pair<size_t , const void *> > args;
|
vector<pair<size_t , const void *> > args;
|
||||||
@@ -206,7 +206,7 @@ namespace cv
|
|||||||
|
|
||||||
// if channel == 3, then the matrix will convert to channel =4
|
// if channel == 3, then the matrix will convert to channel =4
|
||||||
//if(n == 3)
|
//if(n == 3)
|
||||||
// args.push_back( make_pair( sizeof(cl_int), (void *)&offset_cols));
|
// args.push_back( make_pair( sizeof(cl_int), (void *)&offset_cols));
|
||||||
|
|
||||||
if(n == 3)
|
if(n == 3)
|
||||||
{
|
{
|
||||||
@@ -214,7 +214,7 @@ namespace cv
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].offset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].offset));
|
||||||
}
|
}
|
||||||
else if( n== 4)
|
else if( n == 4)
|
||||||
{
|
{
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[3].data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[3].data));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[3].step));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[3].step));
|
||||||
@@ -243,7 +243,7 @@ namespace cv
|
|||||||
CV_Assert(depth == mat_src[i].depth());
|
CV_Assert(depth == mat_src[i].depth());
|
||||||
CV_Assert(size == mat_src[i].size());
|
CV_Assert(size == mat_src[i].size());
|
||||||
|
|
||||||
total_channels += mat_src[i].channels();
|
total_channels += mat_src[i].oclchannels();
|
||||||
}
|
}
|
||||||
|
|
||||||
CV_Assert(total_channels <= 4);
|
CV_Assert(total_channels <= 4);
|
||||||
@@ -263,7 +263,7 @@ namespace cv
|
|||||||
void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst)
|
void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst)
|
||||||
{
|
{
|
||||||
Context *clCxt = mat_src.clCxt;
|
Context *clCxt = mat_src.clCxt;
|
||||||
int channels = mat_src.channels();
|
int channels = mat_src.oclchannels();
|
||||||
int depth = mat_src.depth();
|
int depth = mat_src.depth();
|
||||||
|
|
||||||
string kernelName = "split_vector";
|
string kernelName = "split_vector";
|
||||||
@@ -274,13 +274,13 @@ namespace cv
|
|||||||
{4, 4, 2, 2, 1, 1, 1}
|
{4, 4, 2, 2, 1, 1, 1}
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t index = indexes[channels-1][mat_dst[0].depth()];
|
size_t index = indexes[channels - 1][mat_dst[0].depth()];
|
||||||
int cols = divUp(mat_src.cols, index);
|
int cols = divUp(mat_src.cols, index);
|
||||||
size_t localThreads[3] = { 64, 4, 1 };
|
size_t localThreads[3] = { 64, 4, 1 };
|
||||||
size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
|
size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
|
||||||
divUp(mat_src.rows, localThreads[1]) * localThreads[1],
|
divUp(mat_src.rows, localThreads[1]) *localThreads[1],
|
||||||
1
|
1
|
||||||
};
|
};
|
||||||
|
|
||||||
vector<pair<size_t , const void *> > args;
|
vector<pair<size_t , const void *> > args;
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data));
|
||||||
@@ -307,14 +307,14 @@ namespace cv
|
|||||||
void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
|
void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
|
||||||
{
|
{
|
||||||
|
|
||||||
if(mat_src.clCxt -> impl -> double_support ==0 && mat_src.type() == CV_64F)
|
if(mat_src.clCxt -> impl -> double_support == 0 && mat_src.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Context *clCxt = mat_src.clCxt;
|
Context *clCxt = mat_src.clCxt;
|
||||||
int channels = mat_src.channels();
|
int channels = mat_src.oclchannels();
|
||||||
int depth = mat_src.depth();
|
int depth = mat_src.depth();
|
||||||
|
|
||||||
string kernelName = "split_vector";
|
string kernelName = "split_vector";
|
||||||
@@ -325,7 +325,7 @@ namespace cv
|
|||||||
{4, 4, 2, 2, 1, 1, 1}
|
{4, 4, 2, 2, 1, 1, 1}
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t vector_length = vector_lengths[channels-1][mat_dst[0].depth()];
|
size_t vector_length = vector_lengths[channels - 1][mat_dst[0].depth()];
|
||||||
|
|
||||||
int max_offset_cols = 0;
|
int max_offset_cols = 0;
|
||||||
for(int i = 0; i < channels; i++)
|
for(int i = 0; i < channels; i++)
|
||||||
@@ -339,8 +339,8 @@ namespace cv
|
|||||||
: divUp(mat_src.cols + max_offset_cols, vector_length);
|
: divUp(mat_src.cols + max_offset_cols, vector_length);
|
||||||
|
|
||||||
size_t localThreads[3] = { 64, 4, 1 };
|
size_t localThreads[3] = { 64, 4, 1 };
|
||||||
size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
|
size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
|
||||||
divUp(mat_src.rows, localThreads[1]) * localThreads[1], 1
|
divUp(mat_src.rows, localThreads[1]) *localThreads[1], 1
|
||||||
};
|
};
|
||||||
|
|
||||||
int dst_step1 = mat_dst[0].cols * mat_dst[0].elemSize();
|
int dst_step1 = mat_dst[0].cols * mat_dst[0].elemSize();
|
||||||
@@ -379,7 +379,7 @@ namespace cv
|
|||||||
CV_Assert(mat_dst);
|
CV_Assert(mat_dst);
|
||||||
|
|
||||||
int depth = mat_src.depth();
|
int depth = mat_src.depth();
|
||||||
int num_channels = mat_src.channels();
|
int num_channels = mat_src.oclchannels();
|
||||||
Size size = mat_src.size();
|
Size size = mat_src.size();
|
||||||
|
|
||||||
if(num_channels == 1)
|
if(num_channels == 1)
|
||||||
@@ -413,8 +413,8 @@ void cv::ocl::split(const oclMat &src, oclMat *dst)
|
|||||||
}
|
}
|
||||||
void cv::ocl::split(const oclMat &src, vector<oclMat> &dst)
|
void cv::ocl::split(const oclMat &src, vector<oclMat> &dst)
|
||||||
{
|
{
|
||||||
dst.resize(src.channels());
|
dst.resize(src.oclchannels());
|
||||||
if(src.channels() > 0)
|
if(src.oclchannels() > 0)
|
||||||
split_merge::split(src, &dst[0]);
|
split_merge::split(src, &dst[0]);
|
||||||
}
|
}
|
||||||
#endif /* !defined (HAVE_OPENCL) */
|
#endif /* !defined (HAVE_OPENCL) */
|
||||||
|
|||||||
@@ -44,7 +44,7 @@
|
|||||||
//M*/
|
//M*/
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include "opencv2/highgui/highgui.hpp"
|
//#include "opencv2/highgui/highgui.hpp"
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::ocl;
|
using namespace cv::ocl;
|
||||||
@@ -52,25 +52,65 @@ using namespace std;
|
|||||||
|
|
||||||
#if !defined (HAVE_OPENCL)
|
#if !defined (HAVE_OPENCL)
|
||||||
|
|
||||||
cv::ocl::SURF_OCL::SURF_OCL() { throw_nogpu(); }
|
cv::ocl::SURF_OCL::SURF_OCL()
|
||||||
cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool) { throw_nogpu(); }
|
{
|
||||||
int cv::ocl::SURF_OCL::descriptorSize() const { throw_nogpu(); return 0;}
|
throw_nogpu();
|
||||||
void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint>&, oclMat&) { throw_nogpu(); }
|
}
|
||||||
void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat&, vector<KeyPoint>&) { throw_nogpu(); }
|
cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool)
|
||||||
void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat&, vector<float>&) { throw_nogpu(); }
|
{
|
||||||
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
|
throw_nogpu();
|
||||||
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&, oclMat&, bool) { throw_nogpu(); }
|
}
|
||||||
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&) { throw_nogpu(); }
|
int cv::ocl::SURF_OCL::descriptorSize() const
|
||||||
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&, oclMat&, bool) { throw_nogpu(); }
|
{
|
||||||
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&, vector<float>&, bool) { throw_nogpu(); }
|
throw_nogpu();
|
||||||
void cv::ocl::SURF_OCL::releaseMemory() { throw_nogpu(); }
|
return 0;
|
||||||
|
}
|
||||||
|
void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint> &, oclMat &)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &, vector<KeyPoint> &)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat &, vector<float> &)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &, oclMat &, bool)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &, oclMat &, bool)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &, vector<float> &, bool)
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
void cv::ocl::SURF_OCL::releaseMemory()
|
||||||
|
{
|
||||||
|
throw_nogpu();
|
||||||
|
}
|
||||||
|
|
||||||
#else /* !defined (HAVE_OPENCL) */
|
#else /* !defined (HAVE_OPENCL) */
|
||||||
namespace cv { namespace ocl
|
namespace cv
|
||||||
{
|
{
|
||||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
namespace ocl
|
||||||
extern const char * nonfree_surf;
|
{
|
||||||
}}
|
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||||
|
extern const char *nonfree_surf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline int divUp(int total, int grain)
|
static inline int divUp(int total, int grain)
|
||||||
@@ -96,28 +136,28 @@ class SURF_OCL_Invoker
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
// facilities
|
// facilities
|
||||||
void bindImgTex(const oclMat& img, cl_mem & texture);
|
void bindImgTex(const oclMat &img, cl_mem &texture);
|
||||||
|
|
||||||
//void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
|
//void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
|
||||||
//void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
|
//void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
|
||||||
|
|
||||||
// kernel callers declearations
|
// kernel callers declearations
|
||||||
void icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int layer_rows);
|
void icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, int octave, int nOctaveLayers, int layer_rows);
|
||||||
|
|
||||||
void icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset,
|
void icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
|
||||||
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
|
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
|
||||||
|
|
||||||
void icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter,
|
void icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter,
|
||||||
oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures);
|
oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures);
|
||||||
|
|
||||||
void icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures);
|
void icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures);
|
||||||
|
|
||||||
void compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures);
|
void compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures);
|
||||||
// end of kernel callers declearations
|
// end of kernel callers declearations
|
||||||
|
|
||||||
|
|
||||||
SURF_OCL_Invoker(SURF_OCL& surf, const oclMat& img, const oclMat& mask) :
|
SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
|
||||||
surf_(surf),
|
surf_(surf),
|
||||||
img_cols(img.cols), img_rows(img.rows),
|
img_cols(img.cols), img_rows(img.rows),
|
||||||
use_mask(!mask.empty()),
|
use_mask(!mask.empty()),
|
||||||
imgTex(NULL), sumTex(NULL), maskSumTex(NULL)
|
imgTex(NULL), sumTex(NULL), maskSumTex(NULL)
|
||||||
@@ -159,13 +199,13 @@ public:
|
|||||||
// temp fix for missing min overload
|
// temp fix for missing min overload
|
||||||
oclMat temp(mask.size(), mask.type());
|
oclMat temp(mask.size(), mask.type());
|
||||||
temp.setTo(Scalar::all(1.0));
|
temp.setTo(Scalar::all(1.0));
|
||||||
//cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
|
//cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
|
||||||
integral(surf_.mask1, surf_.maskSum);
|
integral(surf_.mask1, surf_.maskSum);
|
||||||
bindImgTex(surf_.maskSum, maskSumTex);
|
bindImgTex(surf_.maskSum, maskSumTex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void detectKeypoints(oclMat& keypoints)
|
void detectKeypoints(oclMat &keypoints)
|
||||||
{
|
{
|
||||||
// create image pyramid buffers
|
// create image pyramid buffers
|
||||||
// different layers have same sized buffers, but they are sampled from gaussin kernel.
|
// different layers have same sized buffers, but they are sampled from gaussin kernel.
|
||||||
@@ -186,7 +226,7 @@ public:
|
|||||||
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows);
|
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows);
|
||||||
|
|
||||||
icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
|
icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
|
||||||
octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
|
octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
|
||||||
|
|
||||||
unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
|
unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
|
||||||
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
|
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
|
||||||
@@ -194,7 +234,7 @@ public:
|
|||||||
if (maxCounter > 0)
|
if (maxCounter > 0)
|
||||||
{
|
{
|
||||||
icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter,
|
icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter,
|
||||||
keypoints, counters, octave, layer_rows, maxFeatures);
|
keypoints, counters, octave, layer_rows, maxFeatures);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
|
unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
|
||||||
@@ -208,7 +248,7 @@ public:
|
|||||||
findOrientation(keypoints);
|
findOrientation(keypoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
void findOrientation(oclMat& keypoints)
|
void findOrientation(oclMat &keypoints)
|
||||||
{
|
{
|
||||||
const int nFeatures = keypoints.cols;
|
const int nFeatures = keypoints.cols;
|
||||||
if (nFeatures > 0)
|
if (nFeatures > 0)
|
||||||
@@ -217,7 +257,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void computeDescriptors(const oclMat& keypoints, oclMat& descriptors, int descriptorSize)
|
void computeDescriptors(const oclMat &keypoints, oclMat &descriptors, int descriptorSize)
|
||||||
{
|
{
|
||||||
const int nFeatures = keypoints.cols;
|
const int nFeatures = keypoints.cols;
|
||||||
if (nFeatures > 0)
|
if (nFeatures > 0)
|
||||||
@@ -239,7 +279,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
SURF_OCL& surf_;
|
SURF_OCL &surf_;
|
||||||
|
|
||||||
int img_cols, img_rows;
|
int img_cols, img_rows;
|
||||||
|
|
||||||
@@ -257,8 +297,8 @@ private:
|
|||||||
|
|
||||||
oclMat additioalParamBuffer;
|
oclMat additioalParamBuffer;
|
||||||
|
|
||||||
SURF_OCL_Invoker& operator= (const SURF_OCL_Invoker& right)
|
SURF_OCL_Invoker &operator= (const SURF_OCL_Invoker &right)
|
||||||
{
|
{
|
||||||
(*this) = right;
|
(*this) = right;
|
||||||
return *this;
|
return *this;
|
||||||
} // remove warning C4512
|
} // remove warning C4512
|
||||||
@@ -289,7 +329,7 @@ int cv::ocl::SURF_OCL::descriptorSize() const
|
|||||||
return extended ? 128 : 64;
|
return extended ? 128 : 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint>& keypoints, oclMat& keypointsGPU)
|
void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint> &keypoints, oclMat &keypointsGPU)
|
||||||
{
|
{
|
||||||
if (keypoints.empty())
|
if (keypoints.empty())
|
||||||
keypointsGPU.release();
|
keypointsGPU.release();
|
||||||
@@ -297,17 +337,17 @@ void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint>& keypoints, oclMa
|
|||||||
{
|
{
|
||||||
Mat keypointsCPU(SURF_OCL::ROWS_COUNT, static_cast<int>(keypoints.size()), CV_32FC1);
|
Mat keypointsCPU(SURF_OCL::ROWS_COUNT, static_cast<int>(keypoints.size()), CV_32FC1);
|
||||||
|
|
||||||
float* kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
|
float *kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
|
||||||
float* kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
|
float *kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
|
||||||
int* kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
|
int *kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
|
||||||
int* kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
|
int *kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
|
||||||
float* kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
|
float *kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
|
||||||
float* kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
|
float *kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
|
||||||
float* kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
|
float *kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
|
||||||
|
|
||||||
for (size_t i = 0, size = keypoints.size(); i < size; ++i)
|
for (size_t i = 0, size = keypoints.size(); i < size; ++i)
|
||||||
{
|
{
|
||||||
const KeyPoint& kp = keypoints[i];
|
const KeyPoint &kp = keypoints[i];
|
||||||
kp_x[i] = kp.pt.x;
|
kp_x[i] = kp.pt.x;
|
||||||
kp_y[i] = kp.pt.y;
|
kp_y[i] = kp.pt.y;
|
||||||
kp_octave[i] = kp.octave;
|
kp_octave[i] = kp.octave;
|
||||||
@@ -321,7 +361,7 @@ void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint>& keypoints, oclMa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat& keypointsGPU, vector<KeyPoint>& keypoints)
|
void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &keypointsGPU, vector<KeyPoint> &keypoints)
|
||||||
{
|
{
|
||||||
const int nFeatures = keypointsGPU.cols;
|
const int nFeatures = keypointsGPU.cols;
|
||||||
|
|
||||||
@@ -335,17 +375,17 @@ void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat& keypointsGPU, vector<Key
|
|||||||
|
|
||||||
keypoints.resize(nFeatures);
|
keypoints.resize(nFeatures);
|
||||||
|
|
||||||
float* kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
|
float *kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
|
||||||
float* kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
|
float *kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
|
||||||
int* kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
|
int *kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
|
||||||
int* kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
|
int *kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
|
||||||
float* kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
|
float *kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
|
||||||
float* kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
|
float *kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
|
||||||
float* kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
|
float *kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
|
||||||
|
|
||||||
for (int i = 0; i < nFeatures; ++i)
|
for (int i = 0; i < nFeatures; ++i)
|
||||||
{
|
{
|
||||||
KeyPoint& kp = keypoints[i];
|
KeyPoint &kp = keypoints[i];
|
||||||
kp.pt.x = kp_x[i];
|
kp.pt.x = kp_x[i];
|
||||||
kp.pt.y = kp_y[i];
|
kp.pt.y = kp_y[i];
|
||||||
kp.class_id = kp_laplacian[i];
|
kp.class_id = kp_laplacian[i];
|
||||||
@@ -357,7 +397,7 @@ void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat& keypointsGPU, vector<Key
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat& descriptorsGPU, vector<float>& descriptors)
|
void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat &descriptorsGPU, vector<float> &descriptors)
|
||||||
{
|
{
|
||||||
if (descriptorsGPU.empty())
|
if (descriptorsGPU.empty())
|
||||||
descriptors.clear();
|
descriptors.clear();
|
||||||
@@ -371,7 +411,7 @@ void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat& descriptorsGPU, vector
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints)
|
void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints)
|
||||||
{
|
{
|
||||||
if (!img.empty())
|
if (!img.empty())
|
||||||
{
|
{
|
||||||
@@ -381,8 +421,8 @@ void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors,
|
void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints, oclMat &descriptors,
|
||||||
bool useProvidedKeypoints)
|
bool useProvidedKeypoints)
|
||||||
{
|
{
|
||||||
if (!img.empty())
|
if (!img.empty())
|
||||||
{
|
{
|
||||||
@@ -399,7 +439,7 @@ void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector<KeyPoint>& keypoints)
|
void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, vector<KeyPoint> &keypoints)
|
||||||
{
|
{
|
||||||
oclMat keypointsGPU;
|
oclMat keypointsGPU;
|
||||||
|
|
||||||
@@ -408,8 +448,8 @@ void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector
|
|||||||
downloadKeypoints(keypointsGPU, keypoints);
|
downloadKeypoints(keypointsGPU, keypoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector<KeyPoint>& keypoints,
|
void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, vector<KeyPoint> &keypoints,
|
||||||
oclMat& descriptors, bool useProvidedKeypoints)
|
oclMat &descriptors, bool useProvidedKeypoints)
|
||||||
{
|
{
|
||||||
oclMat keypointsGPU;
|
oclMat keypointsGPU;
|
||||||
|
|
||||||
@@ -421,8 +461,8 @@ void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector
|
|||||||
downloadKeypoints(keypointsGPU, keypoints);
|
downloadKeypoints(keypointsGPU, keypoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector<KeyPoint>& keypoints,
|
void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, vector<KeyPoint> &keypoints,
|
||||||
vector<float>& descriptors, bool useProvidedKeypoints)
|
vector<float> &descriptors, bool useProvidedKeypoints)
|
||||||
{
|
{
|
||||||
oclMat descriptorsGPU;
|
oclMat descriptorsGPU;
|
||||||
|
|
||||||
@@ -444,7 +484,7 @@ void cv::ocl::SURF_OCL::releaseMemory()
|
|||||||
|
|
||||||
|
|
||||||
// bind source buffer to image oject.
|
// bind source buffer to image oject.
|
||||||
void SURF_OCL_Invoker::bindImgTex(const oclMat& img, cl_mem& texture)
|
void SURF_OCL_Invoker::bindImgTex(const oclMat &img, cl_mem &texture)
|
||||||
{
|
{
|
||||||
cl_image_format format;
|
cl_image_format format;
|
||||||
int err;
|
int err;
|
||||||
@@ -494,31 +534,31 @@ void SURF_OCL_Invoker::bindImgTex(const oclMat& img, cl_mem& texture)
|
|||||||
desc.image_depth = 0;
|
desc.image_depth = 0;
|
||||||
desc.image_array_size = 1;
|
desc.image_array_size = 1;
|
||||||
desc.image_row_pitch = 0;
|
desc.image_row_pitch = 0;
|
||||||
desc.image_slice_pitch= 0;
|
desc.image_slice_pitch = 0;
|
||||||
desc.buffer = NULL;
|
desc.buffer = NULL;
|
||||||
desc.num_mip_levels = 0;
|
desc.num_mip_levels = 0;
|
||||||
desc.num_samples = 0;
|
desc.num_samples = 0;
|
||||||
texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
|
texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
|
||||||
#else
|
#else
|
||||||
texture = clCreateImage2D(
|
texture = clCreateImage2D(
|
||||||
Context::getContext()->impl->clContext,
|
Context::getContext()->impl->clContext,
|
||||||
CL_MEM_READ_WRITE,
|
CL_MEM_READ_WRITE,
|
||||||
&format,
|
&format,
|
||||||
img.step / img.elemSize(),
|
img.step / img.elemSize(),
|
||||||
img.rows,
|
img.rows,
|
||||||
0,
|
0,
|
||||||
NULL,
|
NULL,
|
||||||
&err);
|
&err);
|
||||||
#endif
|
#endif
|
||||||
size_t origin[] = { 0, 0, 0 };
|
size_t origin[] = { 0, 0, 0 };
|
||||||
size_t region[] = { img.step/img.elemSize(), img.rows, 1 };
|
size_t region[] = { img.step / img.elemSize(), img.rows, 1 };
|
||||||
clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);
|
clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);
|
||||||
openCLSafeCall(err);
|
openCLSafeCall(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
// kernel caller definitions
|
// kernel caller definitions
|
||||||
void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int c_layer_rows)
|
void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, int octave, int nOctaveLayers, int c_layer_rows)
|
||||||
{
|
{
|
||||||
const int min_size = calcSize(octave, 0);
|
const int min_size = calcSize(octave, 0);
|
||||||
const int max_samples_i = 1 + ((img_rows - min_size) >> octave);
|
const int max_samples_i = 1 + ((img_rows - min_size) >> octave);
|
||||||
@@ -540,15 +580,17 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, i
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows));
|
||||||
|
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
size_t globalThreads[3] = {
|
size_t globalThreads[3] =
|
||||||
divUp(max_samples_j, localThreads[0]) * localThreads[0],
|
{
|
||||||
divUp(max_samples_i, localThreads[1]) * localThreads[1] * (nOctaveLayers + 2),
|
divUp(max_samples_j, localThreads[0]) *localThreads[0],
|
||||||
1};
|
divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
|
||||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
1
|
||||||
|
};
|
||||||
|
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset,
|
void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
|
||||||
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols)
|
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols)
|
||||||
{
|
{
|
||||||
const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1;
|
const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1;
|
||||||
|
|
||||||
@@ -578,15 +620,16 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat&
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) * localThreads[0],
|
size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) *localThreads[0],
|
||||||
divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) * nLayers * localThreads[1],
|
divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) *nLayers *localThreads[1],
|
||||||
1};
|
1
|
||||||
|
};
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter,
|
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter,
|
||||||
oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures)
|
oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures)
|
||||||
{
|
{
|
||||||
Context *clCxt = det.clCxt;
|
Context *clCxt = det.clCxt;
|
||||||
string kernelName = "icvInterpolateKeypoint";
|
string kernelName = "icvInterpolateKeypoint";
|
||||||
@@ -605,14 +648,14 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMa
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&maxFeatures));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&maxFeatures));
|
||||||
|
|
||||||
size_t localThreads[3] = {3, 3, 3};
|
size_t localThreads[3] = {3, 3, 3};
|
||||||
size_t globalThreads[3] = {maxCounter * localThreads[0], localThreads[1], 1};
|
size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1};
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures)
|
void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures)
|
||||||
{
|
{
|
||||||
Context * clCxt = counters.clCxt;
|
Context *clCxt = counters.clCxt;
|
||||||
string kernelName = "icvCalcOrientation";
|
string kernelName = "icvCalcOrientation";
|
||||||
|
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
@@ -624,12 +667,12 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat& keypoints, int nFeat
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols));
|
||||||
|
|
||||||
size_t localThreads[3] = {32, 4, 1};
|
size_t localThreads[3] = {32, 4, 1};
|
||||||
size_t globalThreads[3] = {nFeatures * localThreads[0], localThreads[1], 1};
|
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures)
|
void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures)
|
||||||
{
|
{
|
||||||
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
|
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
|
||||||
Context *clCxt = descriptors.clCxt;
|
Context *clCxt = descriptors.clCxt;
|
||||||
|
|||||||
@@ -81,14 +81,14 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
print_info();
|
print_info();
|
||||||
|
|
||||||
std::vector<cv::ocl::Info> oclinfo;
|
std::vector<cv::ocl::Info> oclinfo;
|
||||||
int devnums = getDevice(oclinfo);
|
int devnums = getDevice(oclinfo);
|
||||||
if(devnums<1)
|
if(devnums < 1)
|
||||||
{
|
{
|
||||||
std::cout << "no device found\n";
|
std::cout << "no device found\n";
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
//setDevice(oclinfo[2]);
|
//setDevice(oclinfo[1]);
|
||||||
return RUN_ALL_TESTS();
|
return RUN_ALL_TESTS();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -143,6 +143,10 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
|
|||||||
src1y = rng.uniform(0, mat1.rows - roirows);
|
src1y = rng.uniform(0, mat1.rows - roirows);
|
||||||
dstx = rng.uniform(0, dst.cols - roicols);
|
dstx = rng.uniform(0, dst.cols - roicols);
|
||||||
dsty = rng.uniform(0, dst.rows - roirows);
|
dsty = rng.uniform(0, dst.rows - roirows);
|
||||||
|
maskx = rng.uniform(0, mask.cols - roicols);
|
||||||
|
masky = rng.uniform(0, mask.rows - roirows);
|
||||||
|
src2x = rng.uniform(0, mat2.cols - roicols);
|
||||||
|
src2y = rng.uniform(0, mat2.rows - roirows);
|
||||||
#else
|
#else
|
||||||
roicols = mat1.cols;
|
roicols = mat1.cols;
|
||||||
roirows = mat1.rows;
|
roirows = mat1.rows;
|
||||||
@@ -150,11 +154,11 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
|
|||||||
src1y = 0;
|
src1y = 0;
|
||||||
dstx = 0;
|
dstx = 0;
|
||||||
dsty = 0;
|
dsty = 0;
|
||||||
|
maskx = 0;
|
||||||
|
masky = 0;
|
||||||
|
src2x = 0;
|
||||||
|
src2y = 0;
|
||||||
#endif
|
#endif
|
||||||
maskx = rng.uniform(0, mask.cols - roicols);
|
|
||||||
masky = rng.uniform(0, mask.rows - roirows);
|
|
||||||
src2x = rng.uniform(0, mat2.cols - roicols);
|
|
||||||
src2y = rng.uniform(0, mat2.rows - roirows);
|
|
||||||
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
|
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
|
||||||
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
|
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
|
||||||
mask_roi = mask(Rect(maskx, masky, roicols, roirows));
|
mask_roi = mask(Rect(maskx, masky, roicols, roirows));
|
||||||
@@ -1454,7 +1458,7 @@ TEST_P(MagnitudeSqr, Mat)
|
|||||||
float val1 = mat1.at<float>(i, j);
|
float val1 = mat1.at<float>(i, j);
|
||||||
float val2 = mat2.at<float>(i, j);
|
float val2 = mat2.at<float>(i, j);
|
||||||
|
|
||||||
((float *)(dst.data))[i *dst.step/4 +j] = val1 * val1 + val2 * val2;
|
((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
|
||||||
|
|
||||||
// float val1 =((float *)( mat1.data))[(i*mat1.step/8 +j)*2];
|
// float val1 =((float *)( mat1.data))[(i*mat1.step/8 +j)*2];
|
||||||
//
|
//
|
||||||
@@ -1525,40 +1529,40 @@ INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(
|
|||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(false)));
|
Values(false)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(
|
||||||
Values(CV_8UC1,CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(
|
||||||
Values(CV_32FC1, CV_32FC3,CV_32FC4),
|
Values(CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(
|
||||||
Values(CV_32FC1, CV_32FC3,CV_32FC4),
|
Values(CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(
|
||||||
Values(CV_32FC1, CV_32FC3,CV_32FC4),
|
Values(CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32FC1),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32FC1),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(
|
||||||
@@ -1578,24 +1582,24 @@ INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(
|
|||||||
Values(false)));
|
Values(false)));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
|
INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
|
||||||
// Values(false) is the reserved parameter
|
// Values(false) is the reserved parameter
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(
|
||||||
Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1,CV_32FC3, CV_32FC4), Values(false)));
|
Values(CV_8UC1, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
|
||||||
//Values(false) is the reserved parameter
|
//Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(
|
||||||
Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
|
Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
|
||||||
//Values(false) is the reserved parameter
|
//Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(
|
||||||
Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
|
Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
|
||||||
//Values(false) is the reserved parameter
|
//Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(
|
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(
|
||||||
Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
|
Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
|
||||||
//Values(false) is the reserved parameter
|
//Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1, CV_32SC1, CV_32FC1), Values(false)));
|
INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1, CV_32SC1, CV_32FC1), Values(false)));
|
||||||
|
|||||||
@@ -6,9 +6,9 @@ using namespace cv::ocl;
|
|||||||
using namespace cvtest;
|
using namespace cvtest;
|
||||||
using namespace testing;
|
using namespace testing;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold)
|
void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold)
|
||||||
{
|
{
|
||||||
result_gold.create(img1.size(), img1.type());
|
result_gold.create(img1.size(), img1.type());
|
||||||
|
|
||||||
@@ -16,11 +16,11 @@ void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& we
|
|||||||
|
|
||||||
for (int y = 0; y < img1.rows; ++y)
|
for (int y = 0; y < img1.rows; ++y)
|
||||||
{
|
{
|
||||||
const float* weights1_row = weights1.ptr<float>(y);
|
const float *weights1_row = weights1.ptr<float>(y);
|
||||||
const float* weights2_row = weights2.ptr<float>(y);
|
const float *weights2_row = weights2.ptr<float>(y);
|
||||||
const T* img1_row = img1.ptr<T>(y);
|
const T *img1_row = img1.ptr<T>(y);
|
||||||
const T* img2_row = img2.ptr<T>(y);
|
const T *img2_row = img2.ptr<T>(y);
|
||||||
T* result_gold_row = result_gold.ptr<T>(y);
|
T *result_gold_row = result_gold.ptr<T>(y);
|
||||||
|
|
||||||
for (int x = 0; x < img1.cols * cn; ++x)
|
for (int x = 0; x < img1.cols * cn; ++x)
|
||||||
{
|
{
|
||||||
@@ -59,16 +59,16 @@ TEST_P(Blend, Accuracy)
|
|||||||
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
|
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
|
||||||
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
|
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
|
||||||
|
|
||||||
cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F);
|
cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F);
|
||||||
cv::ocl::oclMat dst(size, type);
|
cv::ocl::oclMat dst(size, type);
|
||||||
gimg1.upload(img1);
|
gimg1.upload(img1);
|
||||||
gimg2.upload(img2);
|
gimg2.upload(img2);
|
||||||
gweights1.upload(weights1);
|
gweights1.upload(weights1);
|
||||||
gweights2.upload(weights2);
|
gweights2.upload(weights2);
|
||||||
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst);
|
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst);
|
||||||
cv::Mat result;
|
cv::Mat result;
|
||||||
cv::Mat result_gold;
|
cv::Mat result_gold;
|
||||||
dst.download(result);
|
dst.download(result);
|
||||||
if (depth == CV_8U)
|
if (depth == CV_8U)
|
||||||
blendLinearGold<uchar>(img1, img2, weights1, weights2, result_gold);
|
blendLinearGold<uchar>(img1, img2, weights1, weights2, result_gold);
|
||||||
else
|
else
|
||||||
@@ -78,6 +78,7 @@ TEST_P(Blend, Accuracy)
|
|||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
|
||||||
DIFFERENT_SIZES,
|
DIFFERENT_SIZES,
|
||||||
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3),MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
|
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
|
||||||
));
|
));
|
||||||
|
#endif
|
||||||
@@ -40,180 +40,181 @@
|
|||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
namespace {
|
namespace
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// BruteForceMatcher
|
|
||||||
|
|
||||||
CV_ENUM(DistType, cv::ocl::BruteForceMatcher_OCL_base::L1Dist, cv::ocl::BruteForceMatcher_OCL_base::L2Dist, cv::ocl::BruteForceMatcher_OCL_base::HammingDist)
|
|
||||||
IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
|
|
||||||
|
|
||||||
PARAM_TEST_CASE(BruteForceMatcher/*, NormCode*/, DistType, DescriptorSize)
|
|
||||||
{
|
{
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
|
||||||
cv::ocl::BruteForceMatcher_OCL_base::DistType distType;
|
|
||||||
int normCode;
|
|
||||||
int dim;
|
|
||||||
|
|
||||||
int queryDescCount;
|
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
int countFactor;
|
// BruteForceMatcher
|
||||||
|
|
||||||
cv::Mat query, train;
|
CV_ENUM(DistType, cv::ocl::BruteForceMatcher_OCL_base::L1Dist, cv::ocl::BruteForceMatcher_OCL_base::L2Dist, cv::ocl::BruteForceMatcher_OCL_base::HammingDist)
|
||||||
|
IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
|
||||||
|
|
||||||
virtual void SetUp()
|
PARAM_TEST_CASE(BruteForceMatcher/*, NormCode*/, DistType, DescriptorSize)
|
||||||
{
|
{
|
||||||
//normCode = GET_PARAM(0);
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
distType = (cv::ocl::BruteForceMatcher_OCL_base::DistType)(int)GET_PARAM(0);
|
cv::ocl::BruteForceMatcher_OCL_base::DistType distType;
|
||||||
dim = GET_PARAM(1);
|
int normCode;
|
||||||
|
int dim;
|
||||||
|
|
||||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
int queryDescCount;
|
||||||
//CV_Assert(devnums > 0);
|
int countFactor;
|
||||||
|
|
||||||
queryDescCount = 300; // must be even number because we split train data in some cases in two
|
cv::Mat query, train;
|
||||||
countFactor = 4; // do not change it
|
|
||||||
|
|
||||||
cv::RNG& rng = cvtest::TS::ptr()->get_rng();
|
virtual void SetUp()
|
||||||
|
|
||||||
cv::Mat queryBuf, trainBuf;
|
|
||||||
|
|
||||||
// Generate query descriptors randomly.
|
|
||||||
// Descriptor vector elements are integer values.
|
|
||||||
queryBuf.create(queryDescCount, dim, CV_32SC1);
|
|
||||||
rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
|
|
||||||
queryBuf.convertTo(queryBuf, CV_32FC1);
|
|
||||||
|
|
||||||
// Generate train decriptors as follows:
|
|
||||||
// copy each query descriptor to train set countFactor times
|
|
||||||
// and perturb some one element of the copied descriptors in
|
|
||||||
// in ascending order. General boundaries of the perturbation
|
|
||||||
// are (0.f, 1.f).
|
|
||||||
trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1);
|
|
||||||
float step = 1.f / countFactor;
|
|
||||||
for (int qIdx = 0; qIdx < queryDescCount; qIdx++)
|
|
||||||
{
|
{
|
||||||
cv::Mat queryDescriptor = queryBuf.row(qIdx);
|
//normCode = GET_PARAM(0);
|
||||||
for (int c = 0; c < countFactor; c++)
|
distType = (cv::ocl::BruteForceMatcher_OCL_base::DistType)(int)GET_PARAM(0);
|
||||||
|
dim = GET_PARAM(1);
|
||||||
|
|
||||||
|
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||||
|
//CV_Assert(devnums > 0);
|
||||||
|
|
||||||
|
queryDescCount = 300; // must be even number because we split train data in some cases in two
|
||||||
|
countFactor = 4; // do not change it
|
||||||
|
|
||||||
|
cv::RNG &rng = cvtest::TS::ptr()->get_rng();
|
||||||
|
|
||||||
|
cv::Mat queryBuf, trainBuf;
|
||||||
|
|
||||||
|
// Generate query descriptors randomly.
|
||||||
|
// Descriptor vector elements are integer values.
|
||||||
|
queryBuf.create(queryDescCount, dim, CV_32SC1);
|
||||||
|
rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
|
||||||
|
queryBuf.convertTo(queryBuf, CV_32FC1);
|
||||||
|
|
||||||
|
// Generate train decriptors as follows:
|
||||||
|
// copy each query descriptor to train set countFactor times
|
||||||
|
// and perturb some one element of the copied descriptors in
|
||||||
|
// in ascending order. General boundaries of the perturbation
|
||||||
|
// are (0.f, 1.f).
|
||||||
|
trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1);
|
||||||
|
float step = 1.f / countFactor;
|
||||||
|
for (int qIdx = 0; qIdx < queryDescCount; qIdx++)
|
||||||
{
|
{
|
||||||
int tIdx = qIdx * countFactor + c;
|
cv::Mat queryDescriptor = queryBuf.row(qIdx);
|
||||||
cv::Mat trainDescriptor = trainBuf.row(tIdx);
|
for (int c = 0; c < countFactor; c++)
|
||||||
queryDescriptor.copyTo(trainDescriptor);
|
{
|
||||||
int elem = rng(dim);
|
int tIdx = qIdx * countFactor + c;
|
||||||
float diff = rng.uniform(step * c, step * (c + 1));
|
cv::Mat trainDescriptor = trainBuf.row(tIdx);
|
||||||
trainDescriptor.at<float>(0, elem) += diff;
|
queryDescriptor.copyTo(trainDescriptor);
|
||||||
|
int elem = rng(dim);
|
||||||
|
float diff = rng.uniform(step * c, step * (c + 1));
|
||||||
|
trainDescriptor.at<float>(0, elem) += diff;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
queryBuf.convertTo(query, CV_32F);
|
||||||
|
trainBuf.convertTo(train, CV_32F);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
queryBuf.convertTo(query, CV_32F);
|
TEST_P(BruteForceMatcher, Match_Single)
|
||||||
trainBuf.convertTo(train, CV_32F);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_P(BruteForceMatcher, Match_Single)
|
|
||||||
{
|
|
||||||
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
|
|
||||||
|
|
||||||
std::vector<cv::DMatch> matches;
|
|
||||||
matcher.match(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches);
|
|
||||||
|
|
||||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
|
||||||
|
|
||||||
int badCount = 0;
|
|
||||||
for (size_t i = 0; i < matches.size(); i++)
|
|
||||||
{
|
{
|
||||||
cv::DMatch match = matches[i];
|
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
|
||||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
|
|
||||||
badCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT_EQ(0, badCount);
|
std::vector<cv::DMatch> matches;
|
||||||
}
|
matcher.match(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches);
|
||||||
|
|
||||||
TEST_P(BruteForceMatcher, KnnMatch_2_Single)
|
|
||||||
{
|
|
||||||
const int knn = 2;
|
|
||||||
|
|
||||||
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
|
|
||||||
|
|
||||||
std::vector< std::vector<cv::DMatch> > matches;
|
|
||||||
matcher.knnMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, knn);
|
|
||||||
|
|
||||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
|
||||||
|
|
||||||
int badCount = 0;
|
|
||||||
for (size_t i = 0; i < matches.size(); i++)
|
|
||||||
{
|
|
||||||
if ((int)matches[i].size() != knn)
|
|
||||||
badCount++;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
int localBadCount = 0;
|
|
||||||
for (int k = 0; k < knn; k++)
|
|
||||||
{
|
|
||||||
cv::DMatch match = matches[i][k];
|
|
||||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
|
|
||||||
localBadCount++;
|
|
||||||
}
|
|
||||||
badCount += localBadCount > 0 ? 1 : 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT_EQ(0, badCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_P(BruteForceMatcher, RadiusMatch_Single)
|
|
||||||
{
|
|
||||||
float radius;
|
|
||||||
if(distType == cv::ocl::BruteForceMatcher_OCL_base::L2Dist)
|
|
||||||
radius = 1.f / countFactor /countFactor;
|
|
||||||
else
|
|
||||||
radius = 1.f / countFactor;
|
|
||||||
|
|
||||||
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
|
|
||||||
|
|
||||||
// assume support atomic.
|
|
||||||
//if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
|
||||||
//{
|
|
||||||
// try
|
|
||||||
// {
|
|
||||||
// std::vector< std::vector<cv::DMatch> > matches;
|
|
||||||
// matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
|
|
||||||
// }
|
|
||||||
// catch (const cv::Exception& e)
|
|
||||||
// {
|
|
||||||
// ASSERT_EQ(CV_StsNotImplemented, e.code);
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
//else
|
|
||||||
{
|
|
||||||
std::vector< std::vector<cv::DMatch> > matches;
|
|
||||||
matcher.radiusMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, radius);
|
|
||||||
|
|
||||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||||
|
|
||||||
int badCount = 0;
|
int badCount = 0;
|
||||||
for (size_t i = 0; i < matches.size(); i++)
|
for (size_t i = 0; i < matches.size(); i++)
|
||||||
{
|
{
|
||||||
if ((int)matches[i].size() != 1)
|
cv::DMatch match = matches[i];
|
||||||
{
|
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
|
||||||
badCount++;
|
badCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ASSERT_EQ(0, badCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(BruteForceMatcher, KnnMatch_2_Single)
|
||||||
|
{
|
||||||
|
const int knn = 2;
|
||||||
|
|
||||||
|
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
|
||||||
|
|
||||||
|
std::vector< std::vector<cv::DMatch> > matches;
|
||||||
|
matcher.knnMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, knn);
|
||||||
|
|
||||||
|
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||||
|
|
||||||
|
int badCount = 0;
|
||||||
|
for (size_t i = 0; i < matches.size(); i++)
|
||||||
|
{
|
||||||
|
if ((int)matches[i].size() != knn)
|
||||||
|
badCount++;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cv::DMatch match = matches[i][0];
|
int localBadCount = 0;
|
||||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0))
|
for (int k = 0; k < knn; k++)
|
||||||
badCount++;
|
{
|
||||||
|
cv::DMatch match = matches[i][k];
|
||||||
|
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
|
||||||
|
localBadCount++;
|
||||||
|
}
|
||||||
|
badCount += localBadCount > 0 ? 1 : 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT_EQ(0, badCount);
|
ASSERT_EQ(0, badCount);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
|
TEST_P(BruteForceMatcher, RadiusMatch_Single)
|
||||||
//ALL_DEVICES,
|
{
|
||||||
testing::Values(DistType(cv::ocl::BruteForceMatcher_OCL_base::L1Dist), DistType(cv::ocl::BruteForceMatcher_OCL_base::L2Dist)),
|
float radius;
|
||||||
testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304))));
|
if(distType == cv::ocl::BruteForceMatcher_OCL_base::L2Dist)
|
||||||
|
radius = 1.f / countFactor / countFactor;
|
||||||
|
else
|
||||||
|
radius = 1.f / countFactor;
|
||||||
|
|
||||||
|
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
|
||||||
|
|
||||||
|
// assume support atomic.
|
||||||
|
//if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
||||||
|
//{
|
||||||
|
// try
|
||||||
|
// {
|
||||||
|
// std::vector< std::vector<cv::DMatch> > matches;
|
||||||
|
// matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
|
||||||
|
// }
|
||||||
|
// catch (const cv::Exception& e)
|
||||||
|
// {
|
||||||
|
// ASSERT_EQ(CV_StsNotImplemented, e.code);
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
//else
|
||||||
|
{
|
||||||
|
std::vector< std::vector<cv::DMatch> > matches;
|
||||||
|
matcher.radiusMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, radius);
|
||||||
|
|
||||||
|
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||||
|
|
||||||
|
int badCount = 0;
|
||||||
|
for (size_t i = 0; i < matches.size(); i++)
|
||||||
|
{
|
||||||
|
if ((int)matches[i].size() != 1)
|
||||||
|
{
|
||||||
|
badCount++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cv::DMatch match = matches[i][0];
|
||||||
|
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
|
||||||
|
badCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_EQ(0, badCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
|
||||||
|
//ALL_DEVICES,
|
||||||
|
testing::Values(DistType(cv::ocl::BruteForceMatcher_OCL_base::L1Dist), DistType(cv::ocl::BruteForceMatcher_OCL_base::L2Dist)),
|
||||||
|
testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304))));
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
#endif
|
||||||
|
|||||||
@@ -44,8 +44,12 @@
|
|||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
#define FILTER_IMAGE "../../../samples/gpu/road.png"
|
#ifdef WIN32
|
||||||
|
#define FILTER_IMAGE "C:/Users/Public/Pictures/Sample Pictures/Penguins.jpg"
|
||||||
|
#else
|
||||||
|
#define FILTER_IMAGE "/Users/Test/Valve_original.PNG" // user need to specify a valid image path
|
||||||
|
#endif
|
||||||
#define SHOW_RESULT 0
|
#define SHOW_RESULT 0
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
@@ -60,13 +64,13 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient)
|
|||||||
bool useL2gradient;
|
bool useL2gradient;
|
||||||
|
|
||||||
cv::Mat edges_gold;
|
cv::Mat edges_gold;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
apperture_size = GET_PARAM(0);
|
apperture_size = GET_PARAM(0);
|
||||||
useL2gradient = GET_PARAM(1);
|
useL2gradient = GET_PARAM(1);
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -78,31 +82,32 @@ TEST_P(Canny, Accuracy)
|
|||||||
double low_thresh = 50.0;
|
double low_thresh = 50.0;
|
||||||
double high_thresh = 100.0;
|
double high_thresh = 100.0;
|
||||||
|
|
||||||
cv::resize(img, img, cv::Size(512, 384));
|
cv::resize(img, img, cv::Size(512, 384));
|
||||||
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
|
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
|
||||||
|
|
||||||
cv::ocl::oclMat edges;
|
cv::ocl::oclMat edges;
|
||||||
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
|
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
|
||||||
|
|
||||||
char filename [100];
|
char filename [100];
|
||||||
sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient);
|
sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient);
|
||||||
|
|
||||||
cv::Mat edges_gold;
|
cv::Mat edges_gold;
|
||||||
cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
|
cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
|
||||||
|
|
||||||
#if SHOW_RESULT
|
#if SHOW_RESULT
|
||||||
cv::Mat edges_x2, ocl_edges(edges);
|
cv::Mat edges_x2, ocl_edges(edges);
|
||||||
edges_x2.create(edges.rows, edges.cols * 2, edges.type());
|
edges_x2.create(edges.rows, edges.cols * 2, edges.type());
|
||||||
edges_x2.setTo(0);
|
edges_x2.setTo(0);
|
||||||
cv::add(edges_gold,cv::Mat(edges_x2,cv::Rect(0,0,edges_gold.cols,edges_gold.rows)), cv::Mat(edges_x2,cv::Rect(0,0,edges_gold.cols,edges_gold.rows)));
|
cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)));
|
||||||
cv::add(ocl_edges,cv::Mat(edges_x2,cv::Rect(edges_gold.cols,0,edges_gold.cols,edges_gold.rows)), cv::Mat(edges_x2,cv::Rect(edges_gold.cols,0,edges_gold.cols,edges_gold.rows)));
|
cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)));
|
||||||
cv::namedWindow("Canny result (left: cpu, right: ocl)");
|
cv::namedWindow("Canny result (left: cpu, right: ocl)");
|
||||||
cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2);
|
cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2);
|
||||||
cv::waitKey();
|
cv::waitKey();
|
||||||
#endif //OUTPUT_RESULT
|
#endif //OUTPUT_RESULT
|
||||||
EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
|
EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
|
||||||
testing::Values(AppertureSize(3), AppertureSize(5)),
|
testing::Values(AppertureSize(3), AppertureSize(5)),
|
||||||
testing::Values(L2gradient(false), L2gradient(true))));
|
testing::Values(L2gradient(false), L2gradient(true))));
|
||||||
|
#endif
|
||||||
|
|||||||
@@ -16,7 +16,7 @@
|
|||||||
//
|
//
|
||||||
// @Authors
|
// @Authors
|
||||||
// Chunpeng Zhang chunpeng@multicorewareinc.com
|
// Chunpeng Zhang chunpeng@multicorewareinc.com
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
@@ -59,13 +59,13 @@ PARAM_TEST_CASE(ColumnSum, cv::Size, bool )
|
|||||||
{
|
{
|
||||||
cv::Size size;
|
cv::Size size;
|
||||||
cv::Mat src;
|
cv::Mat src;
|
||||||
bool useRoi;
|
bool useRoi;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
size = GET_PARAM(0);
|
size = GET_PARAM(0);
|
||||||
useRoi = GET_PARAM(1);
|
useRoi = GET_PARAM(1);
|
||||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
}
|
}
|
||||||
@@ -74,10 +74,10 @@ PARAM_TEST_CASE(ColumnSum, cv::Size, bool )
|
|||||||
TEST_P(ColumnSum, Accuracy)
|
TEST_P(ColumnSum, Accuracy)
|
||||||
{
|
{
|
||||||
cv::Mat src = randomMat(size, CV_32FC1);
|
cv::Mat src = randomMat(size, CV_32FC1);
|
||||||
cv::ocl::oclMat d_dst;
|
cv::ocl::oclMat d_dst;
|
||||||
cv::ocl::oclMat d_src(src);
|
cv::ocl::oclMat d_src(src);
|
||||||
|
|
||||||
cv::ocl::columnSum(d_src,d_dst);
|
cv::ocl::columnSum(d_src, d_dst);
|
||||||
|
|
||||||
cv::Mat dst(d_dst);
|
cv::Mat dst(d_dst);
|
||||||
|
|
||||||
@@ -100,7 +100,7 @@ TEST_P(ColumnSum, Accuracy)
|
|||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ColumnSum, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ColumnSum, testing::Combine(
|
||||||
DIFFERENT_SIZES,testing::Values(Inverse(false),Inverse(true))));
|
DIFFERENT_SIZES, testing::Values(Inverse(false), Inverse(true))));
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -48,50 +48,50 @@ using namespace std;
|
|||||||
#ifdef HAVE_CLAMDFFT
|
#ifdef HAVE_CLAMDFFT
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// Dft
|
// Dft
|
||||||
PARAM_TEST_CASE(Dft, cv::Size, bool)
|
PARAM_TEST_CASE(Dft, cv::Size, bool)
|
||||||
{
|
{
|
||||||
cv::Size dft_size;
|
cv::Size dft_size;
|
||||||
bool dft_rows;
|
bool dft_rows;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
// CV_Assert(devnums > 0);
|
// CV_Assert(devnums > 0);
|
||||||
dft_size = GET_PARAM(0);
|
dft_size = GET_PARAM(0);
|
||||||
dft_rows = GET_PARAM(1);
|
dft_rows = GET_PARAM(1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(Dft, C2C)
|
TEST_P(Dft, C2C)
|
||||||
{
|
{
|
||||||
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
|
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
|
||||||
cv::Mat b_gold;
|
cv::Mat b_gold;
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
flags |= dft_rows ? cv::DFT_ROWS : 0;
|
flags |= dft_rows ? cv::DFT_ROWS : 0;
|
||||||
|
|
||||||
cv::ocl::oclMat d_b;
|
cv::ocl::oclMat d_b;
|
||||||
|
|
||||||
cv::dft(a, b_gold, flags);
|
cv::dft(a, b_gold, flags);
|
||||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
||||||
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
|
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
TEST_P(Dft, R2CthenC2R)
|
TEST_P(Dft, R2CthenC2R)
|
||||||
{
|
{
|
||||||
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
|
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
|
||||||
|
|
||||||
int flags = 0;
|
|
||||||
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
|
|
||||||
|
|
||||||
cv::ocl::oclMat d_b, d_c;
|
int flags = 0;
|
||||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
|
||||||
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
|
|
||||||
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
|
cv::ocl::oclMat d_b, d_c;
|
||||||
|
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
||||||
|
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
|
||||||
|
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
|
||||||
testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
|
testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
|
||||||
testing::Values(false, true)));
|
testing::Values(false, true)));
|
||||||
|
|
||||||
#endif // HAVE_CLAMDFFT
|
#endif // HAVE_CLAMDFFT
|
||||||
|
|||||||
@@ -119,7 +119,7 @@ PARAM_TEST_CASE(FilterTestBase, MatType, bool)
|
|||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(1, mat1.cols);
|
roicols = rng.uniform(1, mat1.cols);
|
||||||
roirows = rng.uniform(1, mat1.rows);
|
roirows = rng.uniform(1, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@@ -211,10 +211,10 @@ PARAM_TEST_CASE(Blur, MatType, cv::Size, int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(2, mat1.cols);
|
roicols = rng.uniform(2, mat1.cols);
|
||||||
roirows = rng.uniform(2, mat1.rows);
|
roirows = rng.uniform(2, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@@ -311,10 +311,10 @@ PARAM_TEST_CASE(LaplacianTestBase, MatType, int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(2, mat.cols);
|
roicols = rng.uniform(2, mat.cols);
|
||||||
roirows = rng.uniform(2, mat.rows);
|
roirows = rng.uniform(2, mat.rows);
|
||||||
srcx = rng.uniform(0, mat.cols - roicols);
|
srcx = rng.uniform(0, mat.cols - roicols);
|
||||||
@@ -416,10 +416,10 @@ PARAM_TEST_CASE(ErodeDilateBase, MatType, bool)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(2, mat1.cols);
|
roicols = rng.uniform(2, mat1.cols);
|
||||||
roirows = rng.uniform(2, mat1.rows);
|
roirows = rng.uniform(2, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@@ -559,10 +559,10 @@ PARAM_TEST_CASE(Sobel, MatType, int, int, int, int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(2, mat1.cols);
|
roicols = rng.uniform(2, mat1.cols);
|
||||||
roirows = rng.uniform(2, mat1.rows);
|
roirows = rng.uniform(2, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@@ -663,10 +663,10 @@ PARAM_TEST_CASE(Scharr, MatType, int, int, int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(2, mat1.cols);
|
roicols = rng.uniform(2, mat1.cols);
|
||||||
roirows = rng.uniform(2, mat1.rows);
|
roirows = rng.uniform(2, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@@ -770,10 +770,10 @@ PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(2, mat1.cols);
|
roicols = rng.uniform(2, mat1.cols);
|
||||||
roirows = rng.uniform(2, mat1.rows);
|
roirows = rng.uniform(2, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@@ -822,13 +822,13 @@ TEST_P(GaussianBlur, Mat)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
|
INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||||
Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
|
Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
|
||||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
|
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
|
INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(1, 3)));
|
Values(1, 3)));
|
||||||
|
|
||||||
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
|
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
|
||||||
@@ -840,20 +840,20 @@ INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC1), Values(
|
|||||||
INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
|
INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
|
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT,
|
Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT,
|
||||||
(MatType)cv::BORDER_REPLICATE)));
|
(MatType)cv::BORDER_REPLICATE)));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
|
INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1),
|
||||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
|
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
|
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||||
Values(cv::Size(3, 3), cv::Size(5, 5)),
|
Values(cv::Size(3, 3), cv::Size(5, 5)),
|
||||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
|
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif // HAVE_OPENCL
|
#endif // HAVE_OPENCL
|
||||||
|
|||||||
@@ -48,38 +48,38 @@ using namespace std;
|
|||||||
#ifdef HAVE_CLAMDBLAS
|
#ifdef HAVE_CLAMDBLAS
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// GEMM
|
// GEMM
|
||||||
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
|
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
|
||||||
{
|
{
|
||||||
int type;
|
int type;
|
||||||
cv::Size mat_size;
|
cv::Size mat_size;
|
||||||
int flags;
|
int flags;
|
||||||
//vector<cv::ocl::Info> info;
|
//vector<cv::ocl::Info> info;
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
type = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
mat_size = GET_PARAM(1);
|
mat_size = GET_PARAM(1);
|
||||||
flags = GET_PARAM(2);
|
flags = GET_PARAM(2);
|
||||||
//cv::ocl::getDevice(info);
|
//cv::ocl::getDevice(info);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(Gemm, Accuracy)
|
TEST_P(Gemm, Accuracy)
|
||||||
{
|
{
|
||||||
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
|
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
|
||||||
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
|
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
|
||||||
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
|
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
|
||||||
|
|
||||||
cv::Mat dst;
|
cv::Mat dst;
|
||||||
cv::ocl::oclMat ocl_dst;
|
cv::ocl::oclMat ocl_dst;
|
||||||
|
|
||||||
cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
|
cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
|
||||||
cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);
|
cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
|
EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
|
||||||
testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
|
testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
|
||||||
testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
|
testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
|
||||||
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
|
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -53,107 +53,114 @@ using namespace testing;
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
|
||||||
struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
|
struct getRect
|
||||||
|
{
|
||||||
|
Rect operator ()(const CvAvgComp &e) const
|
||||||
|
{
|
||||||
|
return e.rect;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
PARAM_TEST_CASE(HaarTestBase, int, int)
|
PARAM_TEST_CASE(HaarTestBase, int, int)
|
||||||
{
|
{
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
|
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
|
||||||
cv::CascadeClassifier cpucascade, cpunestedCascade;
|
cv::CascadeClassifier cpucascade, cpunestedCascade;
|
||||||
// Mat img;
|
// Mat img;
|
||||||
|
|
||||||
double scale;
|
double scale;
|
||||||
int index;
|
int index;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
scale = 1.0;
|
scale = 1.0;
|
||||||
index=0;
|
index = 0;
|
||||||
string cascadeName="../../../data/haarcascades/haarcascade_frontalface_alt.xml";
|
string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
|
||||||
|
|
||||||
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
|
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
|
||||||
{
|
{
|
||||||
cout << "ERROR: Could not load classifier cascade" << endl;
|
cout << "ERROR: Could not load classifier cascade" << endl;
|
||||||
cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
|
cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
|
||||||
" [--scale[=<image scale>\n"
|
" [--scale[=<image scale>\n"
|
||||||
" [filename|camera_index]\n" << endl ;
|
" [filename|camera_index]\n" << endl ;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums>0);
|
//CV_Assert(devnums>0);
|
||||||
////if you want to use undefault device, set it here
|
////if you want to use undefault device, set it here
|
||||||
////setDevice(oclinfo[0]);
|
////setDevice(oclinfo[0]);
|
||||||
//cv::ocl::setBinpath("E:\\");
|
//cv::ocl::setBinpath("E:\\");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////faceDetect/////////////////////////////////////////////////
|
////////////////////////////////faceDetect/////////////////////////////////////////////////
|
||||||
|
|
||||||
struct Haar : HaarTestBase {};
|
struct Haar : HaarTestBase {};
|
||||||
|
|
||||||
TEST_F(Haar, FaceDetect)
|
TEST_F(Haar, FaceDetect)
|
||||||
{
|
{
|
||||||
string imgName = "../../../samples/c/lena.jpg";
|
string imgName = "../../../samples/c/lena.jpg";
|
||||||
Mat img = imread( imgName, 1 );
|
Mat img = imread( imgName, 1 );
|
||||||
|
|
||||||
if(img.empty())
|
if(img.empty())
|
||||||
{
|
{
|
||||||
std::cout << "Couldn't read test" << index <<".jpg" << std::endl;
|
std::cout << "Couldn't read test" << index << ".jpg" << std::endl;
|
||||||
return ;
|
return ;
|
||||||
}
|
}
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
double t = 0;
|
double t = 0;
|
||||||
vector<Rect> faces, oclfaces;
|
vector<Rect> faces, oclfaces;
|
||||||
|
|
||||||
const static Scalar colors[] = { CV_RGB(0,0,255),
|
const static Scalar colors[] = { CV_RGB(0, 0, 255),
|
||||||
CV_RGB(0,128,255),
|
CV_RGB(0, 128, 255),
|
||||||
CV_RGB(0,255,255),
|
CV_RGB(0, 255, 255),
|
||||||
CV_RGB(0,255,0),
|
CV_RGB(0, 255, 0),
|
||||||
CV_RGB(255,128,0),
|
CV_RGB(255, 128, 0),
|
||||||
CV_RGB(255,255,0),
|
CV_RGB(255, 255, 0),
|
||||||
CV_RGB(255,0,0),
|
CV_RGB(255, 0, 0),
|
||||||
CV_RGB(255,0,255)} ;
|
CV_RGB(255, 0, 255)
|
||||||
|
} ;
|
||||||
|
|
||||||
Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
|
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
|
||||||
MemStorage storage(cvCreateMemStorage(0));
|
MemStorage storage(cvCreateMemStorage(0));
|
||||||
cvtColor( img, gray, CV_BGR2GRAY );
|
cvtColor( img, gray, CV_BGR2GRAY );
|
||||||
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
|
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
|
||||||
equalizeHist( smallImg, smallImg );
|
equalizeHist( smallImg, smallImg );
|
||||||
|
|
||||||
|
|
||||||
cv::ocl::oclMat image;
|
cv::ocl::oclMat image;
|
||||||
CvSeq* _objects;
|
CvSeq *_objects;
|
||||||
image.upload(smallImg);
|
image.upload(smallImg);
|
||||||
_objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
|
_objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
|
||||||
3, 0
|
3, 0
|
||||||
|CV_HAAR_SCALE_IMAGE
|
| CV_HAAR_SCALE_IMAGE
|
||||||
, Size(30,30), Size(0, 0) );
|
, Size(30, 30), Size(0, 0) );
|
||||||
vector<CvAvgComp> vecAvgComp;
|
vector<CvAvgComp> vecAvgComp;
|
||||||
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
|
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
|
||||||
oclfaces.resize(vecAvgComp.size());
|
oclfaces.resize(vecAvgComp.size());
|
||||||
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
|
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
|
||||||
|
|
||||||
cpucascade.detectMultiScale( smallImg, faces, 1.1,
|
cpucascade.detectMultiScale( smallImg, faces, 1.1,
|
||||||
3, 0
|
3, 0
|
||||||
|CV_HAAR_SCALE_IMAGE
|
| CV_HAAR_SCALE_IMAGE
|
||||||
, Size(30,30), Size(0, 0) );
|
, Size(30, 30), Size(0, 0) );
|
||||||
EXPECT_EQ(faces.size(),oclfaces.size());
|
EXPECT_EQ(faces.size(), oclfaces.size());
|
||||||
/* for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
|
/* for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
|
||||||
{
|
{
|
||||||
Mat smallImgROI;
|
Mat smallImgROI;
|
||||||
Point center;
|
Point center;
|
||||||
Scalar color = colors[i%8];
|
Scalar color = colors[i%8];
|
||||||
int radius;
|
int radius;
|
||||||
center.x = cvRound((r->x + r->width*0.5)*scale);
|
center.x = cvRound((r->x + r->width*0.5)*scale);
|
||||||
center.y = cvRound((r->y + r->height*0.5)*scale);
|
center.y = cvRound((r->y + r->height*0.5)*scale);
|
||||||
radius = cvRound((r->width + r->height)*0.25*scale);
|
radius = cvRound((r->width + r->height)*0.25*scale);
|
||||||
circle( img, center, radius, color, 3, 8, 0 );
|
circle( img, center, radius, color, 3, 8, 0 );
|
||||||
} */
|
} */
|
||||||
//namedWindow("result");
|
//namedWindow("result");
|
||||||
//imshow("result",img);
|
//imshow("result",img);
|
||||||
//waitKey(0);
|
//waitKey(0);
|
||||||
//destroyAllWindows();
|
//destroyAllWindows();
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif // HAVE_OPENCL
|
#endif // HAVE_OPENCL
|
||||||
|
|||||||
@@ -49,15 +49,15 @@ using namespace std;
|
|||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
|
||||||
PARAM_TEST_CASE(HOG,cv::Size,int)
|
PARAM_TEST_CASE(HOG, cv::Size, int)
|
||||||
{
|
{
|
||||||
cv::Size winSize;
|
cv::Size winSize;
|
||||||
int type;
|
int type;
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
winSize = GET_PARAM(0);
|
winSize = GET_PARAM(0);
|
||||||
type = GET_PARAM(1);
|
type = GET_PARAM(1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(HOG, GetDescriptors)
|
TEST_P(HOG, GetDescriptors)
|
||||||
@@ -114,7 +114,7 @@ TEST_P(HOG, GetDescriptors)
|
|||||||
bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
|
bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
|
||||||
{
|
{
|
||||||
return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) &&
|
return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) &&
|
||||||
(abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold));
|
(abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(HOG, Detect)
|
TEST_P(HOG, Detect)
|
||||||
@@ -166,21 +166,21 @@ TEST_P(HOG, Detect)
|
|||||||
|
|
||||||
// OpenCL detection
|
// OpenCL detection
|
||||||
std::vector<cv::Rect> d_found;
|
std::vector<cv::Rect> d_found;
|
||||||
ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8,8), cv::Size(0,0), 1.05, 2);
|
ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
|
||||||
|
|
||||||
// CPU detection
|
// CPU detection
|
||||||
std::vector<cv::Rect> found;
|
std::vector<cv::Rect> found;
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
hog.detectMultiScale(img, found, 0, cv::Size(8,8), cv::Size(0,0), 1.05, 2);
|
hog.detectMultiScale(img, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
|
||||||
break;
|
break;
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
default:
|
default:
|
||||||
hog.detectMultiScale(img_rgb, found, 0, cv::Size(8,8), cv::Size(0,0), 1.05, 2);
|
hog.detectMultiScale(img_rgb, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ground-truth rectangular people window
|
// Ground-truth rectangular people window
|
||||||
cv::Rect win1_64x128(231, 190, 72, 144);
|
cv::Rect win1_64x128(231, 190, 72, 144);
|
||||||
cv::Rect win2_64x128(621, 156, 97, 194);
|
cv::Rect win2_64x128(621, 156, 97, 194);
|
||||||
@@ -240,14 +240,14 @@ TEST_P(HOG, Detect)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
char s[100]={0};
|
char s[100] = {0};
|
||||||
EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3, s);
|
EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HOG, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HOG, testing::Combine(
|
||||||
testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
|
testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
|
||||||
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
|
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
|
||||||
|
|
||||||
|
|
||||||
#endif //HAVE_OPENCL
|
#endif //HAVE_OPENCL
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
|
|||||||
{
|
{
|
||||||
int t0, t1, t2;
|
int t0, t1, t2;
|
||||||
t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
|
t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
|
||||||
if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2)
|
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
|
||||||
{
|
{
|
||||||
s0 += t0;
|
s0 += t0;
|
||||||
s1 += t1;
|
s1 += t1;
|
||||||
@@ -134,7 +134,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
|
|||||||
rowCount++;
|
rowCount++;
|
||||||
}
|
}
|
||||||
t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
|
t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
|
||||||
if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2)
|
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
|
||||||
{
|
{
|
||||||
s0 += t0;
|
s0 += t0;
|
||||||
s1 += t1;
|
s1 += t1;
|
||||||
@@ -143,7 +143,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
|
|||||||
rowCount++;
|
rowCount++;
|
||||||
}
|
}
|
||||||
t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
|
t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
|
||||||
if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2)
|
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
|
||||||
{
|
{
|
||||||
s0 += t0;
|
s0 += t0;
|
||||||
s1 += t1;
|
s1 += t1;
|
||||||
@@ -152,7 +152,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
|
|||||||
rowCount++;
|
rowCount++;
|
||||||
}
|
}
|
||||||
t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
|
t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
|
||||||
if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2)
|
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
|
||||||
{
|
{
|
||||||
s0 += t0;
|
s0 += t0;
|
||||||
s1 += t1;
|
s1 += t1;
|
||||||
@@ -165,7 +165,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
|
|||||||
for(; x <= maxx; x++, ptr += 4)
|
for(; x <= maxx; x++, ptr += 4)
|
||||||
{
|
{
|
||||||
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
|
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
|
||||||
if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2)
|
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
|
||||||
{
|
{
|
||||||
s0 += t0;
|
s0 += t0;
|
||||||
s1 += t1;
|
s1 += t1;
|
||||||
@@ -191,7 +191,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
|
|||||||
s2 = cvFloor(s2 * icount);
|
s2 = cvFloor(s2 * icount);
|
||||||
|
|
||||||
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
|
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
|
||||||
tab[s0-c0+255] + tab[s1-c1+255] + tab[s2-c2+255] <= eps);
|
tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
|
||||||
|
|
||||||
//revise the pointer corresponding to the new (y0,x0)
|
//revise the pointer corresponding to the new (y0,x0)
|
||||||
revx = x1 - x0;
|
revx = x1 - x0;
|
||||||
@@ -388,10 +388,10 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(1, mat1.cols);
|
roicols = rng.uniform(1, mat1.cols);
|
||||||
roirows = rng.uniform(1, mat1.rows);
|
roirows = rng.uniform(1, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@@ -488,10 +488,10 @@ TEST_P(bilateralFilter, Mat)
|
|||||||
int radius = 9;
|
int radius = 9;
|
||||||
int d = 2 * radius + 1;
|
int d = 2 * radius + 1;
|
||||||
double sigmaspace = 20.0;
|
double sigmaspace = 20.0;
|
||||||
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE,cv::BORDER_REFLECT,cv::BORDER_WRAP,cv::BORDER_REFLECT_101};
|
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
|
||||||
const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"};
|
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
|
||||||
|
|
||||||
if (mat1.type() != CV_8UC1 || mat1.type() != dst.type())
|
if (mat1.depth() != CV_8U || mat1.type() != dst.type())
|
||||||
{
|
{
|
||||||
cout << "Unsupported type" << endl;
|
cout << "Unsupported type" << endl;
|
||||||
EXPECT_DOUBLE_EQ(0.0, 0.0);
|
EXPECT_DOUBLE_EQ(0.0, 0.0);
|
||||||
@@ -502,47 +502,41 @@ TEST_P(bilateralFilter, Mat)
|
|||||||
for(int j = 0; j < LOOP_TIMES; j++)
|
for(int j = 0; j < LOOP_TIMES; j++)
|
||||||
{
|
{
|
||||||
random_roi();
|
random_roi();
|
||||||
#ifdef RANDOMROI
|
if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= radius) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
|
||||||
if(((bordertype[i]!=cv::BORDER_CONSTANT) && (bordertype[i]!=cv::BORDER_REPLICATE))&&(mat1_roi.cols<=radius) || (mat1_roi.cols<=radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
|
{
|
||||||
{
|
continue;
|
||||||
continue;
|
}
|
||||||
}
|
//if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
|
||||||
if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
|
//{
|
||||||
{
|
// dst_roi.adjustROI(radius, radius, radius, radius);
|
||||||
dst_roi.adjustROI(radius, radius, radius, radius);
|
// cldst_roi.adjustROI(radius, radius, radius, radius);
|
||||||
cldst_roi.adjustROI(radius, radius, radius, radius);
|
//}
|
||||||
}
|
//else
|
||||||
else
|
//{
|
||||||
{
|
// continue;
|
||||||
continue;
|
//}
|
||||||
}
|
|
||||||
#endif
|
cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
|
||||||
cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i]|cv::BORDER_ISOLATED);
|
cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
|
||||||
cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i]|cv::BORDER_ISOLATED);
|
|
||||||
|
|
||||||
cv::Mat cpu_cldst;
|
cv::Mat cpu_cldst;
|
||||||
#ifndef RANDOMROI
|
cldst.download(cpu_cldst);
|
||||||
cldst_roi.download(cpu_cldst);
|
|
||||||
#else
|
|
||||||
cldst.download(cpu_cldst);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
char sss[1024];
|
char sss[1024];
|
||||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,radius=%d,boredertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, radius, borderstr[i]);
|
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,radius=%d,boredertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, radius, borderstr[i]);
|
||||||
|
//for(int i=0;i<dst.rows;i++)
|
||||||
|
//{
|
||||||
|
// for(int j=0;j<dst.cols*dst.channels();j++)
|
||||||
|
// {
|
||||||
|
// if(dst.at<uchar>(i,j)!=cpu_cldst.at<uchar>(i,j))
|
||||||
|
// cout<< i <<" "<< j <<" "<< (int)dst.at<uchar>(i,j)<<" "<< (int)cpu_cldst.at<uchar>(i,j)<<" ";
|
||||||
|
// }
|
||||||
|
// cout<<endl;
|
||||||
|
//}
|
||||||
|
|
||||||
|
EXPECT_MAT_NEAR(dst, cpu_cldst, 1.0, sss);
|
||||||
|
|
||||||
#ifndef RANDOMROI
|
|
||||||
EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0, sss);
|
|
||||||
#else
|
|
||||||
//for(int i=0;i<dst_roi.rows;i++)
|
|
||||||
//{
|
|
||||||
// for(int j=0;j<dst_roi.cols;j++)
|
|
||||||
// {
|
|
||||||
// cout<< (int)dst_roi.at<uchar>(i,j)<<" "<< (int)cpu_cldst.at<uchar>(i,j)<<" ";
|
|
||||||
// }
|
|
||||||
// cout<<endl;
|
|
||||||
//}
|
|
||||||
EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0, sss);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -555,13 +549,13 @@ struct CopyMakeBorder : ImgprocTestBase {};
|
|||||||
|
|
||||||
TEST_P(CopyMakeBorder, Mat)
|
TEST_P(CopyMakeBorder, Mat)
|
||||||
{
|
{
|
||||||
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE,cv::BORDER_REFLECT,cv::BORDER_WRAP,cv::BORDER_REFLECT_101};
|
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
|
||||||
const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"};
|
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
int top = rng.uniform(0, 10);
|
int top = rng.uniform(0, 10);
|
||||||
int bottom = rng.uniform(0, 10);
|
int bottom = rng.uniform(0, 10);
|
||||||
int left = rng.uniform(0, 10);
|
int left = rng.uniform(0, 10);
|
||||||
int right = rng.uniform(0, 10);
|
int right = rng.uniform(0, 10);
|
||||||
if (mat1.type() != dst.type())
|
if (mat1.type() != dst.type())
|
||||||
{
|
{
|
||||||
cout << "Unsupported type" << endl;
|
cout << "Unsupported type" << endl;
|
||||||
@@ -573,45 +567,45 @@ TEST_P(CopyMakeBorder, Mat)
|
|||||||
for(int j = 0; j < LOOP_TIMES; j++)
|
for(int j = 0; j < LOOP_TIMES; j++)
|
||||||
{
|
{
|
||||||
random_roi();
|
random_roi();
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
if(((bordertype[i]!=cv::BORDER_CONSTANT) && (bordertype[i]!=cv::BORDER_REPLICATE))&&(mat1_roi.cols<=left) || (mat1_roi.cols<=right) || (mat1_roi.rows <= top) || (mat1_roi.rows <= bottom))
|
if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= left) || (mat1_roi.cols <= right) || (mat1_roi.rows <= top) || (mat1_roi.rows <= bottom))
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((dstx>=left) && (dsty >= top) && (dstx+cldst_roi.cols+right <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+bottom <= cldst_roi.wholerows))
|
if((dstx >= left) && (dsty >= top) && (dstx + cldst_roi.cols + right <= cldst_roi.wholecols) && (dsty + cldst_roi.rows + bottom <= cldst_roi.wholerows))
|
||||||
{
|
{
|
||||||
dst_roi.adjustROI(top, bottom, left, right);
|
dst_roi.adjustROI(top, bottom, left, right);
|
||||||
cldst_roi.adjustROI(top, bottom, left, right);
|
cldst_roi.adjustROI(top, bottom, left, right);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i]| cv::BORDER_ISOLATED, cv::Scalar(1.0));
|
cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0));
|
||||||
cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i]| cv::BORDER_ISOLATED, cv::Scalar(1.0));
|
cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0));
|
||||||
|
|
||||||
cv::Mat cpu_cldst;
|
cv::Mat cpu_cldst;
|
||||||
#ifndef RANDOMROI
|
#ifndef RANDOMROI
|
||||||
cldst_roi.download(cpu_cldst);
|
cldst_roi.download(cpu_cldst);
|
||||||
#else
|
#else
|
||||||
cldst.download(cpu_cldst);
|
cldst.download(cpu_cldst);
|
||||||
#endif
|
#endif
|
||||||
char sss[1024];
|
char sss[1024];
|
||||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,top=%d,bottom=%d,left=%d,right=%d, bordertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, top, bottom, left, right,borderstr[i]);
|
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,top=%d,bottom=%d,left=%d,right=%d, bordertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, top, bottom, left, right, borderstr[i]);
|
||||||
#ifndef RANDOMROI
|
#ifndef RANDOMROI
|
||||||
EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0, sss);
|
EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0, sss);
|
||||||
#else
|
#else
|
||||||
//for(int i=0;i<dst.rows;i++)
|
//for(int i=0;i<dst.rows;i++)
|
||||||
//{
|
//{
|
||||||
//for(int j=0;j<dst.cols;j++)
|
//for(int j=0;j<dst.cols;j++)
|
||||||
//{
|
//{
|
||||||
// cout<< (int)dst.at<uchar>(i,j)<<" ";
|
// cout<< (int)dst.at<uchar>(i,j)<<" ";
|
||||||
//}
|
//}
|
||||||
//cout<<endl;
|
//cout<<endl;
|
||||||
//}
|
//}
|
||||||
EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0, sss);
|
EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0, sss);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -754,10 +748,10 @@ PARAM_TEST_CASE(WarpTestBase, MatType, int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
src_roicols = rng.uniform(1, mat1.cols);
|
src_roicols = rng.uniform(1, mat1.cols);
|
||||||
src_roirows = rng.uniform(1, mat1.rows);
|
src_roirows = rng.uniform(1, mat1.rows);
|
||||||
dst_roicols = rng.uniform(1, dst.cols);
|
dst_roicols = rng.uniform(1, dst.cols);
|
||||||
@@ -872,7 +866,7 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int)
|
|||||||
cv::Mat map2;
|
cv::Mat map2;
|
||||||
|
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
int src_roicols;
|
int src_roicols;
|
||||||
int src_roirows;
|
int src_roirows;
|
||||||
int dst_roicols;
|
int dst_roicols;
|
||||||
@@ -915,7 +909,7 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int)
|
|||||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
|
|
||||||
cv::RNG& rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT);
|
cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT);
|
||||||
cv::Size dstSize = cv::Size(MWIDTH, MHEIGHT);
|
cv::Size dstSize = cv::Size(MWIDTH, MHEIGHT);
|
||||||
cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT);
|
cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT);
|
||||||
@@ -937,31 +931,31 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int)
|
|||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cout<<"The wrong input type"<<endl;
|
cout << "The wrong input type" << endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
dst = randomMat(rng, map1Size, srcType, min, max, false);
|
dst = randomMat(rng, map1Size, srcType, min, max, false);
|
||||||
switch (src.channels())
|
switch (src.channels())
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
val = cv::Scalar(rng.uniform(0.0, 10.0), 0, 0, 0);
|
val = cv::Scalar(rng.uniform(0.0, 10.0), 0, 0, 0);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0);
|
val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0);
|
val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0));
|
val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
cv::RNG& rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
|
|
||||||
dst_roicols = rng.uniform(1, dst.cols);
|
dst_roicols = rng.uniform(1, dst.cols);
|
||||||
dst_roirows = rng.uniform(1, dst.rows);
|
dst_roirows = rng.uniform(1, dst.rows);
|
||||||
@@ -969,7 +963,7 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int)
|
|||||||
src_roicols = rng.uniform(1, src.cols);
|
src_roicols = rng.uniform(1, src.cols);
|
||||||
src_roirows = rng.uniform(1, src.rows);
|
src_roirows = rng.uniform(1, src.rows);
|
||||||
|
|
||||||
|
|
||||||
srcx = rng.uniform(0, src.cols - src_roicols);
|
srcx = rng.uniform(0, src.cols - src_roicols);
|
||||||
srcy = rng.uniform(0, src.rows - src_roirows);
|
srcy = rng.uniform(0, src.rows - src_roirows);
|
||||||
dstx = rng.uniform(0, dst.cols - dst_roicols);
|
dstx = rng.uniform(0, dst.cols - dst_roicols);
|
||||||
@@ -985,19 +979,19 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int)
|
|||||||
|
|
||||||
if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype))
|
if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype))
|
||||||
{
|
{
|
||||||
map1_roi = map1(Rect(map1x,map1y,map1_roicols,map1_roirows));
|
map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows));
|
||||||
gmap1_roi = map1_roi;
|
gmap1_roi = map1_roi;
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (map1Type == CV_32FC1 && map2Type == CV_32FC1)
|
else if (map1Type == CV_32FC1 && map2Type == CV_32FC1)
|
||||||
{
|
{
|
||||||
map1_roi = map1(Rect(map1x,map1y,map1_roicols,map1_roirows));
|
map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows));
|
||||||
gmap1_roi = map1_roi;
|
gmap1_roi = map1_roi;
|
||||||
map2_roi = map2(Rect(map2x,map2y,map2_roicols,map2_roirows));
|
map2_roi = map2(Rect(map2x, map2y, map2_roicols, map2_roirows));
|
||||||
gmap2_roi = map2_roi;
|
gmap2_roi = map2_roi;
|
||||||
}
|
}
|
||||||
src_roi = src(Rect(srcx,srcy,src_roicols,src_roirows));
|
src_roi = src(Rect(srcx, srcy, src_roicols, src_roirows));
|
||||||
dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
|
dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
|
||||||
gsrc_roi = src_roi;
|
gsrc_roi = src_roi;
|
||||||
gdst = dst;
|
gdst = dst;
|
||||||
gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows));
|
gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows));
|
||||||
@@ -1006,15 +1000,15 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int)
|
|||||||
|
|
||||||
TEST_P(Remap, Mat)
|
TEST_P(Remap, Mat)
|
||||||
{
|
{
|
||||||
if((interpolation == 1 && map1Type == CV_16SC2) ||(map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1))
|
if((interpolation == 1 && map1Type == CV_16SC2) || (map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1))
|
||||||
{
|
{
|
||||||
cout << "Don't support the dataType" << endl;
|
cout << "Don't support the dataType" << endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
|
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
|
||||||
const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
|
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
|
||||||
// for(int i = 0; i < sizeof(bordertype)/sizeof(int); i++)
|
// for(int i = 0; i < sizeof(bordertype)/sizeof(int); i++)
|
||||||
for(int j=0; j<100; j++)
|
for(int j = 0; j < 100; j++)
|
||||||
{
|
{
|
||||||
random_roi();
|
random_roi();
|
||||||
cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val);
|
cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val);
|
||||||
@@ -1025,11 +1019,11 @@ TEST_P(Remap, Mat)
|
|||||||
char sss[1024];
|
char sss[1024];
|
||||||
sprintf(sss, "src_roicols=%d,src_roirows=%d,dst_roicols=%d,dst_roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d", src_roicols, src_roirows, dst_roicols, dst_roirows, srcx, srcy, dstx, dsty);
|
sprintf(sss, "src_roicols=%d,src_roirows=%d,dst_roicols=%d,dst_roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d", src_roicols, src_roirows, dst_roicols, dst_roirows, srcx, srcy, dstx, dsty);
|
||||||
|
|
||||||
|
|
||||||
if(interpolation == 0)
|
if(interpolation == 0)
|
||||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0, sss);
|
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0, sss);
|
||||||
EXPECT_MAT_NEAR(dst, cpu_dst, 2.0, sss);
|
EXPECT_MAT_NEAR(dst, cpu_dst, 2.0, sss);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1105,14 +1099,14 @@ PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
src_roicols = rng.uniform(1, mat1.cols);
|
src_roicols = rng.uniform(1, mat1.cols);
|
||||||
src_roirows = rng.uniform(1, mat1.rows);
|
src_roirows = rng.uniform(1, mat1.rows);
|
||||||
dst_roicols = (int)(src_roicols*fx);
|
dst_roicols = (int)(src_roicols * fx);
|
||||||
dst_roirows = (int)(src_roirows*fy);
|
dst_roirows = (int)(src_roirows * fy);
|
||||||
src1x = rng.uniform(0, mat1.cols - src_roicols);
|
src1x = rng.uniform(0, mat1.cols - src_roicols);
|
||||||
src1y = rng.uniform(0, mat1.rows - src_roirows);
|
src1y = rng.uniform(0, mat1.rows - src_roirows);
|
||||||
dstx = rng.uniform(0, dst.cols - dst_roicols);
|
dstx = rng.uniform(0, dst.cols - dst_roicols);
|
||||||
@@ -1151,7 +1145,7 @@ TEST_P(Resize, Mat)
|
|||||||
|
|
||||||
// cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
|
// cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
|
||||||
// cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
|
// cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
|
||||||
if(dst_roicols<1||dst_roirows<1) continue;
|
if(dst_roicols < 1 || dst_roirows < 1) continue;
|
||||||
cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
|
cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
|
||||||
cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
|
cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
|
||||||
|
|
||||||
@@ -1215,10 +1209,10 @@ PARAM_TEST_CASE(Threshold, MatType, ThreshOp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(1, mat1.cols);
|
roicols = rng.uniform(1, mat1.cols);
|
||||||
roirows = rng.uniform(1, mat1.rows);
|
roirows = rng.uniform(1, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@@ -1411,15 +1405,15 @@ TEST_P(meanShiftProc, Mat)
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
//hist
|
//hist
|
||||||
void calcHistGold(const cv::Mat& src, cv::Mat& hist)
|
void calcHistGold(const cv::Mat &src, cv::Mat &hist)
|
||||||
{
|
{
|
||||||
hist.create(1, 256, CV_32SC1);
|
hist.create(1, 256, CV_32SC1);
|
||||||
hist.setTo(cv::Scalar::all(0));
|
hist.setTo(cv::Scalar::all(0));
|
||||||
|
|
||||||
int* hist_row = hist.ptr<int>();
|
int *hist_row = hist.ptr<int>();
|
||||||
for (int y = 0; y < src.rows; ++y)
|
for (int y = 0; y < src.rows; ++y)
|
||||||
{
|
{
|
||||||
const uchar* src_row = src.ptr(y);
|
const uchar *src_row = src.ptr(y);
|
||||||
|
|
||||||
for (int x = 0; x < src.cols; ++x)
|
for (int x = 0; x < src.cols; ++x)
|
||||||
++hist_row[src_row[x]];
|
++hist_row[src_row[x]];
|
||||||
@@ -1444,19 +1438,19 @@ PARAM_TEST_CASE(histTestBase, MatType, MatType)
|
|||||||
cv::ocl::oclMat gdst_hist;
|
cv::ocl::oclMat gdst_hist;
|
||||||
//ocl mat with roi
|
//ocl mat with roi
|
||||||
cv::ocl::oclMat gsrc_roi;
|
cv::ocl::oclMat gsrc_roi;
|
||||||
// std::vector<cv::ocl::Info> oclinfo;
|
// std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
type_src = GET_PARAM(0);
|
type_src = GET_PARAM(0);
|
||||||
|
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
cv::Size size = cv::Size(MWIDTH, MHEIGHT);
|
cv::Size size = cv::Size(MWIDTH, MHEIGHT);
|
||||||
|
|
||||||
src = randomMat(rng, size, type_src, 0, 256, false);
|
src = randomMat(rng, size, type_src, 0, 256, false);
|
||||||
|
|
||||||
// int devnums = getDevice(oclinfo);
|
// int devnums = getDevice(oclinfo);
|
||||||
// CV_Assert(devnums > 0);
|
// CV_Assert(devnums > 0);
|
||||||
//if you want to use undefault device, set it here
|
//if you want to use undefault device, set it here
|
||||||
//setDevice(oclinfo[0]);
|
//setDevice(oclinfo[0]);
|
||||||
}
|
}
|
||||||
@@ -1596,45 +1590,45 @@ void conv2( cv::Mat x, cv::Mat y, cv::Mat z)
|
|||||||
int N2 = y.rows;
|
int N2 = y.rows;
|
||||||
int M2 = y.cols;
|
int M2 = y.cols;
|
||||||
|
|
||||||
int i,j;
|
int i, j;
|
||||||
int m,n;
|
int m, n;
|
||||||
|
|
||||||
|
|
||||||
float *kerneldata = (float *)(x.data);
|
float *kerneldata = (float *)(x.data);
|
||||||
float *srcdata = (float *)(y.data);
|
float *srcdata = (float *)(y.data);
|
||||||
float *dstdata = (float *)(z.data);
|
float *dstdata = (float *)(z.data);
|
||||||
|
|
||||||
for(i=0;i<N2;i++)
|
for(i = 0; i < N2; i++)
|
||||||
for(j=0;j<M2;j++)
|
for(j = 0; j < M2; j++)
|
||||||
{
|
{
|
||||||
float temp =0;
|
float temp = 0;
|
||||||
for(m=0;m<N1;m++)
|
for(m = 0; m < N1; m++)
|
||||||
for(n=0;n<M1;n++)
|
for(n = 0; n < M1; n++)
|
||||||
{
|
{
|
||||||
int r, c;
|
int r, c;
|
||||||
r = min(max((i-N1/2+m), 0), N2-1);
|
r = min(max((i - N1 / 2 + m), 0), N2 - 1);
|
||||||
c = min(max((j-M1/2+n), 0), M2-1);
|
c = min(max((j - M1 / 2 + n), 0), M2 - 1);
|
||||||
temp += kerneldata[m*(x.step>>2)+n]*srcdata[r*(y.step>>2)+c];
|
temp += kerneldata[m * (x.step >> 2) + n] * srcdata[r * (y.step >> 2) + c];
|
||||||
}
|
}
|
||||||
dstdata[i*(z.step >> 2)+j]=temp;
|
dstdata[i * (z.step >> 2) + j] = temp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TEST_P(Convolve, Mat)
|
TEST_P(Convolve, Mat)
|
||||||
{
|
{
|
||||||
if(mat1.type()!=CV_32FC1)
|
if(mat1.type() != CV_32FC1)
|
||||||
{
|
{
|
||||||
cout<<"\tUnsupported type\t\n";
|
cout << "\tUnsupported type\t\n";
|
||||||
}
|
}
|
||||||
for(int j=0;j<LOOP_TIMES;j++)
|
for(int j = 0; j < LOOP_TIMES; j++)
|
||||||
{
|
{
|
||||||
random_roi();
|
random_roi();
|
||||||
cv::ocl::oclMat temp1;
|
cv::ocl::oclMat temp1;
|
||||||
cv::Mat kernel_cpu= mat2(Rect(0,0,7,7));
|
cv::Mat kernel_cpu = mat2(Rect(0, 0, 7, 7));
|
||||||
temp1 = kernel_cpu;
|
temp1 = kernel_cpu;
|
||||||
|
|
||||||
conv2(kernel_cpu,mat1_roi,dst_roi);
|
conv2(kernel_cpu, mat1_roi, dst_roi);
|
||||||
cv::ocl::convolve(gmat1,temp1,gdst);
|
cv::ocl::convolve(gmat1, temp1, gdst);
|
||||||
|
|
||||||
cv::Mat cpu_dst;
|
cv::Mat cpu_dst;
|
||||||
gdst_whole.download(cpu_dst);
|
gdst_whole.download(cpu_dst);
|
||||||
|
|
||||||
@@ -1661,31 +1655,38 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
|
|||||||
// NULL_TYPE,
|
// NULL_TYPE,
|
||||||
// NULL_TYPE,
|
// NULL_TYPE,
|
||||||
// Values(false))); // Values(false) is the reserved parameter
|
// Values(false))); // Values(false) is the reserved parameter
|
||||||
|
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
|
||||||
|
Values(CV_8UC1, CV_8UC3),
|
||||||
|
NULL_TYPE,
|
||||||
|
Values(CV_8UC1, CV_8UC3),
|
||||||
|
NULL_TYPE,
|
||||||
|
NULL_TYPE,
|
||||||
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
|
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
|
||||||
Values(CV_8UC1, CV_8UC4,CV_32SC1, CV_32SC4,CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
NULL_TYPE,
|
NULL_TYPE,
|
||||||
Values(CV_8UC1,CV_8UC4,CV_32SC1, CV_32SC4,CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
NULL_TYPE,
|
NULL_TYPE,
|
||||||
NULL_TYPE,
|
NULL_TYPE,
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine(
|
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine(
|
||||||
Values(CV_8UC1,CV_32FC1),
|
Values(CV_8UC1, CV_32FC1),
|
||||||
NULL_TYPE,
|
NULL_TYPE,
|
||||||
ONE_TYPE(CV_32FC1),
|
ONE_TYPE(CV_32FC1),
|
||||||
NULL_TYPE,
|
NULL_TYPE,
|
||||||
NULL_TYPE,
|
NULL_TYPE,
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine(
|
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine(
|
||||||
Values(CV_8UC1,CV_32FC1),
|
Values(CV_8UC1, CV_32FC1),
|
||||||
NULL_TYPE,
|
NULL_TYPE,
|
||||||
ONE_TYPE(CV_32FC1),
|
ONE_TYPE(CV_32FC1),
|
||||||
NULL_TYPE,
|
NULL_TYPE,
|
||||||
NULL_TYPE,
|
NULL_TYPE,
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine(
|
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine(
|
||||||
@@ -1697,21 +1698,21 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine(
|
|||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine(
|
INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
|
Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
|
||||||
(MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
|
(MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
|
||||||
(MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
|
(MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine
|
INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine
|
||||||
(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
|
(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
|
Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
|
||||||
(MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
|
(MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
|
||||||
(MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
|
(MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine(
|
INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(cv::Size()),
|
||||||
Values(0.5, 1.5, 2), Values(0.5, 1.5, 2), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR)));
|
Values(0.5, 1.5, 2), Values(0.5, 1.5, 2), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR)));
|
||||||
|
|
||||||
|
|
||||||
@@ -1728,27 +1729,27 @@ INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine(
|
|||||||
Values(6),
|
Values(6),
|
||||||
Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1))
|
Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1))
|
||||||
));
|
));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine(
|
INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine(
|
||||||
ONE_TYPE(CV_8UC4),
|
ONE_TYPE(CV_8UC4),
|
||||||
ONE_TYPE(CV_16SC2),
|
ONE_TYPE(CV_16SC2),
|
||||||
Values(5),
|
Values(5),
|
||||||
Values(6),
|
Values(6),
|
||||||
Values(cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 5, 1))
|
Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1))
|
||||||
));
|
));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine(
|
INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||||
Values(CV_32FC1, CV_16SC2, CV_32FC2),Values(-1,CV_32FC1),
|
Values(CV_32FC1, CV_16SC2, CV_32FC2), Values(-1, CV_32FC1),
|
||||||
Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR),
|
Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR),
|
||||||
Values((int)cv::BORDER_CONSTANT)));
|
Values((int)cv::BORDER_CONSTANT)));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine(
|
INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine(
|
||||||
ONE_TYPE(CV_8UC1),
|
ONE_TYPE(CV_8UC1),
|
||||||
ONE_TYPE(CV_32SC1) //no use
|
ONE_TYPE(CV_32SC1) //no use
|
||||||
));
|
));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine(
|
INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine(
|
||||||
Values(CV_32FC1, CV_32FC1),
|
Values(CV_32FC1, CV_32FC1),
|
||||||
|
|||||||
@@ -44,14 +44,15 @@
|
|||||||
|
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
#define PERF_TEST 0
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// MatchTemplate
|
// MatchTemplate
|
||||||
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED))
|
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED))
|
||||||
|
|
||||||
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
|
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
|
||||||
|
|
||||||
const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
|
const char *TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
|
||||||
|
|
||||||
#define MTEMP_SIZES testing::Values(cv::Size(128, 256), cv::Size(1024, 768))
|
#define MTEMP_SIZES testing::Values(cv::Size(128, 256), cv::Size(1024, 768))
|
||||||
|
|
||||||
@@ -61,7 +62,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
|
|||||||
cv::Size templ_size;
|
cv::Size templ_size;
|
||||||
int cn;
|
int cn;
|
||||||
int method;
|
int method;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
@@ -77,33 +78,33 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
|
|||||||
TEST_P(MatchTemplate8U, Accuracy)
|
TEST_P(MatchTemplate8U, Accuracy)
|
||||||
{
|
{
|
||||||
|
|
||||||
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
|
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
|
||||||
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
|
std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
|
||||||
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
|
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
|
||||||
std::cout << "Channels: " << cn << std::endl;
|
std::cout << "Channels: " << cn << std::endl;
|
||||||
|
|
||||||
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
|
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
|
||||||
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
|
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
|
||||||
|
|
||||||
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ);
|
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ);
|
||||||
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
|
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
|
||||||
|
|
||||||
cv::Mat dst_gold;
|
cv::Mat dst_gold;
|
||||||
cv::matchTemplate(image, templ, dst_gold, method);
|
cv::matchTemplate(image, templ, dst_gold, method);
|
||||||
|
|
||||||
char sss [100] = "";
|
char sss [100] = "";
|
||||||
|
|
||||||
cv::Mat mat_dst;
|
cv::Mat mat_dst;
|
||||||
dst.download(mat_dst);
|
dst.download(mat_dst);
|
||||||
|
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
|
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
|
||||||
|
|
||||||
#if PERF_TEST
|
#if PERF_TEST
|
||||||
{
|
{
|
||||||
P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
|
P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
|
||||||
P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
|
P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
|
||||||
}
|
}
|
||||||
#endif // PERF_TEST
|
#endif // PERF_TEST
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -113,7 +114,7 @@ PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMeth
|
|||||||
cv::Size templ_size;
|
cv::Size templ_size;
|
||||||
int cn;
|
int cn;
|
||||||
int method;
|
int method;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
@@ -132,42 +133,42 @@ TEST_P(MatchTemplate32F, Accuracy)
|
|||||||
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
|
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
|
||||||
|
|
||||||
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ);
|
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ);
|
||||||
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
|
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
|
||||||
|
|
||||||
cv::Mat dst_gold;
|
cv::Mat dst_gold;
|
||||||
cv::matchTemplate(image, templ, dst_gold, method);
|
cv::matchTemplate(image, templ, dst_gold, method);
|
||||||
|
|
||||||
char sss [100] = "";
|
char sss [100] = "";
|
||||||
|
|
||||||
cv::Mat mat_dst;
|
cv::Mat mat_dst;
|
||||||
dst.download(mat_dst);
|
dst.download(mat_dst);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
|
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
|
||||||
|
|
||||||
#if PERF_TEST
|
#if PERF_TEST
|
||||||
{
|
{
|
||||||
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
|
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
|
||||||
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
|
std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
|
||||||
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
|
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
|
||||||
std::cout << "Channels: " << cn << std::endl;
|
std::cout << "Channels: " << cn << std::endl;
|
||||||
P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
|
P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
|
||||||
P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
|
P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
|
||||||
}
|
}
|
||||||
#endif // PERF_TEST
|
#endif // PERF_TEST
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
|
//INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
|
||||||
testing::Combine(
|
// testing::Combine(
|
||||||
MTEMP_SIZES,
|
// MTEMP_SIZES,
|
||||||
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
// testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
||||||
testing::Values(Channels(1), Channels(3),Channels(4)),
|
// testing::Values(Channels(1), Channels(3), Channels(4)),
|
||||||
ALL_TEMPLATE_METHODS
|
// ALL_TEMPLATE_METHODS
|
||||||
)
|
// )
|
||||||
);
|
// );
|
||||||
|
//
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
|
//INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
|
||||||
MTEMP_SIZES,
|
// MTEMP_SIZES,
|
||||||
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
// testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
||||||
testing::Values(Channels(1), Channels(3),Channels(4)),
|
// testing::Values(Channels(1), Channels(3), Channels(4)),
|
||||||
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
|
// testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
|
||||||
|
#endif
|
||||||
|
|||||||
@@ -98,10 +98,10 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(1, mat.cols);
|
roicols = rng.uniform(1, mat.cols);
|
||||||
roirows = rng.uniform(1, mat.rows);
|
roirows = rng.uniform(1, mat.rows);
|
||||||
srcx = rng.uniform(0, mat.cols - roicols);
|
srcx = rng.uniform(0, mat.cols - roicols);
|
||||||
@@ -204,10 +204,10 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, bool)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(1, mat.cols);
|
roicols = rng.uniform(1, mat.cols);
|
||||||
roirows = rng.uniform(1, mat.rows);
|
roirows = rng.uniform(1, mat.rows);
|
||||||
srcx = rng.uniform(0, mat.cols - roicols);
|
srcx = rng.uniform(0, mat.cols - roicols);
|
||||||
@@ -329,10 +329,10 @@ PARAM_TEST_CASE(SetToTestBase, MatType, bool)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(1, mat.cols);
|
roicols = rng.uniform(1, mat.cols);
|
||||||
roirows = rng.uniform(1, mat.rows);
|
roirows = rng.uniform(1, mat.rows);
|
||||||
srcx = rng.uniform(0, mat.cols - roicols);
|
srcx = rng.uniform(0, mat.cols - roicols);
|
||||||
@@ -440,10 +440,10 @@ PARAM_TEST_CASE(convertC3C4, MatType, cv::Size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(2, mat1.cols);
|
roicols = rng.uniform(2, mat1.cols);
|
||||||
roirows = rng.uniform(2, mat1.rows);
|
roirows = rng.uniform(2, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@@ -477,12 +477,12 @@ TEST_P(convertC3C4, Accuracy)
|
|||||||
for(int j = 0; j < LOOP_TIMES; j++)
|
for(int j = 0; j < LOOP_TIMES; j++)
|
||||||
{
|
{
|
||||||
//random_roi();
|
//random_roi();
|
||||||
int width = rng.uniform(2, MWIDTH);
|
int width = rng.uniform(2, MWIDTH);
|
||||||
int height = rng.uniform(2, MHEIGHT);
|
int height = rng.uniform(2, MHEIGHT);
|
||||||
cv::Size size(width, height);
|
cv::Size size(width, height);
|
||||||
|
|
||||||
mat1 = randomMat(rng, size, type, 0, 40, false);
|
mat1 = randomMat(rng, size, type, 0, 40, false);
|
||||||
gmat1 = mat1;
|
gmat1 = mat1;
|
||||||
cv::Mat cpu_dst;
|
cv::Mat cpu_dst;
|
||||||
gmat1.download(cpu_dst);
|
gmat1.download(cpu_dst);
|
||||||
char sss[1024];
|
char sss[1024];
|
||||||
@@ -493,18 +493,18 @@ TEST_P(convertC3C4, Accuracy)
|
|||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
|
INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4)));
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
|
INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
|
INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(MatrixOperation, convertC3C4, Combine(
|
INSTANTIATE_TEST_CASE_P(MatrixOperation, convertC3C4, Combine(
|
||||||
Values(CV_8UC3, CV_32SC3, CV_32FC3),
|
Values(CV_8UC3, CV_32SC3, CV_32FC3),
|
||||||
Values(cv::Size())));
|
Values(cv::Size())));
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -58,13 +58,13 @@ using namespace std;
|
|||||||
|
|
||||||
PARAM_TEST_CASE(PyrDown, MatType, int)
|
PARAM_TEST_CASE(PyrDown, MatType, int)
|
||||||
{
|
{
|
||||||
int type;
|
int type;
|
||||||
int channels;
|
int channels;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
type = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
channels = GET_PARAM(1);
|
channels = GET_PARAM(1);
|
||||||
|
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
@@ -72,9 +72,9 @@ PARAM_TEST_CASE(PyrDown, MatType, int)
|
|||||||
////setDevice(oclinfo[0]);
|
////setDevice(oclinfo[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Cleanup()
|
void Cleanup()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -84,21 +84,21 @@ TEST_P(PyrDown, Mat)
|
|||||||
for(int j = 0; j < LOOP_TIMES; j++)
|
for(int j = 0; j < LOOP_TIMES; j++)
|
||||||
{
|
{
|
||||||
cv::Size size(MWIDTH, MHEIGHT);
|
cv::Size size(MWIDTH, MHEIGHT);
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
cv::Mat src=randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false);
|
cv::Mat src = randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false);
|
||||||
|
|
||||||
cv::ocl::oclMat gsrc(src), gdst;
|
cv::ocl::oclMat gsrc(src), gdst;
|
||||||
cv::Mat dst_cpu;
|
cv::Mat dst_cpu;
|
||||||
cv::pyrDown(src, dst_cpu);
|
cv::pyrDown(src, dst_cpu);
|
||||||
cv::ocl::pyrDown(gsrc, gdst);
|
cv::ocl::pyrDown(gsrc, gdst);
|
||||||
|
|
||||||
cv::Mat dst;
|
cv::Mat dst;
|
||||||
gdst.download(dst);
|
gdst.download(dst);
|
||||||
char s[1024]={0};
|
char s[1024] = {0};
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst, dst_cpu, dst.depth() == CV_32F ? 1e-4f : 1.0f, s);
|
EXPECT_MAT_NEAR(dst, dst_cpu, dst.depth() == CV_32F ? 1e-4f : 1.0f, s);
|
||||||
|
|
||||||
Cleanup();
|
Cleanup();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ PARAM_TEST_CASE(Sparse, bool, bool)
|
|||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
UseSmart = GET_PARAM(0);
|
UseSmart = GET_PARAM(0);
|
||||||
useGray = GET_PARAM(0);
|
useGray = GET_PARAM(0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -94,28 +94,28 @@ TEST_P(Sparse, Mat)
|
|||||||
cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0);
|
cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0);
|
||||||
|
|
||||||
cv::ocl::oclMat d_pts;
|
cv::ocl::oclMat d_pts;
|
||||||
cv::Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void*)&pts[0]);
|
cv::Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]);
|
||||||
d_pts.upload(pts_mat);
|
d_pts.upload(pts_mat);
|
||||||
|
|
||||||
cv::ocl::PyrLKOpticalFlow pyrLK;
|
cv::ocl::PyrLKOpticalFlow pyrLK;
|
||||||
|
|
||||||
cv::ocl::oclMat oclFrame0;
|
cv::ocl::oclMat oclFrame0;
|
||||||
cv::ocl::oclMat oclFrame1;
|
cv::ocl::oclMat oclFrame1;
|
||||||
cv::ocl::oclMat d_nextPts;
|
cv::ocl::oclMat d_nextPts;
|
||||||
cv::ocl::oclMat d_status;
|
cv::ocl::oclMat d_status;
|
||||||
cv::ocl::oclMat d_err;
|
cv::ocl::oclMat d_err;
|
||||||
|
|
||||||
oclFrame0 = frame0;
|
oclFrame0 = frame0;
|
||||||
oclFrame1 = frame1;
|
oclFrame1 = frame1;
|
||||||
|
|
||||||
pyrLK.sparse(oclFrame0, oclFrame1, d_pts, d_nextPts, d_status, &d_err);
|
pyrLK.sparse(oclFrame0, oclFrame1, d_pts, d_nextPts, d_status, &d_err);
|
||||||
|
|
||||||
std::vector<cv::Point2f> nextPts(d_nextPts.cols);
|
std::vector<cv::Point2f> nextPts(d_nextPts.cols);
|
||||||
cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*)&nextPts[0]);
|
cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void *)&nextPts[0]);
|
||||||
d_nextPts.download(nextPts_mat);
|
d_nextPts.download(nextPts_mat);
|
||||||
|
|
||||||
std::vector<unsigned char> status(d_status.cols);
|
std::vector<unsigned char> status(d_status.cols);
|
||||||
cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void*)&status[0]);
|
cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void *)&status[0]);
|
||||||
d_status.download(status_mat);
|
d_status.download(status_mat);
|
||||||
|
|
||||||
//std::vector<float> err(d_err.cols);
|
//std::vector<float> err(d_err.cols);
|
||||||
@@ -156,12 +156,12 @@ TEST_P(Sparse, Mat)
|
|||||||
double bad_ratio = static_cast<double>(mistmatch) / (nextPts.size() * 2);
|
double bad_ratio = static_cast<double>(mistmatch) / (nextPts.size() * 2);
|
||||||
|
|
||||||
ASSERT_LE(bad_ratio, 0.05f);
|
ASSERT_LE(bad_ratio, 0.05f);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Video, Sparse, Combine(
|
INSTANTIATE_TEST_CASE_P(Video, Sparse, Combine(
|
||||||
Values(false, true),
|
Values(false, true),
|
||||||
Values(false)));
|
Values(false)));
|
||||||
|
|
||||||
#endif // HAVE_OPENCL
|
#endif // HAVE_OPENCL
|
||||||
|
|
||||||
|
|||||||
@@ -56,37 +56,37 @@ using namespace std;
|
|||||||
|
|
||||||
PARAM_TEST_CASE(PyrUp, MatType, int)
|
PARAM_TEST_CASE(PyrUp, MatType, int)
|
||||||
{
|
{
|
||||||
int type;
|
int type;
|
||||||
int channels;
|
int channels;
|
||||||
//std::vector<cv::ocl::Info> oclinfo;
|
//std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
//int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
//int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||||
//CV_Assert(devnums > 0);
|
//CV_Assert(devnums > 0);
|
||||||
type = GET_PARAM(0);
|
type = GET_PARAM(0);
|
||||||
channels = GET_PARAM(1);
|
channels = GET_PARAM(1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(PyrUp,Accuracy)
|
TEST_P(PyrUp, Accuracy)
|
||||||
{
|
{
|
||||||
for(int j = 0; j < LOOP_TIMES; j++)
|
for(int j = 0; j < LOOP_TIMES; j++)
|
||||||
{
|
{
|
||||||
Size size(MWIDTH, MHEIGHT);
|
Size size(MWIDTH, MHEIGHT);
|
||||||
Mat src = randomMat(size,CV_MAKETYPE(type, channels));
|
Mat src = randomMat(size, CV_MAKETYPE(type, channels));
|
||||||
Mat dst_gold;
|
Mat dst_gold;
|
||||||
pyrUp(src,dst_gold);
|
pyrUp(src, dst_gold);
|
||||||
ocl::oclMat dst;
|
ocl::oclMat dst;
|
||||||
ocl::oclMat srcMat(src);
|
ocl::oclMat srcMat(src);
|
||||||
ocl::pyrUp(srcMat,dst);
|
ocl::pyrUp(srcMat, dst);
|
||||||
Mat cpu_dst;
|
Mat cpu_dst;
|
||||||
dst.download(cpu_dst);
|
dst.download(cpu_dst);
|
||||||
char s[100]={0};
|
char s[100] = {0};
|
||||||
|
|
||||||
|
EXPECT_MAT_NEAR(dst_gold, cpu_dst, (src.depth() == CV_32F ? 1e-4f : 1.0), s);
|
||||||
|
}
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_gold, cpu_dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -119,10 +119,10 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(1, mat1.cols);
|
roicols = rng.uniform(1, mat1.cols);
|
||||||
roirows = rng.uniform(1, mat1.rows);
|
roirows = rng.uniform(1, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@@ -130,8 +130,8 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int)
|
|||||||
src2x = rng.uniform(0, mat2.cols - roicols);
|
src2x = rng.uniform(0, mat2.cols - roicols);
|
||||||
src2y = rng.uniform(0, mat2.rows - roirows);
|
src2y = rng.uniform(0, mat2.rows - roirows);
|
||||||
src3x = rng.uniform(0, mat3.cols - roicols);
|
src3x = rng.uniform(0, mat3.cols - roicols);
|
||||||
src3y = rng.uniform(0, mat3.cols - roirows);
|
src3y = rng.uniform(0, mat3.rows - roirows);
|
||||||
src4x = rng.uniform(0, mat4.rows - roicols);
|
src4x = rng.uniform(0, mat4.cols - roicols);
|
||||||
src4y = rng.uniform(0, mat4.rows - roirows);
|
src4y = rng.uniform(0, mat4.rows - roirows);
|
||||||
dstx = rng.uniform(0, dst.cols - roicols);
|
dstx = rng.uniform(0, dst.cols - roicols);
|
||||||
dsty = rng.uniform(0, dst.rows - roirows);
|
dsty = rng.uniform(0, dst.rows - roirows);
|
||||||
@@ -194,13 +194,13 @@ TEST_P(Merge, Accuracy)
|
|||||||
dev_gsrc.push_back(gmat1);
|
dev_gsrc.push_back(gmat1);
|
||||||
|
|
||||||
if(channels >= 2)
|
if(channels >= 2)
|
||||||
dev_gsrc.push_back(gmat2);
|
dev_gsrc.push_back(gmat2);
|
||||||
|
|
||||||
if(channels >= 3)
|
if(channels >= 3)
|
||||||
dev_gsrc.push_back(gmat3);
|
dev_gsrc.push_back(gmat3);
|
||||||
|
|
||||||
if(channels >= 4)
|
if(channels >= 4)
|
||||||
dev_gsrc.push_back(gmat4);
|
dev_gsrc.push_back(gmat4);
|
||||||
|
|
||||||
cv::merge(dev_src, dst_roi);
|
cv::merge(dev_src, dst_roi);
|
||||||
cv::ocl::merge(dev_gsrc, gdst);
|
cv::ocl::merge(dev_gsrc, gdst);
|
||||||
@@ -287,10 +287,10 @@ PARAM_TEST_CASE(SplitTestBase, MatType, int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(1, mat.cols);
|
roicols = rng.uniform(1, mat.cols);
|
||||||
roirows = rng.uniform(1, mat.rows);
|
roirows = rng.uniform(1, mat.rows);
|
||||||
srcx = rng.uniform(0, mat.cols - roicols);
|
srcx = rng.uniform(0, mat.cols - roicols);
|
||||||
@@ -368,26 +368,26 @@ TEST_P(Split, Accuracy)
|
|||||||
sprintf(sss, "roicols=%d,roirows=%d,dst1x =%d,dsty=%d,dst2x =%d,dst2y=%d,dst3x =%d,dst3y=%d,dst4x =%d,dst4y=%d,srcx=%d,srcy=%d", roicols, roirows, dst1x , dst1y, dst2x , dst2y, dst3x , dst3y, dst4x , dst4y, srcx, srcy);
|
sprintf(sss, "roicols=%d,roirows=%d,dst1x =%d,dsty=%d,dst2x =%d,dst2y=%d,dst3x =%d,dst3y=%d,dst4x =%d,dst4y=%d,srcx=%d,srcy=%d", roicols, roirows, dst1x , dst1y, dst2x , dst2y, dst3x , dst3y, dst4x , dst4y, srcx, srcy);
|
||||||
|
|
||||||
if(channels >= 1)
|
if(channels >= 1)
|
||||||
EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.0, sss);
|
EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.0, sss);
|
||||||
|
|
||||||
if(channels >= 2)
|
if(channels >= 2)
|
||||||
EXPECT_MAT_NEAR(dst2, cpu_dst2, 0.0, sss);
|
EXPECT_MAT_NEAR(dst2, cpu_dst2, 0.0, sss);
|
||||||
|
|
||||||
if(channels >= 3)
|
if(channels >= 3)
|
||||||
EXPECT_MAT_NEAR(dst3, cpu_dst3, 0.0, sss);
|
EXPECT_MAT_NEAR(dst3, cpu_dst3, 0.0, sss);
|
||||||
|
|
||||||
if(channels >= 4)
|
if(channels >= 4)
|
||||||
EXPECT_MAT_NEAR(dst4, cpu_dst4, 0.0, sss);
|
EXPECT_MAT_NEAR(dst4, cpu_dst4, 0.0, sss);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(
|
INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(
|
||||||
Values(CV_8U, CV_32S, CV_32F), Values(1, 3,4)));
|
Values(CV_8U, CV_32S, CV_32F), Values(1, 3, 4)));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine(
|
INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine(
|
||||||
Values(CV_8U, CV_32S, CV_32F), Values(1, 3,4)));
|
Values(CV_8U, CV_32S, CV_32F), Values(1, 3, 4)));
|
||||||
|
|
||||||
|
|
||||||
#endif // HAVE_OPENCL
|
#endif // HAVE_OPENCL
|
||||||
|
|||||||
@@ -207,7 +207,7 @@ vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
|
|||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<MatType>& all_types()
|
const vector<MatType> &all_types()
|
||||||
{
|
{
|
||||||
static vector<MatType> v = types(CV_8U, CV_64F, 1, 4);
|
static vector<MatType> v = types(CV_8U, CV_64F, 1, 4);
|
||||||
|
|
||||||
|
|||||||
@@ -112,7 +112,7 @@ using perf::MatType;
|
|||||||
std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
|
std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
|
||||||
|
|
||||||
//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
|
//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
|
||||||
const std::vector<MatType>& all_types();
|
const std::vector<MatType> &all_types();
|
||||||
|
|
||||||
class Inverse
|
class Inverse
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user