some host side optimizations to ocl::GaussianBlur
This commit is contained in:
parent
9060365f5e
commit
e05112a364
@ -47,6 +47,7 @@
|
|||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
#include "mcwutil.hpp"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
@ -194,7 +195,9 @@ namespace
|
|||||||
inline void normalizeAnchor(int &anchor, int ksize)
|
inline void normalizeAnchor(int &anchor, int ksize)
|
||||||
{
|
{
|
||||||
if (anchor < 0)
|
if (anchor < 0)
|
||||||
|
{
|
||||||
anchor = ksize >> 1;
|
anchor = ksize >> 1;
|
||||||
|
}
|
||||||
|
|
||||||
CV_Assert(0 <= anchor && anchor < ksize);
|
CV_Assert(0 <= anchor && anchor < ksize);
|
||||||
}
|
}
|
||||||
@ -208,7 +211,10 @@ inline void normalizeAnchor(Point &anchor, const Size &ksize)
|
|||||||
inline void normalizeROI(Rect &roi, const Size &ksize, const Point &anchor, const Size &src_size)
|
inline void normalizeROI(Rect &roi, const Size &ksize, const Point &anchor, const Size &src_size)
|
||||||
{
|
{
|
||||||
if (roi == Rect(0, 0, -1, -1))
|
if (roi == Rect(0, 0, -1, -1))
|
||||||
|
{
|
||||||
roi = Rect(0, 0, src_size.width, src_size.height);
|
roi = Rect(0, 0, src_size.width, src_size.height);
|
||||||
|
}
|
||||||
|
|
||||||
CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
|
CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
|
||||||
CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
|
CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
|
||||||
CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height);
|
CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height);
|
||||||
@ -218,7 +224,11 @@ inline void normalizeROI(Rect &roi, const Size &ksize, const Point &anchor, cons
|
|||||||
inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8U, int *nDivisor = 0, bool reverse = false)
|
inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8U, int *nDivisor = 0, bool reverse = false)
|
||||||
{
|
{
|
||||||
int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1;
|
int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1;
|
||||||
if (nDivisor) *nDivisor = scale;
|
|
||||||
|
if (nDivisor)
|
||||||
|
{
|
||||||
|
*nDivisor = scale;
|
||||||
|
}
|
||||||
|
|
||||||
Mat temp(kernel.size(), type);
|
Mat temp(kernel.size(), type);
|
||||||
kernel.convertTo(temp, type, scale);
|
kernel.convertTo(temp, type, scale);
|
||||||
@ -227,6 +237,7 @@ inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8
|
|||||||
if (reverse)
|
if (reverse)
|
||||||
{
|
{
|
||||||
int count = cont_krnl.cols >> 1;
|
int count = cont_krnl.cols >> 1;
|
||||||
|
|
||||||
for (int i = 0; i < count; ++i)
|
for (int i = 0; i < count; ++i)
|
||||||
{
|
{
|
||||||
std::swap(cont_krnl.at<int>(0, i), cont_krnl.at<int>(0, cont_krnl.cols - 1 - i));
|
std::swap(cont_krnl.at<int>(0, i), cont_krnl.at<int>(0, cont_krnl.cols - 1 - i));
|
||||||
@ -353,7 +364,9 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c
|
|||||||
kernelName = "morph";
|
kernelName = "morph";
|
||||||
CV_Assert(localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
|
CV_Assert(localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
char s[64];
|
char s[64];
|
||||||
|
|
||||||
switch (src.type())
|
switch (src.type())
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
@ -373,6 +386,7 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c
|
|||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat, "unsupported type");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported type");
|
||||||
}
|
}
|
||||||
|
|
||||||
char compile_option[128];
|
char compile_option[128];
|
||||||
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s);
|
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s);
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
@ -425,7 +439,9 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize,
|
|||||||
kernelName = "morph";
|
kernelName = "morph";
|
||||||
CV_Assert(localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
|
CV_Assert(localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
char s[64];
|
char s[64];
|
||||||
|
|
||||||
switch (src.type())
|
switch (src.type())
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
@ -445,6 +461,7 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize,
|
|||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat, "unsupported type");
|
CV_Error(CV_StsUnsupportedFormat, "unsupported type");
|
||||||
}
|
}
|
||||||
|
|
||||||
char compile_option[128];
|
char compile_option[128];
|
||||||
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s);
|
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s);
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
@ -492,6 +509,7 @@ public:
|
|||||||
virtual void apply(const oclMat &src, oclMat &dst)
|
virtual void apply(const oclMat &src, oclMat &dst)
|
||||||
{
|
{
|
||||||
Filter2DEngine_GPU::apply(src, dst);
|
Filter2DEngine_GPU::apply(src, dst);
|
||||||
|
|
||||||
//if (iters > 1)
|
//if (iters > 1)
|
||||||
//{
|
//{
|
||||||
// Size wholesize;
|
// Size wholesize;
|
||||||
@ -545,6 +563,7 @@ void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point a
|
|||||||
{
|
{
|
||||||
CV_Error(CV_StsBadArg, "unsupported border type");
|
CV_Error(CV_StsBadArg, "unsupported border type");
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat kernel;
|
Mat kernel;
|
||||||
Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);
|
Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);
|
||||||
|
|
||||||
@ -572,7 +591,9 @@ void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point a
|
|||||||
iterations = 1;
|
iterations = 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
kernel = _kernel;
|
kernel = _kernel;
|
||||||
|
}
|
||||||
|
|
||||||
Ptr<FilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations);
|
Ptr<FilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations);
|
||||||
|
|
||||||
@ -584,13 +605,18 @@ void cv::ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point an
|
|||||||
int borderType, const Scalar &borderValue)
|
int borderType, const Scalar &borderValue)
|
||||||
{
|
{
|
||||||
bool allZero = true;
|
bool allZero = true;
|
||||||
|
|
||||||
for (int i = 0; i < kernel.rows * kernel.cols; ++i)
|
for (int i = 0; i < kernel.rows * kernel.cols; ++i)
|
||||||
if (kernel.data[i] != 0)
|
if (kernel.data[i] != 0)
|
||||||
|
{
|
||||||
allZero = false;
|
allZero = false;
|
||||||
|
}
|
||||||
|
|
||||||
if (allZero)
|
if (allZero)
|
||||||
{
|
{
|
||||||
kernel.data[0] = 1;
|
kernel.data[0] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations, borderType, borderValue);
|
morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations, borderType, borderValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -604,6 +630,7 @@ void cv::ocl::morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &k
|
|||||||
int borderType, const Scalar &borderValue)
|
int borderType, const Scalar &borderValue)
|
||||||
{
|
{
|
||||||
oclMat temp;
|
oclMat temp;
|
||||||
|
|
||||||
switch (op)
|
switch (op)
|
||||||
{
|
{
|
||||||
case MORPH_ERODE:
|
case MORPH_ERODE:
|
||||||
@ -751,7 +778,9 @@ void cv::ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &ke
|
|||||||
{
|
{
|
||||||
|
|
||||||
if (ddepth < 0)
|
if (ddepth < 0)
|
||||||
|
{
|
||||||
ddepth = src.depth();
|
ddepth = src.depth();
|
||||||
|
}
|
||||||
|
|
||||||
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
|
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
|
||||||
|
|
||||||
@ -782,10 +811,10 @@ public:
|
|||||||
|
|
||||||
int cn = src.oclchannels();
|
int cn = src.oclchannels();
|
||||||
//dst.create(src_size, src_type);
|
//dst.create(src_size, src_type);
|
||||||
dst = Scalar(0.0);
|
//dst = Scalar(0.0);
|
||||||
//dstBuf.create(src_size, src_type);
|
//dstBuf.create(src_size, src_type);
|
||||||
dstBuf.create(src_size.height + ksize.height - 1, src_size.width, CV_MAKETYPE(CV_32F, cn));
|
dstBuf.create(src_size.height + ksize.height - 1, src_size.width, CV_MAKETYPE(CV_32F, cn));
|
||||||
dstBuf = Scalar(0.0);
|
//dstBuf = Scalar(0.0);
|
||||||
|
|
||||||
normalizeROI(roi, ksize, anchor, src_size);
|
normalizeROI(roi, ksize, anchor, src_size);
|
||||||
|
|
||||||
@ -835,6 +864,7 @@ void GPUFilterBox_8u_C1R(const oclMat &src, oclMat &dst,
|
|||||||
string kernelName = "boxFilter_C1_D0";
|
string kernelName = "boxFilter_C1_D0";
|
||||||
|
|
||||||
char btype[30];
|
char btype[30];
|
||||||
|
|
||||||
switch (borderType)
|
switch (borderType)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
@ -896,6 +926,7 @@ void GPUFilterBox_8u_C4R(const oclMat &src, oclMat &dst,
|
|||||||
string kernelName = "boxFilter_C4_D0";
|
string kernelName = "boxFilter_C4_D0";
|
||||||
|
|
||||||
char btype[30];
|
char btype[30];
|
||||||
|
|
||||||
switch (borderType)
|
switch (borderType)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
@ -957,6 +988,7 @@ void GPUFilterBox_32F_C1R(const oclMat &src, oclMat &dst,
|
|||||||
string kernelName = "boxFilter_C1_D5";
|
string kernelName = "boxFilter_C1_D5";
|
||||||
|
|
||||||
char btype[30];
|
char btype[30];
|
||||||
|
|
||||||
switch (borderType)
|
switch (borderType)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
@ -1019,6 +1051,7 @@ void GPUFilterBox_32F_C4R(const oclMat &src, oclMat &dst,
|
|||||||
string kernelName = "boxFilter_C4_D5";
|
string kernelName = "boxFilter_C4_D5";
|
||||||
|
|
||||||
char btype[30];
|
char btype[30];
|
||||||
|
|
||||||
switch (borderType)
|
switch (borderType)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
@ -1095,8 +1128,11 @@ void cv::ocl::boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
|
|||||||
Point anchor, int borderType)
|
Point anchor, int borderType)
|
||||||
{
|
{
|
||||||
int sdepth = src.depth(), cn = src.channels();
|
int sdepth = src.depth(), cn = src.channels();
|
||||||
|
|
||||||
if (ddepth < 0)
|
if (ddepth < 0)
|
||||||
|
{
|
||||||
ddepth = sdepth;
|
ddepth = sdepth;
|
||||||
|
}
|
||||||
|
|
||||||
dst.create(src.size(), CV_MAKETYPE(ddepth, cn));
|
dst.create(src.size(), CV_MAKETYPE(ddepth, cn));
|
||||||
|
|
||||||
@ -1161,6 +1197,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
|
|||||||
string kernelName = "row_filter";
|
string kernelName = "row_filter";
|
||||||
|
|
||||||
char btype[30];
|
char btype[30];
|
||||||
|
|
||||||
switch (bordertype)
|
switch (bordertype)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
@ -1179,12 +1216,14 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
|
|||||||
sprintf(btype, "BORDER_REFLECT_101");
|
sprintf(btype, "BORDER_REFLECT_101");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
char compile_option[128];
|
char compile_option[128];
|
||||||
sprintf(compile_option, "-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s", anchor, localThreads[0], localThreads[1], channels, btype);
|
sprintf(compile_option, "-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s", anchor, localThreads[0], localThreads[1], channels, btype);
|
||||||
|
|
||||||
size_t globalThreads[3];
|
size_t globalThreads[3];
|
||||||
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
|
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
|
||||||
globalThreads[2] = (1 + localThreads[2] - 1) / localThreads[2] * localThreads[2];
|
globalThreads[2] = (1 + localThreads[2] - 1) / localThreads[2] * localThreads[2];
|
||||||
|
|
||||||
if (src.depth() == CV_8U)
|
if (src.depth() == CV_8U)
|
||||||
{
|
{
|
||||||
switch (channels)
|
switch (channels)
|
||||||
@ -1205,6 +1244,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
|
|||||||
{
|
{
|
||||||
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
//sanity checks
|
//sanity checks
|
||||||
CV_Assert(clCxt == dst.clCxt);
|
CV_Assert(clCxt == dst.clCxt);
|
||||||
CV_Assert(src.cols == dst.cols);
|
CV_Assert(src.cols == dst.cols);
|
||||||
@ -1232,7 +1272,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
|
|||||||
args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy));
|
args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy));
|
||||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
|
args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option);
|
openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype)
|
Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype)
|
||||||
@ -1289,6 +1329,7 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
|
|||||||
string kernelName = "col_filter";
|
string kernelName = "col_filter";
|
||||||
|
|
||||||
char btype[30];
|
char btype[30];
|
||||||
|
|
||||||
switch (bordertype)
|
switch (bordertype)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
@ -1307,12 +1348,14 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
|
|||||||
sprintf(btype, "BORDER_REFLECT_101");
|
sprintf(btype, "BORDER_REFLECT_101");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
char compile_option[256];
|
char compile_option[256];
|
||||||
|
|
||||||
|
|
||||||
size_t globalThreads[3];
|
size_t globalThreads[3];
|
||||||
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
|
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
|
||||||
globalThreads[2] = (1 + localThreads[2] - 1) / localThreads[2] * localThreads[2];
|
globalThreads[2] = (1 + localThreads[2] - 1) / localThreads[2] * localThreads[2];
|
||||||
|
|
||||||
if (dst.depth() == CV_8U)
|
if (dst.depth() == CV_8U)
|
||||||
{
|
{
|
||||||
switch (channels)
|
switch (channels)
|
||||||
@ -1338,6 +1381,7 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
||||||
|
|
||||||
switch (dst.type())
|
switch (dst.type())
|
||||||
{
|
{
|
||||||
case CV_32SC1:
|
case CV_32SC1:
|
||||||
@ -1446,6 +1490,7 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat
|
|||||||
if ((bordertype & cv::BORDER_ISOLATED) != 0)
|
if ((bordertype & cv::BORDER_ISOLATED) != 0)
|
||||||
{
|
{
|
||||||
bordertype &= ~cv::BORDER_ISOLATED;
|
bordertype &= ~cv::BORDER_ISOLATED;
|
||||||
|
|
||||||
if ((bordertype != cv::BORDER_CONSTANT) &&
|
if ((bordertype != cv::BORDER_CONSTANT) &&
|
||||||
(bordertype != cv::BORDER_REPLICATE))
|
(bordertype != cv::BORDER_REPLICATE))
|
||||||
{
|
{
|
||||||
@ -1453,8 +1498,12 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ddepth < 0)
|
if (ddepth < 0)
|
||||||
|
{
|
||||||
ddepth = src.depth();
|
ddepth = src.depth();
|
||||||
|
}
|
||||||
|
|
||||||
//CV_Assert(ddepth == src.depth());
|
//CV_Assert(ddepth == src.depth());
|
||||||
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
|
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
|
||||||
|
|
||||||
@ -1482,10 +1531,15 @@ void cv::ocl::Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy,
|
|||||||
// usually the smoothing part is the slowest to compute,
|
// usually the smoothing part is the slowest to compute,
|
||||||
// so try to scale it instead of the faster differentiating part
|
// so try to scale it instead of the faster differentiating part
|
||||||
if (dx == 0)
|
if (dx == 0)
|
||||||
|
{
|
||||||
kx *= scale;
|
kx *= scale;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
ky *= scale;
|
ky *= scale;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Mat kx_, ky_;
|
// Mat kx_, ky_;
|
||||||
//ky.convertTo(ky_,CV_32S,1<<8);
|
//ky.convertTo(ky_,CV_32S,1<<8);
|
||||||
//kx.convertTo(kx_,CV_32S,1<<8);
|
//kx.convertTo(kx_,CV_32S,1<<8);
|
||||||
@ -1503,10 +1557,14 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy,
|
|||||||
// usually the smoothing part is the slowest to compute,
|
// usually the smoothing part is the slowest to compute,
|
||||||
// so try to scale it instead of the faster differentiating part
|
// so try to scale it instead of the faster differentiating part
|
||||||
if (dx == 0)
|
if (dx == 0)
|
||||||
|
{
|
||||||
kx *= scale;
|
kx *= scale;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
ky *= scale;
|
ky *= scale;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Mat kx_, ky_;
|
// Mat kx_, ky_;
|
||||||
//ky.convertTo(ky_,CV_32S,1<<8);
|
//ky.convertTo(ky_,CV_32S,1<<8);
|
||||||
@ -1531,8 +1589,12 @@ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, d
|
|||||||
{2, 0, 2, 0, -8, 0, 2, 0, 2}
|
{2, 0, 2, 0, -8, 0, 2, 0, 2}
|
||||||
};
|
};
|
||||||
Mat kernel(3, 3, CV_32S, (void *)K[ksize == 3]);
|
Mat kernel(3, 3, CV_32S, (void *)K[ksize == 3]);
|
||||||
|
|
||||||
if (scale != 1)
|
if (scale != 1)
|
||||||
|
{
|
||||||
kernel *= scale;
|
kernel *= scale;
|
||||||
|
}
|
||||||
|
|
||||||
filter2D(src, dst, ddepth, kernel, Point(-1, -1));
|
filter2D(src, dst, ddepth, kernel, Point(-1, -1));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1544,13 +1606,20 @@ Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, do
|
|||||||
int depth = CV_MAT_DEPTH(type);
|
int depth = CV_MAT_DEPTH(type);
|
||||||
|
|
||||||
if (sigma2 <= 0)
|
if (sigma2 <= 0)
|
||||||
|
{
|
||||||
sigma2 = sigma1;
|
sigma2 = sigma1;
|
||||||
|
}
|
||||||
|
|
||||||
// automatic detection of kernel size from sigma
|
// automatic detection of kernel size from sigma
|
||||||
if (ksize.width <= 0 && sigma1 > 0)
|
if (ksize.width <= 0 && sigma1 > 0)
|
||||||
|
{
|
||||||
ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
|
ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
|
||||||
|
}
|
||||||
|
|
||||||
if (ksize.height <= 0 && sigma2 > 0)
|
if (ksize.height <= 0 && sigma2 > 0)
|
||||||
|
{
|
||||||
ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
|
ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
|
||||||
|
}
|
||||||
|
|
||||||
CV_Assert(ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1);
|
CV_Assert(ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1);
|
||||||
|
|
||||||
@ -1559,10 +1628,16 @@ Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, do
|
|||||||
|
|
||||||
Mat kx = getGaussianKernel(ksize.width, sigma1, std::max(depth, CV_32F));
|
Mat kx = getGaussianKernel(ksize.width, sigma1, std::max(depth, CV_32F));
|
||||||
Mat ky;
|
Mat ky;
|
||||||
|
|
||||||
if (ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON)
|
if (ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON)
|
||||||
|
{
|
||||||
ky = kx;
|
ky = kx;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));
|
ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));
|
||||||
|
}
|
||||||
|
|
||||||
//Mat kx_, ky_;
|
//Mat kx_, ky_;
|
||||||
//kx.convertTo(kx_,CV_32S,1<<8);
|
//kx.convertTo(kx_,CV_32S,1<<8);
|
||||||
//ky.convertTo(ky_,CV_32S,1<<8);
|
//ky.convertTo(ky_,CV_32S,1<<8);
|
||||||
@ -1576,11 +1651,13 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
|
|||||||
src.copyTo(dst);
|
src.copyTo(dst);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
|
if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
|
||||||
{
|
{
|
||||||
if ((bordertype & cv::BORDER_ISOLATED) != 0)
|
if ((bordertype & cv::BORDER_ISOLATED) != 0)
|
||||||
{
|
{
|
||||||
bordertype &= ~cv::BORDER_ISOLATED;
|
bordertype &= ~cv::BORDER_ISOLATED;
|
||||||
|
|
||||||
if ((bordertype != cv::BORDER_CONSTANT) &&
|
if ((bordertype != cv::BORDER_CONSTANT) &&
|
||||||
(bordertype != cv::BORDER_REPLICATE))
|
(bordertype != cv::BORDER_REPLICATE))
|
||||||
{
|
{
|
||||||
@ -1588,14 +1665,22 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dst.create(src.size(), src.type());
|
dst.create(src.size(), src.type());
|
||||||
|
|
||||||
if (bordertype != BORDER_CONSTANT)
|
if (bordertype != BORDER_CONSTANT)
|
||||||
{
|
{
|
||||||
if (src.rows == 1)
|
if (src.rows == 1)
|
||||||
|
{
|
||||||
ksize.height = 1;
|
ksize.height = 1;
|
||||||
|
}
|
||||||
|
|
||||||
if (src.cols == 1)
|
if (src.cols == 1)
|
||||||
|
{
|
||||||
ksize.width = 1;
|
ksize.width = 1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
|
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
|
||||||
f->apply(src, dst);
|
f->apply(src, dst);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user