Merge pull request #2612 from grkutty:pullreq/140319-bilateral-b

This commit is contained in:
Andrey Pavlenko 2014-04-22 09:32:38 +04:00 committed by OpenCV Buildbot
commit 7c3d5b056a
3 changed files with 90 additions and 34 deletions

View File

@ -54,9 +54,11 @@
#error "cn should be <= 4"
#endif
//Read pixels as integers
// Intel Device - Read Pixels as floats
__kernel void bilateral(__global const uchar * src, int src_step, int src_offset,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__constant float * color_weight, __constant float * space_weight, __constant int * space_ofs)
__constant float * space_weight, __constant int * space_ofs)
{
int x = get_global_id(0);
int y = get_global_id(1);
@ -68,18 +70,55 @@ __kernel void bilateral(__global const uchar * src, int src_step, int src_offset
float_t sum = (float_t)(0.0f);
float wsum = 0.0f;
#ifdef INTEL_DEVICE
float_t val0 = convert_float_t(loadpix(src + src_index));
#else
int_t val0 = convert_int_t(loadpix(src + src_index));
#endif
#pragma unroll
for (int k = 0; k < maxk; k++ )
{
#ifdef INTEL_DEVICE
float_t val = convert_float_t(loadpix(src + src_index + space_ofs[k]));
float diff = SUM(fabs(val - val0));
#else
int_t val = convert_int_t(loadpix(src + src_index + space_ofs[k]));
uint_t diff = abs(val - val0);
float w = space_weight[k] * color_weight[SUM(diff)];
int diff = SUM(abs(val - val0));
#endif
float w = space_weight[k] * native_exp((float)(diff * diff * gauss_color_coeff));
sum += convert_float_t(val) * (float_t)(w);
wsum += w;
}
storepix(convert_uchar_t(sum / (float_t)(wsum)), dst + dst_index);
}
}
#ifdef INTEL_DEVICE
#if cn == 1
//for single channgel x4 sized images.
__kernel void bilateral_float4(__global const uchar * src, int src_step, int src_offset,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__constant float * space_weight, __constant int * space_ofs)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (y < dst_rows && x < dst_cols / 4 )
{
int src_index = ((y + radius) * src_step) + x * 4 + (radius + src_offset);
int dst_index = (y * dst_step) + x * 4 + dst_offset ;
float4 sum = 0.f, wsum = 0.f;
float4 val0 = convert_float4(vload4(0, src + src_index));
#pragma unroll
for (int k = 0; k < maxk; k++ )
{
float4 val = convert_float4(vload4(0, src + src_index + space_ofs[k]));
float4 w = space_weight[k] * native_exp((val - val0) * (val - val0) * gauss_color_coeff);
sum += val * w;
wsum += w;
}
sum = sum / wsum + .5f;
vstore4(convert_uchar4_rtz(sum), 0, dst + dst_index);
}
}
#endif
#endif

View File

@ -2345,18 +2345,11 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d,
return false;
copyMakeBorder(src, temp, radius, radius, radius, radius, borderType);
std::vector<float> _color_weight(cn * 256);
std::vector<float> _space_weight(d * d);
std::vector<int> _space_ofs(d * d);
float * const color_weight = &_color_weight[0];
float * const space_weight = &_space_weight[0];
int * const space_ofs = &_space_ofs[0];
// initialize color-related bilateral filter coefficients
for( i = 0; i < 256 * cn; i++ )
color_weight[i] = (float)std::exp(i * i * gauss_color_coeff);
// initialize space-related bilateral filter coefficients
for( i = -radius, maxk = 0; i <= radius; i++ )
for( j = -radius; j <= radius; j++ )
@ -2370,36 +2363,45 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d,
char cvt[3][40];
String cnstr = cn > 1 ? format("%d", cn) : "";
ocl::Kernel k("bilateral", ocl::imgproc::bilateral_oclsrc,
format("-D radius=%d -D maxk=%d -D cn=%d -D int_t=%s -D uint_t=uint%s -D convert_int_t=%s"
" -D uchar_t=%s -D float_t=%s -D convert_float_t=%s -D convert_uchar_t=%s",
radius, maxk, cn, ocl::typeToStr(CV_32SC(cn)), cnstr.c_str(),
ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]),
ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)),
ocl::convertTypeStr(CV_32S, CV_32F, cn, cvt[1]),
ocl::convertTypeStr(CV_32F, CV_8U, cn, cvt[2])));
String kernelName("bilateral");
size_t sizeDiv = 1;
if ((ocl::Device::getDefault().isIntel()) &&
(ocl::Device::getDefault().type() == ocl::Device::TYPE_GPU))
{
//Intel GPU
if (dst.cols % 4 == 0 && cn == 1) // For single channel x4 sized images.
{
kernelName = "bilateral_float4";
sizeDiv = 4;
}
}
ocl::Kernel k(kernelName.c_str(), ocl::imgproc::bilateral_oclsrc,
format("-D radius=%d -D maxk=%d -D cn=%d -D int_t=%s -D uint_t=uint%s -D convert_int_t=%s"
" -D uchar_t=%s -D float_t=%s -D convert_float_t=%s -D convert_uchar_t=%s -D gauss_color_coeff=%f",
radius, maxk, cn, ocl::typeToStr(CV_32SC(cn)), cnstr.c_str(),
ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]),
ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)),
ocl::convertTypeStr(CV_32S, CV_32F, cn, cvt[1]),
ocl::convertTypeStr(CV_32F, CV_8U, cn, cvt[2]), gauss_color_coeff));
if (k.empty())
return false;
Mat mcolor_weight(1, cn * 256, CV_32FC1, color_weight);
Mat mspace_weight(1, d * d, CV_32FC1, space_weight);
Mat mspace_ofs(1, d * d, CV_32SC1, space_ofs);
UMat ucolor_weight, uspace_weight, uspace_ofs;
mcolor_weight.copyTo(ucolor_weight);
mspace_weight.copyTo(uspace_weight);
mspace_ofs.copyTo(uspace_ofs);
k.args(ocl::KernelArg::ReadOnlyNoSize(temp), ocl::KernelArg::WriteOnly(dst),
ocl::KernelArg::PtrReadOnly(ucolor_weight),
ocl::KernelArg::PtrReadOnly(uspace_weight),
ocl::KernelArg::PtrReadOnly(uspace_ofs));
size_t globalsize[2] = { dst.cols, dst.rows };
size_t globalsize[2] = { dst.cols / sizeDiv, dst.rows };
return k.run(2, globalsize, NULL, false);
}
#endif
static void
bilateralFilter_8u( const Mat& src, Mat& dst, int d,
double sigma_color, double sigma_space,

View File

@ -62,12 +62,14 @@ PARAM_TEST_CASE(FilterTestBase, MatType,
Size, // dx, dy
BorderType, // border type
double, // optional parameter
bool) // roi or not
bool, // roi or not
int) //width multiplier
{
int type, borderType, ksize;
Size size;
double param;
bool useRoi;
int widthMultiple;
TEST_DECLARE_INPUT_PARAMETER(src);
TEST_DECLARE_OUTPUT_PARAMETER(dst);
@ -80,6 +82,7 @@ PARAM_TEST_CASE(FilterTestBase, MatType,
borderType = GET_PARAM(3);
param = GET_PARAM(4);
useRoi = GET_PARAM(5);
widthMultiple = GET_PARAM(6);
}
void random_roi(int minSize = 1)
@ -88,6 +91,9 @@ PARAM_TEST_CASE(FilterTestBase, MatType,
minSize = ksize;
Size roiSize = randomSize(minSize, MAX_VALUE);
roiSize.width &= ~((widthMultiple * 2) - 1);
roiSize.width += widthMultiple;
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
@ -312,7 +318,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
Values(Size(0, 0)), // not used
FILTER_BORDER_SET_NO_ISOLATED,
Values(0.0), // not used
Bool()));
Bool(),
Values(1, 4)));
OCL_INSTANTIATE_TEST_CASE_P(Filter, LaplacianTest, Combine(
FILTER_TYPES,
@ -320,7 +327,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, LaplacianTest, Combine(
Values(Size(0, 0)), // not used
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
Values(1.0, 0.2, 3.0), // kernel scale
Bool()));
Bool(),
Values(1))); // not used
OCL_INSTANTIATE_TEST_CASE_P(Filter, SobelTest, Combine(
FILTER_TYPES,
@ -328,7 +336,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, SobelTest, Combine(
Values(Size(1, 0), Size(1, 1), Size(2, 0), Size(2, 1)), // dx, dy
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
Values(0.0), // not used
Bool()));
Bool(),
Values(1))); // not used
OCL_INSTANTIATE_TEST_CASE_P(Filter, ScharrTest, Combine(
FILTER_TYPES,
@ -336,7 +345,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, ScharrTest, Combine(
Values(Size(0, 1), Size(1, 0)), // dx, dy
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
Values(1.0, 0.2), // kernel scale
Bool()));
Bool(),
Values(1))); // not used
OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
FILTER_TYPES,
@ -344,7 +354,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
Values(Size(0, 0)), // not used
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
Values(0.0), // not used
Bool()));
Bool(),
Values(1))); // not used
OCL_INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC4),
@ -352,7 +363,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
Values(Size(0,0)),//not used
Values((BorderType)BORDER_CONSTANT),//not used
Values(1.0, 2.0, 3.0),
Bool()));
Bool(),
Values(1))); // not used
OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC4),
@ -360,7 +372,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
Values(Size(0,0)),//not used
Values((BorderType)BORDER_CONSTANT),//not used
Values(1.0, 2.0, 3.0),
Bool()));
Bool(),
Values(1))); //not used
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC4),
@ -368,7 +381,9 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
Values(Size(0, 0), Size(0, 1), Size(0, 2), Size(0, 3), Size(0, 4), Size(0, 5), Size(0, 6)), // used as generator of operations
Values((BorderType)BORDER_CONSTANT),// not used
Values(1.0, 2.0, 3.0),
Bool()));
Bool(),
Values(1))); //not used
} } // namespace cvtest::ocl