Cleaned up adaptive bilateral filtering, added support for Gaussian interpolation, updated sample and docs

This commit is contained in:
Harris Gasparakis
2013-11-05 07:04:04 -05:00
committed by Andrey Pavlenko
parent 370235c07b
commit a1de91a4fd
9 changed files with 173 additions and 80 deletions

View File

@@ -20,6 +20,7 @@
// Zero Lin, Zero.Lin@amd.com
// Zhang Ying, zhangying913@gmail.com
// Yao Wang, bitwangyaoyao@gmail.com
// Harris Gasparakis, harris.gasparakis@amd.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@@ -1407,7 +1408,7 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
////////////////////////////////////////////////////////////////////////////////////////////////////
// Adaptive Bilateral Filter
void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor, Point anchor, int borderType)
{
CV_Assert((ksize.width & 1) && (ksize.height & 1)); // ksize must be odd
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3); // source must be an 8-bit image with 1 or 3 channels
@@ -1418,10 +1419,24 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
int idx = 0;
int w = ksize.width / 2;
int h = ksize.height / 2;
for(int y=-h; y<=h; y++)
for(int x=-w; x<=w; x++)
int ABF_GAUSSIAN_ocl = 1;
if(ABF_GAUSSIAN_ocl)
{
lut.at<float>(idx++) = sigma2 / (sigma2 + x * x + y * y);
for(int y=-h; y<=h; y++)
for(int x=-w; x<=w; x++)
{
lut.at<float>(idx++) = expf( (float)(-0.5 * (x * x + y * y)/sigma2));
}
}
else
{
for(int y=-h; y<=h; y++)
for(int x=-w; x<=w; x++)
{
lut.at<float>(idx++) = (float) (sigma2 / (sigma2 + x * x + y * y));
}
}
oclMat dlut(lut);
@@ -1429,7 +1444,7 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
int cn = src.oclchannels();
normalizeAnchor(anchor, ksize);
const static String kernelName = "edgeEnhancingFilter";
const static String kernelName = "adaptiveBilateralFilter";
dst.create(src.size(), src.type());
@@ -1478,9 +1493,10 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
// LDATATYPESIZE is the size of the local data store (LDS). It is there to exemplify the effect of LDS on kernel performance.
sprintf(build_options,
"-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
"-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d -D MAX_VAR_VAL=%f -D ABF_GAUSSIAN=%d"
" -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
static_cast<int>(EXTRA), static_cast<float>(maxSigmaColor*maxSigmaColor), static_cast<int>(ABF_GAUSSIAN_ocl),
static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
std::vector<pair<size_t , const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), &src.data));

View File

@@ -85,7 +85,7 @@
#endif
__kernel void
edgeEnhancingFilter_C4_D0(
adaptiveBilateralFilter_C4_D0(
__global const uchar4 * restrict src,
__global uchar4 *dst,
float alpha,
@@ -173,14 +173,14 @@ edgeEnhancingFilter_C4_D0(
//find variance of all data
int startLMj;
int endLMj ;
#if CALCVAR
// Top row: don't sum the very last element
for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
{
#if CALCVAR
startLMj = extraCnt;
endLMj = ksY+extraCnt-1;
sumVal =0;
sumValSqr=0;
sumVal = (int4)0;
sumValSqr= (int4)0;
for(int j = startLMj; j < endLMj; j++)
for(int i=-anX; i<=anX; i++)
{
@@ -190,9 +190,10 @@ edgeEnhancingFilter_C4_D0(
sumValSqr += mul24(currVal, currVal);
}
var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ;
var[extraCnt] = clamp( convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ), (float4)(0.1f, 0.1f, 0.1f, 0.1f), (float4)(MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL)) ;
#else
var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0);
var[extraCnt] = (float4)(MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL);
#endif
}
@@ -221,32 +222,48 @@ edgeEnhancingFilter_C4_D0(
#else
weight = 1.0f;
#endif
#else
#else // !FIXED_WEIGHT
currVal = convert_int4(data[j][col+anX+i]);
currWRTCenter = currVal-currValCenter;
#if ABF_GAUSSIAN
#if VAR_PER_CHANNEL
weight = exp( (float4)(-0.5f, -0.5f, -0.5f, -0.5f) * convert_float4(currWRTCenter * currWRTCenter) / var[extraCnt] )*
(float4)(lut[lut_j*lut_step+anX+i]);
#else
weight = exp( -0.5f * (mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
mul24(currWRTCenter.z, currWRTCenter.z) ) / (var[extraCnt].x+var[extraCnt].y+var[extraCnt].z) ) * lut[lut_j*lut_step+anX+i];
#endif
#else // !ABF_GAUSSIAN
#if VAR_PER_CHANNEL
weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) *
(float4)(lut[lut_j*lut_step+anX+i]);
#else
weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z));
#endif
weight = ((float)lut[lut_j*lut_step+anX+i]) /(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
mul24(currWRTCenter.z, currWRTCenter.z))/(var[extraCnt].x+var[extraCnt].y+var[extraCnt].z));
#endif
#endif //ABF_GAUSSIAN
#endif // FIXED_WEIGHT
tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
totalWeight += weight;
}
}
tmp_sum[extraCnt] /= totalWeight;
if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4_rtz( (tmp_sum[extraCnt] / (float4)totalWeight) + (float4)0.5f);
#if VAR_PER_CHANNEL
totalWeight = (float4)(0,0,0,0);
#else
totalWeight = 0;
totalWeight = 0.0f;
#endif
}
}
@@ -254,7 +271,7 @@ edgeEnhancingFilter_C4_D0(
__kernel void
edgeEnhancingFilter_C1_D0(
adaptiveBilateralFilter_C1_D0(
__global const uchar * restrict src,
__global uchar *dst,
float alpha,
@@ -343,10 +360,11 @@ edgeEnhancingFilter_C1_D0(
//find variance of all data
int startLMj;
int endLMj;
#if CALCVAR
// Top row: don't sum the very last element
for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
{
#if CALCVAR
startLMj = extraCnt;
endLMj = ksY+extraCnt-1;
sumVal = 0;
@@ -361,9 +379,9 @@ edgeEnhancingFilter_C1_D0(
sumValSqr += mul24(currVal, currVal);
}
}
var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ;
var[extraCnt] = clamp((float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) , 0.1f, (float)(MAX_VAR_VAL) );
#else
var[extraCnt] = (float)(900.0);
var[extraCnt] = (float)(MAX_VAR_VAL);
#endif
}
@@ -389,19 +407,20 @@ edgeEnhancingFilter_C1_D0(
currVal = (int)(data[j][col+anX+i]) ;
currWRTCenter = currVal-currValCenter;
#if ABF_GAUSSIAN
weight = exp( -0.5f * (float)mul24(currWRTCenter,currWRTCenter)/var[extraCnt]) * lut[lut_j*lut_step+anX+i] ;
#else
weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
#endif
#endif
tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
totalWeight += weight;
}
}
tmp_sum[extraCnt] /= totalWeight;
if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
{
dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]);
dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = convert_uchar_rtz(tmp_sum[extraCnt]/totalWeight+0.5f);
}
totalWeight = 0;