Cleaned up adaptive bilateral filtering, added support for gaussian interpolation, updated sample and docs
This commit is contained in:
committed by
Andrey Pavlenko
parent
370235c07b
commit
a1de91a4fd
@@ -20,6 +20,7 @@
|
||||
// Zero Lin, Zero.Lin@amd.com
|
||||
// Zhang Ying, zhangying913@gmail.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
// Harris Gasparakis, harris.gasparakis@amd.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
@@ -1407,7 +1408,7 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Adaptive Bilateral Filter
|
||||
|
||||
void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
|
||||
void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor, Point anchor, int borderType)
|
||||
{
|
||||
CV_Assert((ksize.width & 1) && (ksize.height & 1)); // ksize must be odd
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3); // source must be an 8-bit single-channel or 3-channel image
|
||||
@@ -1418,10 +1419,24 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
|
||||
int idx = 0;
|
||||
int w = ksize.width / 2;
|
||||
int h = ksize.height / 2;
|
||||
for(int y=-h; y<=h; y++)
|
||||
for(int x=-w; x<=w; x++)
|
||||
|
||||
int ABF_GAUSSIAN_ocl = 1;
|
||||
|
||||
if(ABF_GAUSSIAN_ocl)
|
||||
{
|
||||
lut.at<float>(idx++) = sigma2 / (sigma2 + x * x + y * y);
|
||||
for(int y=-h; y<=h; y++)
|
||||
for(int x=-w; x<=w; x++)
|
||||
{
|
||||
lut.at<float>(idx++) = expf( (float)(-0.5 * (x * x + y * y)/sigma2));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int y=-h; y<=h; y++)
|
||||
for(int x=-w; x<=w; x++)
|
||||
{
|
||||
lut.at<float>(idx++) = (float) (sigma2 / (sigma2 + x * x + y * y));
|
||||
}
|
||||
}
|
||||
|
||||
oclMat dlut(lut);
|
||||
@@ -1429,7 +1444,7 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
|
||||
int cn = src.oclchannels();
|
||||
|
||||
normalizeAnchor(anchor, ksize);
|
||||
const static String kernelName = "edgeEnhancingFilter";
|
||||
const static String kernelName = "adaptiveBilateralFilter";
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
@@ -1478,9 +1493,10 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
|
||||
|
||||
// LDATATYPESIZE is the size of the local data store (LDS). It is here to exemplify the effect of the LDS on kernel performance.
|
||||
sprintf(build_options,
|
||||
"-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
|
||||
"-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d -D MAX_VAR_VAL=%f -D ABF_GAUSSIAN=%d"
|
||||
" -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
|
||||
static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
|
||||
static_cast<int>(EXTRA), static_cast<float>(maxSigmaColor*maxSigmaColor), static_cast<int>(ABF_GAUSSIAN_ocl),
|
||||
static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
|
||||
|
||||
std::vector<pair<size_t , const void *> > args;
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
|
||||
|
||||
@@ -85,7 +85,7 @@
|
||||
#endif
|
||||
|
||||
__kernel void
|
||||
edgeEnhancingFilter_C4_D0(
|
||||
adaptiveBilateralFilter_C4_D0(
|
||||
__global const uchar4 * restrict src,
|
||||
__global uchar4 *dst,
|
||||
float alpha,
|
||||
@@ -173,14 +173,14 @@ edgeEnhancingFilter_C4_D0(
|
||||
//find variance of all data
|
||||
int startLMj;
|
||||
int endLMj ;
|
||||
#if CALCVAR
|
||||
// Top row: don't sum the very last element
|
||||
for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
|
||||
{
|
||||
#if CALCVAR
|
||||
startLMj = extraCnt;
|
||||
endLMj = ksY+extraCnt-1;
|
||||
sumVal =0;
|
||||
sumValSqr=0;
|
||||
sumVal = (int4)0;
|
||||
sumValSqr= (int4)0;
|
||||
for(int j = startLMj; j < endLMj; j++)
|
||||
for(int i=-anX; i<=anX; i++)
|
||||
{
|
||||
@@ -190,9 +190,10 @@ edgeEnhancingFilter_C4_D0(
|
||||
sumValSqr += mul24(currVal, currVal);
|
||||
}
|
||||
|
||||
var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ;
|
||||
var[extraCnt] = clamp( convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ), (float4)(0.1f, 0.1f, 0.1f, 0.1f), (float4)(MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL)) ;
|
||||
|
||||
#else
|
||||
var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0);
|
||||
var[extraCnt] = (float4)(MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -221,32 +222,48 @@ edgeEnhancingFilter_C4_D0(
|
||||
#else
|
||||
weight = 1.0f;
|
||||
#endif
|
||||
#else
|
||||
#else // !FIXED_WEIGHT
|
||||
currVal = convert_int4(data[j][col+anX+i]);
|
||||
currWRTCenter = currVal-currValCenter;
|
||||
|
||||
#if ABF_GAUSSIAN
|
||||
|
||||
#if VAR_PER_CHANNEL
|
||||
weight = exp( (float4)(-0.5f, -0.5f, -0.5f, -0.5f) * convert_float4(currWRTCenter * currWRTCenter) / var[extraCnt] )*
|
||||
(float4)(lut[lut_j*lut_step+anX+i]);
|
||||
#else
|
||||
weight = exp( -0.5f * (mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
|
||||
mul24(currWRTCenter.z, currWRTCenter.z) ) / (var[extraCnt].x+var[extraCnt].y+var[extraCnt].z) ) * lut[lut_j*lut_step+anX+i];
|
||||
#endif
|
||||
|
||||
#else // !ABF_GAUSSIAN
|
||||
|
||||
#if VAR_PER_CHANNEL
|
||||
weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) *
|
||||
(float4)(lut[lut_j*lut_step+anX+i]);
|
||||
#else
|
||||
weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
|
||||
mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z));
|
||||
#endif
|
||||
weight = ((float)lut[lut_j*lut_step+anX+i]) /(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
|
||||
mul24(currWRTCenter.z, currWRTCenter.z))/(var[extraCnt].x+var[extraCnt].y+var[extraCnt].z));
|
||||
#endif
|
||||
|
||||
#endif //ABF_GAUSSIAN
|
||||
|
||||
|
||||
|
||||
#endif // FIXED_WEIGHT
|
||||
|
||||
tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
|
||||
totalWeight += weight;
|
||||
}
|
||||
}
|
||||
|
||||
tmp_sum[extraCnt] /= totalWeight;
|
||||
|
||||
if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
|
||||
dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
|
||||
dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4_rtz( (tmp_sum[extraCnt] / (float4)totalWeight) + (float4)0.5f);
|
||||
|
||||
#if VAR_PER_CHANNEL
|
||||
totalWeight = (float4)(0,0,0,0);
|
||||
#else
|
||||
totalWeight = 0;
|
||||
totalWeight = 0.0f;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -254,7 +271,7 @@ edgeEnhancingFilter_C4_D0(
|
||||
|
||||
|
||||
__kernel void
|
||||
edgeEnhancingFilter_C1_D0(
|
||||
adaptiveBilateralFilter_C1_D0(
|
||||
__global const uchar * restrict src,
|
||||
__global uchar *dst,
|
||||
float alpha,
|
||||
@@ -343,10 +360,11 @@ edgeEnhancingFilter_C1_D0(
|
||||
//find variance of all data
|
||||
int startLMj;
|
||||
int endLMj;
|
||||
#if CALCVAR
|
||||
|
||||
// Top row: don't sum the very last element
|
||||
for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
|
||||
{
|
||||
#if CALCVAR
|
||||
startLMj = extraCnt;
|
||||
endLMj = ksY+extraCnt-1;
|
||||
sumVal = 0;
|
||||
@@ -361,9 +379,9 @@ edgeEnhancingFilter_C1_D0(
|
||||
sumValSqr += mul24(currVal, currVal);
|
||||
}
|
||||
}
|
||||
var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ;
|
||||
var[extraCnt] = clamp((float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) , 0.1f, (float)(MAX_VAR_VAL) );
|
||||
#else
|
||||
var[extraCnt] = (float)(900.0);
|
||||
var[extraCnt] = (float)(MAX_VAR_VAL);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -389,19 +407,20 @@ edgeEnhancingFilter_C1_D0(
|
||||
currVal = (int)(data[j][col+anX+i]) ;
|
||||
currWRTCenter = currVal-currValCenter;
|
||||
|
||||
#if ABF_GAUSSIAN
|
||||
weight = exp( -0.5f * (float)mul24(currWRTCenter,currWRTCenter)/var[extraCnt]) * lut[lut_j*lut_step+anX+i] ;
|
||||
#else
|
||||
weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
|
||||
#endif
|
||||
#endif
|
||||
tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
|
||||
totalWeight += weight;
|
||||
}
|
||||
}
|
||||
|
||||
tmp_sum[extraCnt] /= totalWeight;
|
||||
|
||||
|
||||
if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
|
||||
{
|
||||
dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]);
|
||||
dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = convert_uchar_rtz(tmp_sum[extraCnt]/totalWeight+0.5f);
|
||||
}
|
||||
|
||||
totalWeight = 0;
|
||||
|
||||
Reference in New Issue
Block a user