Merge pull request #1693 from ilya-lavrenov:ocl_adaptiveBilateralFilter

This commit is contained in:
Andrey Pavlenko 2013-10-25 19:34:33 +04:00 committed by OpenCV Buildbot
commit 92c43a8646

View File

@ -45,38 +45,43 @@
// //
//M*/ //M*/
#ifdef BORDER_CONSTANT
#ifdef BORDER_REPLICATE #define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh #elif defined BORDER_REPLICATE
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i)) #define EXTRAPOLATE(x, maxV) \
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr)) { \
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (t_edge) :(i)) x = max(min(x, maxV - 1), 0); \
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (b_edge)-1 :(addr)) }
#endif #elif defined BORDER_WRAP
#define EXTRAPOLATE(x, maxV) \
{ \
if (x < 0) \
x -= ((x - maxV + 1) / maxV) * maxV; \
if (x >= maxV) \
x %= maxV; \
}
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
#define EXTRAPOLATE_(x, maxV, delta) \
{ \
if (maxV == 1) \
x = 0; \
else \
do \
{ \
if ( x < 0 ) \
x = -x - 1 + delta; \
else \
x = maxV - 1 - (x - maxV) - delta; \
} \
while (x >= maxV || x < 0); \
}
#ifdef BORDER_REFLECT #ifdef BORDER_REFLECT
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb #define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 0)
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i)) #else
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr)) #define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 1)
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i)-1 : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
#endif #endif
#else
#ifdef BORDER_REFLECT_101 #error No extrapolation method
//BORDER_REFLECT_101: gfedcb|abcdefgh|gfedcba
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i) : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
#endif
//blur function does not support BORDER_WRAP
#ifdef BORDER_WRAP
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (i)+(b_edge) : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
#endif #endif
__kernel void __kernel void
@ -117,9 +122,7 @@ edgeEnhancingFilter_C4_D0(
float4 tmp_sum[1+EXTRA]; float4 tmp_sum[1+EXTRA];
for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++) for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
{
tmp_sum[tmpint] = (float4)(0,0,0,0); tmp_sum[tmpint] = (float4)(0,0,0,0);
}
#ifdef BORDER_CONSTANT #ifdef BORDER_CONSTANT
bool con; bool con;
@ -127,25 +130,18 @@ edgeEnhancingFilter_C4_D0(
for(int j = 0; j < ksY+EXTRA; j++) for(int j = 0; j < ksY+EXTRA; j++)
{ {
con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows); con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
int cur_col = clamp(startX + col, 0, src_whole_cols); int cur_col = clamp(startX + col, 0, src_whole_cols);
if(con) if (con)
{
ss = src[(startY+j)*(src_step>>2) + cur_col]; ss = src[(startY+j)*(src_step>>2) + cur_col];
}
data[j][col] = con ? ss : (uchar4)0; data[j][col] = con ? ss : (uchar4)0;
} }
#else #else
for(int j= 0; j < ksY+EXTRA; j++) for(int j= 0; j < ksY+EXTRA; j++)
{ {
int selected_row; int selected_row = startY+j, selected_col = startX+col;
int selected_col; EXTRAPOLATE(selected_row, src_whole_rows)
selected_row = ADDR_H(startY+j, 0, src_whole_rows); EXTRAPOLATE(selected_col, src_whole_cols)
selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
data[j][col] = src[selected_row * (src_step>>2) + selected_col]; data[j][col] = src[selected_row * (src_step>>2) + selected_col];
} }
@ -172,7 +168,6 @@ edgeEnhancingFilter_C4_D0(
if(col < (THREADS-(ksX-1))) if(col < (THREADS-(ksX-1)))
{ {
int4 currVal; int4 currVal;
int howManyAll = (2*anX+1)*(ksY); int howManyAll = (2*anX+1)*(ksY);
//find variance of all data //find variance of all data
@ -187,15 +182,14 @@ edgeEnhancingFilter_C4_D0(
sumVal =0; sumVal =0;
sumValSqr=0; sumValSqr=0;
for(int j = startLMj; j < endLMj; j++) for(int j = startLMj; j < endLMj; j++)
{
for(int i=-anX; i<=anX; i++) for(int i=-anX; i<=anX; i++)
{ {
currVal = convert_int4(data[j][col+anX+i]) ; currVal = convert_int4(data[j][col+anX+i]);
sumVal += currVal; sumVal += currVal;
sumValSqr += mul24(currVal, currVal); sumValSqr += mul24(currVal, currVal);
} }
}
var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ; var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ;
#else #else
var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0); var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0);
@ -228,17 +222,15 @@ edgeEnhancingFilter_C4_D0(
weight = 1.0f; weight = 1.0f;
#endif #endif
#else #else
currVal = convert_int4(data[j][col+anX+i]) ; currVal = convert_int4(data[j][col+anX+i]);
currWRTCenter = currVal-currValCenter; currWRTCenter = currVal-currValCenter;
#if VAR_PER_CHANNEL #if VAR_PER_CHANNEL
weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) * (float4)(lut[lut_j*lut_step+anX+i]); weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) *
//weight.x = var[extraCnt].x / ( var[extraCnt].x + (float) mul24(currWRTCenter.x , currWRTCenter.x) ) ; (float4)(lut[lut_j*lut_step+anX+i]);
//weight.y = var[extraCnt].y / ( var[extraCnt].y + (float) mul24(currWRTCenter.y , currWRTCenter.y) ) ;
//weight.z = var[extraCnt].z / ( var[extraCnt].z + (float) mul24(currWRTCenter.z , currWRTCenter.z) ) ;
//weight.w = 0;
#else #else
weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) + mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z)); weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z));
#endif #endif
#endif #endif
tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight; tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
@ -249,9 +241,7 @@ edgeEnhancingFilter_C4_D0(
tmp_sum[extraCnt] /= totalWeight; tmp_sum[extraCnt] /= totalWeight;
if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows) if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
{
dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]); dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
}
#if VAR_PER_CHANNEL #if VAR_PER_CHANNEL
totalWeight = (float4)(0,0,0,0); totalWeight = (float4)(0,0,0,0);
@ -323,13 +313,9 @@ edgeEnhancingFilter_C1_D0(
#else #else
for(int j= 0; j < ksY+EXTRA; j++) for(int j= 0; j < ksY+EXTRA; j++)
{ {
int selected_row; int selected_row = startY+j, selected_col = startX+col;
int selected_col; EXTRAPOLATE(selected_row, src_whole_rows)
selected_row = ADDR_H(startY+j, 0, src_whole_rows); EXTRAPOLATE(selected_col, src_whole_cols)
selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
data[j][col] = src[selected_row * (src_step) + selected_col]; data[j][col] = src[selected_row * (src_step) + selected_col];
} }