Cleaned up adaptive bilateral filtering, added support for Gaussian interpolation, updated sample and docs

2013-11-05 07:04:04 -05:00
parent 370235c07b
commit a1de91a4fd
9 changed files with 173 additions and 80 deletions
--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@@ -20,6 +20,7 @@
 //    Zero Lin, Zero.Lin@amd.com
 //    Zhang Ying, zhangying913@gmail.com
 //    Yao Wang, bitwangyaoyao@gmail.com
+//    Harris Gasparakis, harris.gasparakis@amd.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -1407,7 +1408,7 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Adaptive Bilateral Filter

-void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
+void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor, Point anchor, int borderType)
 {
    CV_Assert((ksize.width & 1) && (ksize.height & 1));  // ksize must be odd
    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);  // source must be 8bit RGB image
@@ -1418,10 +1419,24 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
    int idx = 0;
    int w = ksize.width / 2;
    int h = ksize.height / 2;
-    for(int y=-h; y<=h; y++)
-        for(int x=-w; x<=w; x++)
+
+    int ABF_GAUSSIAN_ocl = 1;
+
+    if(ABF_GAUSSIAN_ocl)
    {
-        lut.at<float>(idx++) = sigma2 / (sigma2 + x * x + y * y);
+        for(int y=-h; y<=h; y++)
+            for(int x=-w; x<=w; x++)
+        {
+            lut.at<float>(idx++) = expf( (float)(-0.5 * (x * x + y * y)/sigma2));
+        }
+    }
+    else
+    {
+        for(int y=-h; y<=h; y++)
+            for(int x=-w; x<=w; x++)
+        {
+            lut.at<float>(idx++) = (float) (sigma2 / (sigma2 + x * x + y * y));
+        }
    }

    oclMat dlut(lut);
@@ -1429,7 +1444,7 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
    int cn = src.oclchannels();

    normalizeAnchor(anchor, ksize);
-    const static String kernelName = "edgeEnhancingFilter";
+    const static String kernelName = "adaptiveBilateralFilter";

    dst.create(src.size(), src.type());

@@ -1478,9 +1493,10 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize

    //LDATATYPESIZE is sizeof local data store. This is to exemplify effect of LDS on kernel performance
    sprintf(build_options,
-        "-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
+        "-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d -D MAX_VAR_VAL=%f -D ABF_GAUSSIAN=%d"
        " -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
-        static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
+        static_cast<int>(EXTRA), static_cast<float>(maxSigmaColor*maxSigmaColor), static_cast<int>(ABF_GAUSSIAN_ocl),
+        static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);

    std::vector<pair<size_t , const void *> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
--- a/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
+++ b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
@@ -85,7 +85,7 @@
 #endif

 __kernel void
-edgeEnhancingFilter_C4_D0(
+adaptiveBilateralFilter_C4_D0(
    __global const uchar4 * restrict src,
    __global uchar4 *dst,
    float alpha,
@@ -173,14 +173,14 @@ edgeEnhancingFilter_C4_D0(
        //find variance of all data
        int startLMj;
        int endLMj ;
-#if CALCVAR
        // Top row: don't sum the very last element
        for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
        {
+#if CALCVAR
            startLMj = extraCnt;
            endLMj =  ksY+extraCnt-1;
-            sumVal =0;
-            sumValSqr=0;
+            sumVal = (int4)0;
+            sumValSqr= (int4)0;
            for(int j = startLMj; j < endLMj; j++)
                for(int i=-anX; i<=anX; i++)
                {
@@ -190,9 +190,10 @@ edgeEnhancingFilter_C4_D0(
                    sumValSqr += mul24(currVal, currVal);
                }

-            var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+            var[extraCnt] = clamp( convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ), (float4)(0.1f, 0.1f, 0.1f, 0.1f), (float4)(MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL)) ;
+
 #else
-        var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0);
+            var[extraCnt] = (float4)(MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL);
 #endif
        }

@@ -221,32 +222,48 @@ edgeEnhancingFilter_C4_D0(
 #else
                    weight = 1.0f;
 #endif
-#else
+#else // !FIXED_WEIGHT
                    currVal = convert_int4(data[j][col+anX+i]);
                    currWRTCenter = currVal-currValCenter;

+#if ABF_GAUSSIAN
+
+#if VAR_PER_CHANNEL
+                    weight = exp( (float4)(-0.5f, -0.5f, -0.5f, -0.5f) * convert_float4(currWRTCenter * currWRTCenter) / var[extraCnt] )*
+                        (float4)(lut[lut_j*lut_step+anX+i]);
+#else
+                    weight = exp( -0.5f * (mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
+                        mul24(currWRTCenter.z, currWRTCenter.z) ) / (var[extraCnt].x+var[extraCnt].y+var[extraCnt].z) ) * lut[lut_j*lut_step+anX+i];
+#endif
+
+#else // !ABF_GAUSSIAN
+
 #if VAR_PER_CHANNEL
                    weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) *
                        (float4)(lut[lut_j*lut_step+anX+i]);
 #else
-                    weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
-                        mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z));
-#endif
+                    weight = ((float)lut[lut_j*lut_step+anX+i]) /(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
+                        mul24(currWRTCenter.z, currWRTCenter.z))/(var[extraCnt].x+var[extraCnt].y+var[extraCnt].z));
 #endif
+
+#endif //ABF_GAUSSIAN
+
+
+
+#endif  // FIXED_WEIGHT
+
                    tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
                    totalWeight += weight;
                }
            }

-            tmp_sum[extraCnt] /= totalWeight;
-
            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
-                dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
+                dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4_rtz( (tmp_sum[extraCnt] / (float4)totalWeight) + (float4)0.5f);

 #if VAR_PER_CHANNEL
            totalWeight = (float4)(0,0,0,0);
 #else
-            totalWeight = 0;
+            totalWeight = 0.0f;
 #endif
        }
    }
@@ -254,7 +271,7 @@ edgeEnhancingFilter_C4_D0(


 __kernel void
-edgeEnhancingFilter_C1_D0(
+adaptiveBilateralFilter_C1_D0(
    __global const uchar * restrict src,
    __global uchar *dst,
    float alpha,
@@ -343,10 +360,11 @@ edgeEnhancingFilter_C1_D0(
        //find variance of all data
        int startLMj;
        int endLMj;
-#if CALCVAR
+
        // Top row: don't sum the very last element
        for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
        {
+#if CALCVAR
            startLMj = extraCnt;
            endLMj =  ksY+extraCnt-1;
            sumVal = 0;
@@ -361,9 +379,9 @@ edgeEnhancingFilter_C1_D0(
                    sumValSqr += mul24(currVal, currVal);
                }
            }
-            var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+            var[extraCnt] =  clamp((float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) , 0.1f, (float)(MAX_VAR_VAL) );
 #else
-        var[extraCnt] = (float)(900.0);
+            var[extraCnt] = (float)(MAX_VAR_VAL);
 #endif
        }

@@ -389,19 +407,20 @@ edgeEnhancingFilter_C1_D0(
                    currVal	= (int)(data[j][col+anX+i])	;
                    currWRTCenter = currVal-currValCenter;

+#if ABF_GAUSSIAN
+                    weight = exp( -0.5f * (float)mul24(currWRTCenter,currWRTCenter)/var[extraCnt]) * lut[lut_j*lut_step+anX+i] ;
+#else
                    weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
+#endif
 #endif
                    tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
                    totalWeight += weight;
                }
            }

-            tmp_sum[extraCnt] /= totalWeight;
-
-
            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
            {
-                dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]);
+                dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = convert_uchar_rtz(tmp_sum[extraCnt]/totalWeight+0.5f);
            }

            totalWeight = 0;