add adaptive bilateral filter (cpp and ocl version)
This commit is contained in:
@@ -64,6 +64,7 @@ extern const char *filter_sep_row;
|
||||
extern const char *filter_sep_col;
|
||||
extern const char *filtering_laplacian;
|
||||
extern const char *filtering_morph;
|
||||
extern const char *filtering_adaptive_bilateral;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1616,3 +1617,100 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
|
||||
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
|
||||
f->apply(src, dst);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Adaptive Bilateral Filter
|
||||
|
||||
void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
|
||||
{
|
||||
CV_Assert((ksize.width & 1) && (ksize.height & 1)); // ksize must be odd
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3); // source must be 8bit RGB image
|
||||
if( sigmaSpace <= 0 )
|
||||
sigmaSpace = 1;
|
||||
Mat lut(Size(ksize.width, ksize.height), CV_32FC1);
|
||||
double sigma2 = sigmaSpace * sigmaSpace;
|
||||
int idx = 0;
|
||||
int w = ksize.width / 2;
|
||||
int h = ksize.height / 2;
|
||||
for(int y=-h; y<=h; y++)
|
||||
for(int x=-w; x<=w; x++)
|
||||
{
|
||||
lut.at<float>(idx++) = sigma2 / (sigma2 + x * x + y * y);
|
||||
}
|
||||
oclMat dlut(lut);
|
||||
int depth = src.depth();
|
||||
int cn = src.oclchannels();
|
||||
|
||||
normalizeAnchor(anchor, ksize);
|
||||
const static String kernelName = "edgeEnhancingFilter";
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
char btype[30];
|
||||
switch(borderType)
|
||||
{
|
||||
case BORDER_CONSTANT:
|
||||
sprintf(btype, "BORDER_CONSTANT");
|
||||
break;
|
||||
case BORDER_REPLICATE:
|
||||
sprintf(btype, "BORDER_REPLICATE");
|
||||
break;
|
||||
case BORDER_REFLECT:
|
||||
sprintf(btype, "BORDER_REFLECT");
|
||||
break;
|
||||
case BORDER_WRAP:
|
||||
sprintf(btype, "BORDER_WRAP");
|
||||
break;
|
||||
case BORDER_REFLECT101:
|
||||
sprintf(btype, "BORDER_REFLECT_101");
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadArg, "This border type is not supported");
|
||||
break;
|
||||
}
|
||||
|
||||
//the following constants may be adjusted for performance concerns
|
||||
const static size_t blockSizeX = 64, blockSizeY = 1, EXTRA = ksize.height - 1;
|
||||
|
||||
//Normalize the result by default
|
||||
const float alpha = ksize.height * ksize.width;
|
||||
|
||||
const size_t gSize = blockSizeX - ksize.width / 2 * 2;
|
||||
const size_t globalSizeX = (src.cols) % gSize == 0 ?
|
||||
src.cols / gSize * blockSizeX :
|
||||
(src.cols / gSize + 1) * blockSizeX;
|
||||
const size_t rows_per_thread = 1 + EXTRA;
|
||||
const size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
|
||||
((src.rows + rows_per_thread - 1) / rows_per_thread) :
|
||||
(((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
|
||||
|
||||
size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
|
||||
size_t localThreads[3] = { blockSizeX, blockSizeY, 1};
|
||||
|
||||
char build_options[250];
|
||||
|
||||
//LDATATYPESIZE is sizeof local data store. This is to exemplify effect of LDS on kernel performance
|
||||
sprintf(build_options,
|
||||
"-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
|
||||
" -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
|
||||
static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
|
||||
|
||||
std::vector<pair<size_t , const void *> > args;
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), &dlut.data));
|
||||
int lut_step = dlut.step1();
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&lut_step));
|
||||
|
||||
openCLExecuteKernel(Context::getContext(), &filtering_adaptive_bilateral, kernelName,
|
||||
globalThreads, localThreads, args, cn, depth, build_options);
|
||||
}
|
||||
424
modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
Normal file
424
modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
Normal file
@@ -0,0 +1,424 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Harris Gasparakis, harris.gasparakis@amd.com
|
||||
// Xiaopeng Fu, fuxiaopeng2222@163.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
#ifdef BORDER_REPLICATE
|
||||
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr))
|
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (t_edge) :(i))
|
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (b_edge)-1 :(addr))
|
||||
#endif
|
||||
|
||||
#ifdef BORDER_REFLECT
|
||||
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
|
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i)-1 : (i))
|
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
|
||||
#endif
|
||||
|
||||
#ifdef BORDER_REFLECT_101
|
||||
//BORDER_REFLECT_101: gfedcb|abcdefgh|gfedcba
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
|
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i) : (i))
|
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
|
||||
#endif
|
||||
|
||||
//blur function does not support BORDER_WRAP
|
||||
#ifdef BORDER_WRAP
|
||||
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
|
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (i)+(b_edge) : (i))
|
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
|
||||
#endif
|
||||
|
||||
__kernel void
|
||||
edgeEnhancingFilter_C4_D0(
|
||||
__global const uchar4 * restrict src,
|
||||
__global uchar4 *dst,
|
||||
float alpha,
|
||||
int src_offset,
|
||||
int src_whole_rows,
|
||||
int src_whole_cols,
|
||||
int src_step,
|
||||
int dst_offset,
|
||||
int dst_rows,
|
||||
int dst_cols,
|
||||
int dst_step,
|
||||
__global const float* lut,
|
||||
int lut_step)
|
||||
{
|
||||
int col = get_local_id(0);
|
||||
const int gX = get_group_id(0);
|
||||
const int gY = get_group_id(1);
|
||||
|
||||
int src_x_off = (src_offset % src_step) >> 2;
|
||||
int src_y_off = src_offset / src_step;
|
||||
int dst_x_off = (dst_offset % dst_step) >> 2;
|
||||
int dst_y_off = dst_offset / dst_step;
|
||||
|
||||
int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
|
||||
int startY = (gY * (1+EXTRA)) - anY + src_y_off;
|
||||
|
||||
int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
|
||||
int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
|
||||
|
||||
int posX = dst_startX - dst_x_off + col;
|
||||
int posY = (gY * (1+EXTRA)) ;
|
||||
|
||||
__local uchar4 data[ksY+EXTRA][THREADS];
|
||||
|
||||
float4 tmp_sum[1+EXTRA];
|
||||
for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
|
||||
{
|
||||
tmp_sum[tmpint] = (float4)(0,0,0,0);
|
||||
}
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
bool con;
|
||||
uchar4 ss;
|
||||
for(int j = 0; j < ksY+EXTRA; j++)
|
||||
{
|
||||
con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
|
||||
|
||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||
if(con)
|
||||
{
|
||||
ss = src[(startY+j)*(src_step>>2) + cur_col];
|
||||
}
|
||||
|
||||
data[j][col] = con ? ss : (uchar4)0;
|
||||
}
|
||||
#else
|
||||
for(int j= 0; j < ksY+EXTRA; j++)
|
||||
{
|
||||
int selected_row;
|
||||
int selected_col;
|
||||
selected_row = ADDR_H(startY+j, 0, src_whole_rows);
|
||||
selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
|
||||
|
||||
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
||||
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
||||
|
||||
data[j][col] = src[selected_row * (src_step>>2) + selected_col];
|
||||
}
|
||||
#endif
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float4 var[1+EXTRA];
|
||||
|
||||
#if VAR_PER_CHANNEL
|
||||
float4 weight;
|
||||
float4 totalWeight = (float4)(0,0,0,0);
|
||||
#else
|
||||
float weight;
|
||||
float totalWeight = 0;
|
||||
#endif
|
||||
|
||||
int4 currValCenter;
|
||||
int4 currWRTCenter;
|
||||
|
||||
int4 sumVal = 0;
|
||||
int4 sumValSqr = 0;
|
||||
|
||||
if(col < (THREADS-(ksX-1)))
|
||||
{
|
||||
int4 currVal;
|
||||
|
||||
int howManyAll = (2*anX+1)*(ksY);
|
||||
|
||||
//find variance of all data
|
||||
int startLMj;
|
||||
int endLMj ;
|
||||
#if CALCVAR
|
||||
// Top row: don't sum the very last element
|
||||
for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
|
||||
{
|
||||
startLMj = extraCnt;
|
||||
endLMj = ksY+extraCnt-1;
|
||||
sumVal =0;
|
||||
sumValSqr=0;
|
||||
for(int j = startLMj; j < endLMj; j++)
|
||||
{
|
||||
for(int i=-anX; i<=anX; i++)
|
||||
{
|
||||
currVal = convert_int4(data[j][col+anX+i]) ;
|
||||
|
||||
sumVal += currVal;
|
||||
sumValSqr += mul24(currVal, currVal);
|
||||
}
|
||||
}
|
||||
var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ;
|
||||
#else
|
||||
var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0);
|
||||
#endif
|
||||
}
|
||||
|
||||
for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
|
||||
{
|
||||
|
||||
// top row: include the very first element, even on first time
|
||||
startLMj = extraCnt;
|
||||
// go all the way, unless this is the last local mem chunk,
|
||||
// then stay within limits - 1
|
||||
endLMj = extraCnt + ksY;
|
||||
|
||||
// Top row: don't sum the very last element
|
||||
currValCenter = convert_int4( data[ (startLMj + endLMj)/2][col+anX] );
|
||||
|
||||
for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
|
||||
{
|
||||
for(int i=-anX; i<=anX; i++)
|
||||
{
|
||||
#if FIXED_WEIGHT
|
||||
#if VAR_PER_CHANNEL
|
||||
weight.x = 1.0f;
|
||||
weight.y = 1.0f;
|
||||
weight.z = 1.0f;
|
||||
weight.w = 1.0f;
|
||||
#else
|
||||
weight = 1.0f;
|
||||
#endif
|
||||
#else
|
||||
currVal = convert_int4(data[j][col+anX+i]) ;
|
||||
currWRTCenter = currVal-currValCenter;
|
||||
|
||||
#if VAR_PER_CHANNEL
|
||||
weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) * (float4)(lut[lut_j*lut_step+anX+i]);
|
||||
//weight.x = var[extraCnt].x / ( var[extraCnt].x + (float) mul24(currWRTCenter.x , currWRTCenter.x) ) ;
|
||||
//weight.y = var[extraCnt].y / ( var[extraCnt].y + (float) mul24(currWRTCenter.y , currWRTCenter.y) ) ;
|
||||
//weight.z = var[extraCnt].z / ( var[extraCnt].z + (float) mul24(currWRTCenter.z , currWRTCenter.z) ) ;
|
||||
//weight.w = 0;
|
||||
#else
|
||||
weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) + mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z));
|
||||
#endif
|
||||
#endif
|
||||
tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
|
||||
totalWeight += weight;
|
||||
}
|
||||
}
|
||||
|
||||
tmp_sum[extraCnt] /= totalWeight;
|
||||
|
||||
if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
|
||||
{
|
||||
dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
|
||||
}
|
||||
|
||||
#if VAR_PER_CHANNEL
|
||||
totalWeight = (float4)(0,0,0,0);
|
||||
#else
|
||||
totalWeight = 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel void
|
||||
edgeEnhancingFilter_C1_D0(
|
||||
__global const uchar * restrict src,
|
||||
__global uchar *dst,
|
||||
float alpha,
|
||||
int src_offset,
|
||||
int src_whole_rows,
|
||||
int src_whole_cols,
|
||||
int src_step,
|
||||
int dst_offset,
|
||||
int dst_rows,
|
||||
int dst_cols,
|
||||
int dst_step,
|
||||
__global const float * lut,
|
||||
int lut_step)
|
||||
{
|
||||
int col = get_local_id(0);
|
||||
const int gX = get_group_id(0);
|
||||
const int gY = get_group_id(1);
|
||||
|
||||
int src_x_off = (src_offset % src_step);
|
||||
int src_y_off = src_offset / src_step;
|
||||
int dst_x_off = (dst_offset % dst_step);
|
||||
int dst_y_off = dst_offset / dst_step;
|
||||
|
||||
int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
|
||||
int startY = (gY * (1+EXTRA)) - anY + src_y_off;
|
||||
|
||||
int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
|
||||
int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
|
||||
|
||||
int posX = dst_startX - dst_x_off + col;
|
||||
int posY = (gY * (1+EXTRA)) ;
|
||||
|
||||
__local uchar data[ksY+EXTRA][THREADS];
|
||||
|
||||
float tmp_sum[1+EXTRA];
|
||||
for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
|
||||
{
|
||||
tmp_sum[tmpint] = (float)(0);
|
||||
}
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
bool con;
|
||||
uchar ss;
|
||||
for(int j = 0; j < ksY+EXTRA; j++)
|
||||
{
|
||||
con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
|
||||
|
||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||
if(con)
|
||||
{
|
||||
ss = src[(startY+j)*(src_step) + cur_col];
|
||||
}
|
||||
|
||||
data[j][col] = con ? ss : 0;
|
||||
}
|
||||
#else
|
||||
for(int j= 0; j < ksY+EXTRA; j++)
|
||||
{
|
||||
int selected_row;
|
||||
int selected_col;
|
||||
selected_row = ADDR_H(startY+j, 0, src_whole_rows);
|
||||
selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
|
||||
|
||||
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
||||
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
||||
|
||||
data[j][col] = src[selected_row * (src_step) + selected_col];
|
||||
}
|
||||
#endif
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float var[1+EXTRA];
|
||||
|
||||
float weight;
|
||||
float totalWeight = 0;
|
||||
|
||||
int currValCenter;
|
||||
int currWRTCenter;
|
||||
|
||||
int sumVal = 0;
|
||||
int sumValSqr = 0;
|
||||
|
||||
if(col < (THREADS-(ksX-1)))
|
||||
{
|
||||
int currVal;
|
||||
|
||||
int howManyAll = (2*anX+1)*(ksY);
|
||||
|
||||
//find variance of all data
|
||||
int startLMj;
|
||||
int endLMj;
|
||||
#if CALCVAR
|
||||
// Top row: don't sum the very last element
|
||||
for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
|
||||
{
|
||||
startLMj = extraCnt;
|
||||
endLMj = ksY+extraCnt-1;
|
||||
sumVal = 0;
|
||||
sumValSqr =0;
|
||||
for(int j = startLMj; j < endLMj; j++)
|
||||
{
|
||||
for(int i=-anX; i<=anX; i++)
|
||||
{
|
||||
currVal = (uint)(data[j][col+anX+i]) ;
|
||||
|
||||
sumVal += currVal;
|
||||
sumValSqr += mul24(currVal, currVal);
|
||||
}
|
||||
}
|
||||
var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ;
|
||||
#else
|
||||
var[extraCnt] = (float)(900.0);
|
||||
#endif
|
||||
}
|
||||
|
||||
for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
|
||||
{
|
||||
|
||||
// top row: include the very first element, even on first time
|
||||
startLMj = extraCnt;
|
||||
// go all the way, unless this is the last local mem chunk,
|
||||
// then stay within limits - 1
|
||||
endLMj = extraCnt + ksY;
|
||||
|
||||
// Top row: don't sum the very last element
|
||||
currValCenter = (int)( data[ (startLMj + endLMj)/2][col+anX] );
|
||||
|
||||
for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
|
||||
{
|
||||
for(int i=-anX; i<=anX; i++)
|
||||
{
|
||||
#if FIXED_WEIGHT
|
||||
weight = 1.0f;
|
||||
#else
|
||||
currVal = (int)(data[j][col+anX+i]) ;
|
||||
currWRTCenter = currVal-currValCenter;
|
||||
|
||||
weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
|
||||
#endif
|
||||
tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
|
||||
totalWeight += weight;
|
||||
}
|
||||
}
|
||||
|
||||
tmp_sum[extraCnt] /= totalWeight;
|
||||
|
||||
|
||||
if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
|
||||
{
|
||||
dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]);
|
||||
}
|
||||
|
||||
totalWeight = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user