Changed kernel pointer types from sample_t * to uchar *. Rescaled psz accordingly, so that it is now a size in bytes.

This commit is contained in:
Erik Karlsson 2015-03-03 03:02:44 +01:00
parent d56d04e41b
commit 69eae13ff3
2 changed files with 16 additions and 20 deletions

View File

@ -89,13 +89,13 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
char buf[4][40];
String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D sample_t=%s -D pixel_t=%s -D int_t=%s"
" -D pixel_t=%s -D int_t=%s"
" -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s",
templateWindowSize, searchWindowSize,
ocl::typeToStr(depth), ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)),
ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)),
depth == CV_8U ? ocl::typeToStr(CV_32S) : "long",
depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) :
(sprintf(buf[0], "long%d", cn), buf[0]),
@ -103,7 +103,8 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
(sprintf(buf[1], "convert_long%d", cn), buf[1]),
BLOCK_COLS, BLOCK_ROWS,
ctaSize, templateWindowHalfWize, searchWindowHalfSize,
ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn, cn == 3 ? 4 : cn,
ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn,
(depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn),
ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), abs ? " -D ABS" : "");
ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);

View File

@ -97,7 +97,7 @@ inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_v
#define COND if (x == 0 && y == 0)
inline void calcFirstElementInRow(__global const sample_t * src, int src_step, int src_offset,
inline void calcFirstElementInRow(__global const uchar * src, int src_step, int src_offset,
__local int * dists, int y, int x, int id,
__global int * col_dists, __global int * up_col_dists)
{
@ -129,8 +129,8 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i
dist += value;
}
src_current = (__global const pixel_t *)((__global const sample_t *)src_current + src_step);
src_template = (__global const pixel_t *)((__global const sample_t *)src_template + src_step);
src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
}
#pragma unroll
@ -142,7 +142,7 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i
}
}
inline void calcElementInFirstRow(__global const sample_t * src, int src_step, int src_offset,
inline void calcElementInFirstRow(__global const uchar * src, int src_step, int src_offset,
__local int * dists, int y, int x0, int x, int id, int first,
__global int * col_dists, __global int * up_col_dists)
{
@ -164,8 +164,8 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i
{
col_dist += calcDist(src_current[0], src_template[0]);
src_current = (__global const pixel_t *)((__global const sample_t *)src_current + src_step);
src_template = (__global const pixel_t *)((__global const sample_t *)src_template + src_step);
src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
}
dists[i] += col_dist - col_dists_current[first];
@ -174,7 +174,7 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i
}
}
inline void calcElement(__global const sample_t * src, int src_step, int src_offset,
inline void calcElement(__global const uchar * src, int src_step, int src_offset,
__local int * dists, int y, int x0, int x, int id, int first,
__global int * col_dists, __global int * up_col_dists)
{
@ -207,9 +207,9 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off
}
}
inline void convolveWindow(__global const sample_t * src, int src_step, int src_offset,
inline void convolveWindow(__global const uchar * src, int src_step, int src_offset,
__local int * dists, __global const int * almostDist2Weight,
__global sample_t * dst, int dst_step, int dst_offset,
__global uchar * dst, int dst_step, int dst_offset,
int y, int x, int id, __local weight_t * weights_local,
__local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
{
@ -255,9 +255,9 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_
}
}
__kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step, int src_offset,
__global sample_t * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__global const int * almostDist2Weight, __global sample_t * buffer,
__kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__global const int * almostDist2Weight, __global uchar * buffer,
int almostTemplateWindowSizeSqBinShift)
{
int block_x = get_group_id(0), nblocks_x = get_num_groups(0);
@ -277,11 +277,6 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step,
__global int * col_dists = (__global int *)(buffer + block_data_start * sizeof(int));
__global int * up_col_dists = col_dists + SEARCH_SIZE_SQ * TEMPLATE_SIZE;
src_step /= sizeof(sample_t);
src_offset /= sizeof(sample_t);
dst_step /= sizeof(sample_t);
dst_offset /= sizeof(sample_t);
for (int y = y0; y < y1; ++y)
for (int x = x0; x < x1; ++x)
{