format files to ANSI C style with coolformat
change the download channels to oclchannels(); fix bugs of arithm functions; perf fix of bilateral; bug fix of split test case; add build_warps functions
This commit is contained in:
@@ -70,9 +70,22 @@ __kernel void arithm_absdiff_D0 (__global uchar *src1, int src1_step, int src1_o
|
||||
int dst_start = mad24(y, dst_step, dst_offset);
|
||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
||||
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||
if(src1_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
if(src2_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
|
||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||
uchar4 tmp_data = abs_diff(src1_data, src2_data);
|
||||
@@ -242,9 +255,15 @@ __kernel void arithm_s_absdiff_C1_D0 (__global uchar *src1, int src1_step, int
|
||||
int dst_start = mad24(y, dst_step, dst_offset);
|
||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
||||
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
|
||||
if(src1_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
|
||||
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
||||
uchar4 tmp_data = convert_uchar4_sat(abs_diff(convert_int4_sat(src1_data), src2_data));
|
||||
|
||||
@@ -71,10 +71,22 @@ __kernel void arithm_add_D0 (__global uchar *src1, int src1_step, int src1_offse
|
||||
int dst_start = mad24(y, dst_step, dst_offset);
|
||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
||||
|
||||
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||
if(src1_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
if(src2_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||
short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
|
||||
uchar4 tmp_data = convert_uchar4_sat(tmp);
|
||||
@@ -248,11 +260,31 @@ __kernel void arithm_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, i
|
||||
int dst_start = mad24(y, dst_step, dst_offset);
|
||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
||||
uchar4 mask_data = vload4(0, mask + mask_index);
|
||||
|
||||
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||
int mask_index_fix = mask_index < 0 ? 0 : mask_index;
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||
uchar4 mask_data = vload4(0, mask + mask_index_fix);
|
||||
if(src1_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
if(src2_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
if(mask_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (mask_index == -2) ? mask_data.zwxy:mask_data.yzwx;
|
||||
mask_data.xyzw = (mask_index == -1) ? mask_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
|
||||
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
||||
short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
|
||||
uchar4 tmp_data = convert_uchar4_sat(tmp);
|
||||
|
||||
@@ -65,10 +65,16 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src
|
||||
int dst_start = mad24(y, dst_step, dst_offset);
|
||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
||||
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
|
||||
|
||||
if(src1_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
|
||||
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
||||
int4 tmp = convert_int4_sat(src1_data) + src2_data;
|
||||
uchar4 tmp_data = convert_uchar4_sat(tmp);
|
||||
|
||||
@@ -68,10 +68,23 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste
|
||||
int dst_start = mad24(y, dst_step, dst_offset);
|
||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
||||
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||
int mask_index_fix = mask_index < 0 ? 0 : mask_index;
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
|
||||
uchar4 mask_data = vload4(0, mask + mask_index);
|
||||
uchar4 mask_data = vload4(0, mask + mask_index_fix);
|
||||
if(src1_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
if(mask_index < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (mask_index == -2) ? mask_data.zwxy:mask_data.yzwx;
|
||||
mask_data.xyzw = (mask_index == -1) ? mask_data.wxyz:tmp.xyzw;
|
||||
}
|
||||
|
||||
uchar4 data = *((__global uchar4 *)(dst + dst_index));
|
||||
int4 tmp = convert_int4_sat(src1_data) + src2_data;
|
||||
|
||||
@@ -71,9 +71,22 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of
|
||||
int dst_end_1 = mad24(rows - y - 1, dst_step, dst_offset + dst_step1);
|
||||
int dst_index_0 = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||
int dst_index_1 = mad24(rows - y - 1, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||
|
||||
uchar4 src_data_0 = vload4(0, src + src_index_0);
|
||||
uchar4 src_data_1 = vload4(0, src + src_index_1);
|
||||
int src1_index_fix = src_index_0 < 0 ? 0 : src_index_0;
|
||||
int src2_index_fix = src_index_1 < 0 ? 0 : src_index_1;
|
||||
uchar4 src_data_0 = vload4(0, src + src1_index_fix);
|
||||
uchar4 src_data_1 = vload4(0, src + src2_index_fix);
|
||||
if(src_index_0 < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src_index_0 == -2) ? src_data_0.zwxy:src_data_0.yzwx;
|
||||
src_data_0.xyzw = (src_index_0 == -1) ? src_data_0.wxyz:tmp.xyzw;
|
||||
}
|
||||
if(src_index_1 < 0)
|
||||
{
|
||||
uchar4 tmp;
|
||||
tmp.xyzw = (src_index_1 == -2) ? src_data_1.zwxy:src_data_1.yzwx;
|
||||
src_data_1.xyzw = (src_index_1 == -1) ? src_data_1.wxyz:tmp.xyzw;
|
||||
}
|
||||
|
||||
uchar4 dst_data_0 = *((__global uchar4 *)(dst + dst_index_0));
|
||||
uchar4 dst_data_1 = *((__global uchar4 *)(dst + dst_index_1));
|
||||
|
||||
237
modules/ocl/src/kernels/build_warps.cl
Normal file
237
modules/ocl/src/kernels/build_warps.cl
Normal file
@@ -0,0 +1,237 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
//     and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
__kernel
|
||||
void buildWarpPlaneMaps
|
||||
(
|
||||
__global float * map_x,
|
||||
__global float * map_y,
|
||||
__constant float * KRT,
|
||||
int tl_u,
|
||||
int tl_v,
|
||||
int cols,
|
||||
int rows,
|
||||
int step_x,
|
||||
int step_y,
|
||||
float scale
|
||||
)
|
||||
{
|
||||
int du = get_global_id(0);
|
||||
int dv = get_global_id(1);
|
||||
step_x /= sizeof(float);
|
||||
step_y /= sizeof(float);
|
||||
|
||||
__constant float * ck_rinv = KRT;
|
||||
__constant float * ct = KRT + 9;
|
||||
|
||||
if (du < cols && dv < rows)
|
||||
{
|
||||
float u = tl_u + du;
|
||||
float v = tl_v + dv;
|
||||
float x, y;
|
||||
|
||||
float x_ = u / scale - ct[0];
|
||||
float y_ = v / scale - ct[1];
|
||||
|
||||
float z;
|
||||
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]);
|
||||
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]);
|
||||
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]);
|
||||
|
||||
x /= z;
|
||||
y /= z;
|
||||
|
||||
map_x[dv * step_x + du] = x;
|
||||
map_y[dv * step_y + du] = y;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void buildWarpCylindricalMaps
|
||||
(
|
||||
__global float * map_x,
|
||||
__global float * map_y,
|
||||
__constant float * ck_rinv,
|
||||
int tl_u,
|
||||
int tl_v,
|
||||
int cols,
|
||||
int rows,
|
||||
int step_x,
|
||||
int step_y,
|
||||
float scale
|
||||
)
|
||||
{
|
||||
int du = get_global_id(0);
|
||||
int dv = get_global_id(1);
|
||||
step_x /= sizeof(float);
|
||||
step_y /= sizeof(float);
|
||||
|
||||
if (du < cols && dv < rows)
|
||||
{
|
||||
float u = tl_u + du;
|
||||
float v = tl_v + dv;
|
||||
float x, y;
|
||||
|
||||
u /= scale;
|
||||
float x_ = sin(u);
|
||||
float y_ = v / scale;
|
||||
float z_ = cos(u);
|
||||
|
||||
float z;
|
||||
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
|
||||
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
|
||||
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
|
||||
|
||||
if (z > 0) { x /= z; y /= z; }
|
||||
else x = y = -1;
|
||||
|
||||
map_x[dv * step_x + du] = x;
|
||||
map_y[dv * step_y + du] = y;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void buildWarpSphericalMaps
|
||||
(
|
||||
__global float * map_x,
|
||||
__global float * map_y,
|
||||
__constant float * ck_rinv,
|
||||
int tl_u,
|
||||
int tl_v,
|
||||
int cols,
|
||||
int rows,
|
||||
int step_x,
|
||||
int step_y,
|
||||
float scale
|
||||
)
|
||||
{
|
||||
int du = get_global_id(0);
|
||||
int dv = get_global_id(1);
|
||||
step_x /= sizeof(float);
|
||||
step_y /= sizeof(float);
|
||||
|
||||
if (du < cols && dv < rows)
|
||||
{
|
||||
float u = tl_u + du;
|
||||
float v = tl_v + dv;
|
||||
float x, y;
|
||||
|
||||
v /= scale;
|
||||
u /= scale;
|
||||
|
||||
float sinv = sin(v);
|
||||
float x_ = sinv * sin(u);
|
||||
float y_ = - cos(v);
|
||||
float z_ = sinv * cos(u);
|
||||
|
||||
float z;
|
||||
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
|
||||
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
|
||||
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
|
||||
|
||||
if (z > 0) { x /= z; y /= z; }
|
||||
else x = y = -1;
|
||||
|
||||
map_x[dv * step_x + du] = x;
|
||||
map_y[dv * step_y + du] = y;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void buildWarpAffineMaps
|
||||
(
|
||||
__global float * xmap,
|
||||
__global float * ymap,
|
||||
__constant float * c_warpMat,
|
||||
int cols,
|
||||
int rows,
|
||||
int step_x,
|
||||
int step_y
|
||||
)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
step_x /= sizeof(float);
|
||||
step_y /= sizeof(float);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
|
||||
const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
|
||||
|
||||
map_x[y * step_x + x] = xcoo;
|
||||
map_y[y * step_y + x] = ycoo;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void buildWarpPerspectiveMaps
|
||||
(
|
||||
__global float * xmap,
|
||||
__global float * ymap,
|
||||
__constant float * c_warpMat,
|
||||
int cols,
|
||||
int rows,
|
||||
int step_x,
|
||||
int step_y
|
||||
)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
step_x /= sizeof(float);
|
||||
step_y /= sizeof(float);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
|
||||
|
||||
const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
|
||||
const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
|
||||
|
||||
map_x[y * step_x + x] = xcoo;
|
||||
map_y[y * step_y + x] = ycoo;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -254,7 +254,8 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch
|
||||
//ss = convert_uint4(src[cur_addr]);
|
||||
|
||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||
ss = convert_uint4(src[(startY+i)*(src_step>>2) + cur_col]);
|
||||
if(con)
|
||||
ss = convert_uint4(src[(startY+i)*(src_step>>2) + cur_col]);
|
||||
|
||||
data[i] = con ? ss : 0;
|
||||
}
|
||||
@@ -269,6 +270,7 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch
|
||||
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
||||
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
||||
|
||||
|
||||
data[i] = convert_uint4(src[selected_row * (src_step>>2) + selected_col]);
|
||||
}
|
||||
|
||||
@@ -334,11 +336,12 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
con = startX+col >= 0 && startX+col < src_whole_cols && startY+i >= 0 && startY+i < src_whole_rows;
|
||||
// int cur_addr = clamp((startY+i)*(src_step>>2)+(startX+col),0,end_addr);
|
||||
// ss = src[cur_addr];
|
||||
|
||||
//int cur_addr = clamp((startY+i)*(src_step>>2)+(startX+col),0,end_addr);
|
||||
//ss = src[cur_addr];
|
||||
|
||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||
ss = src[(startY+i)*(src_step>>2) + cur_col];
|
||||
//ss = src[(startY+i)*(src_step>>2) + cur_col];
|
||||
ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>2) + cur_col]:0;
|
||||
|
||||
data[i] = con ? ss : 0.f;
|
||||
}
|
||||
@@ -422,7 +425,8 @@ __kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global floa
|
||||
//ss = src[cur_addr];
|
||||
|
||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||
ss = src[(startY+i)*(src_step>>4) + cur_col];
|
||||
//ss = src[(startY+i)*(src_step>>4) + cur_col];
|
||||
ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>4) + cur_col]:0;
|
||||
|
||||
data[i] = con ? ss : (float4)(0.0,0.0,0.0,0.0);
|
||||
}
|
||||
|
||||
@@ -31,84 +31,8 @@
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//
|
||||
|
||||
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf :enable
|
||||
__kernel
|
||||
void bilateral4(__global uchar4 *dst,
|
||||
__global uchar4 *src,
|
||||
int rows,
|
||||
int cols,
|
||||
int channels,
|
||||
int radius,
|
||||
int wholerows,
|
||||
int wholecols,
|
||||
int src_step,
|
||||
int dst_step,
|
||||
int src_offset,
|
||||
int dst_offset,
|
||||
__constant float *sigClr,
|
||||
__constant float *sigSpc)
|
||||
{
|
||||
uint lidx = get_local_id(0);
|
||||
uint lidy = get_local_id(1);
|
||||
|
||||
uint gdx = get_global_id(0);
|
||||
uint gdy = get_global_id(1);
|
||||
|
||||
uint gidx = gdx >=cols?cols-1:gdx;
|
||||
uint gidy = gdy >=rows?rows-1:gdy;
|
||||
|
||||
uchar4 p,q,tmp;
|
||||
|
||||
float4 pf = 0,pq = 0,pd = 0;
|
||||
float wt =0;
|
||||
|
||||
int r = radius;
|
||||
int ij = 0;
|
||||
int ct = 0;
|
||||
|
||||
uint index_src = src_offset/4 + gidy*src_step/4 + gidx;
|
||||
uint index_dst = dst_offset/4 + gidy*dst_step/4 + gidx;
|
||||
|
||||
p = src[index_src];
|
||||
|
||||
uint gx,gy;
|
||||
uint src_index,dst_index;
|
||||
|
||||
for(int ii = -r;ii<r+1;ii++)
|
||||
{
|
||||
for(int jj =-r;jj<r+1;jj++)
|
||||
{
|
||||
ij = ii*ii+jj*jj;
|
||||
if(ij > mul24(radius,radius)) continue;
|
||||
gx = gidx + jj;
|
||||
gy = gidy + ii;
|
||||
|
||||
src_index = src_offset/4 + gy * src_step/4 + gx;
|
||||
q = src[src_index];
|
||||
|
||||
|
||||
ct = abs(p.x-q.x)+abs(p.y-q.y)+abs(p.z-q.z);
|
||||
wt =sigClr[ct]*sigSpc[(ii+radius)*(2*radius+1)+jj+radius];
|
||||
|
||||
pf.x += q.x*wt;
|
||||
pf.y += q.y*wt;
|
||||
pf.z += q.z*wt;
|
||||
// pf.w += q.w*wt;
|
||||
|
||||
pq += wt;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
pd = pf/pq;
|
||||
dst[index_dst] = convert_uchar4_rte(pd);
|
||||
}
|
||||
|
||||
__kernel void bilateral(__global uchar *dst,
|
||||
__kernel void bilateral_C1_D0(__global uchar *dst,
|
||||
__global const uchar *src,
|
||||
const int dst_rows,
|
||||
const int dst_cols,
|
||||
@@ -128,8 +52,8 @@ __kernel void bilateral(__global uchar *dst,
|
||||
if((gidy<dst_rows) && (gidx<dst_cols))
|
||||
{
|
||||
int src_addr = mad24(gidy+radius,src_step,gidx+radius);
|
||||
int dst_addr = mad24(gidy,src_step,gidx+dst_offset);
|
||||
float sum = 0, wsum = 0;
|
||||
int dst_addr = mad24(gidy,dst_step,gidx+dst_offset);
|
||||
float sum = 0.f, wsum = 0.f;
|
||||
|
||||
int val0 = (int)src[src_addr];
|
||||
for(int k = 0; k < maxk; k++ )
|
||||
@@ -142,4 +66,73 @@ __kernel void bilateral(__global uchar *dst,
|
||||
dst[dst_addr] = convert_uchar_rtz(sum/wsum+0.5f);
|
||||
}
|
||||
}
|
||||
__kernel void bilateral2_C1_D0(__global uchar *dst,
|
||||
__global const uchar *src,
|
||||
const int dst_rows,
|
||||
const int dst_cols,
|
||||
const int maxk,
|
||||
const int radius,
|
||||
const int dst_step,
|
||||
const int dst_offset,
|
||||
const int src_step,
|
||||
const int src_rows,
|
||||
const int src_cols,
|
||||
__constant float *color_weight,
|
||||
__constant float *space_weight,
|
||||
__constant int *space_ofs)
|
||||
{
|
||||
int gidx = get_global_id(0)<<2;
|
||||
int gidy = get_global_id(1);
|
||||
if((gidy<dst_rows) && (gidx<dst_cols))
|
||||
{
|
||||
int src_addr = mad24(gidy+radius,src_step,gidx+radius);
|
||||
int dst_addr = mad24(gidy,dst_step,gidx+dst_offset);
|
||||
float4 sum = (float4)(0.f), wsum = (float4)(0.f);
|
||||
|
||||
int4 val0 = convert_int4(vload4(0,src+src_addr));
|
||||
for(int k = 0; k < maxk; k++ )
|
||||
{
|
||||
int4 val = convert_int4(vload4(0,src+src_addr + space_ofs[k]));
|
||||
float4 w = (float4)(space_weight[k])*(float4)(color_weight[abs(val.x - val0.x)],color_weight[abs(val.y - val0.y)],color_weight[abs(val.z - val0.z)],color_weight[abs(val.w - val0.w)]);
|
||||
sum += convert_float4(val)*w;
|
||||
wsum += w;
|
||||
}
|
||||
*(__global uchar4*)(dst+dst_addr) = convert_uchar4_rtz(sum/wsum+0.5f);
|
||||
}
|
||||
}
|
||||
__kernel void bilateral_C4_D0(__global uchar4 *dst,
|
||||
__global const uchar4 *src,
|
||||
const int dst_rows,
|
||||
const int dst_cols,
|
||||
const int maxk,
|
||||
const int radius,
|
||||
const int dst_step,
|
||||
const int dst_offset,
|
||||
const int src_step,
|
||||
const int src_rows,
|
||||
const int src_cols,
|
||||
__constant float *color_weight,
|
||||
__constant float *space_weight,
|
||||
__constant int *space_ofs)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
if((gidy<dst_rows) && (gidx<dst_cols))
|
||||
{
|
||||
int src_addr = mad24(gidy+radius,src_step,gidx+radius);
|
||||
int dst_addr = mad24(gidy,dst_step,gidx+dst_offset);
|
||||
float4 sum = (float4)0.f;
|
||||
float wsum = 0.f;
|
||||
|
||||
int4 val0 = convert_int4(src[src_addr]);
|
||||
for(int k = 0; k < maxk; k++ )
|
||||
{
|
||||
int4 val = convert_int4(src[src_addr + space_ofs[k]]);
|
||||
float w = space_weight[k]*color_weight[abs(val.x - val0.x)+abs(val.y - val0.y)+abs(val.z - val0.z)];
|
||||
sum += convert_float4(val)*(float4)w;
|
||||
wsum += w;
|
||||
}
|
||||
wsum=1.f/wsum;
|
||||
dst[dst_addr] = convert_uchar4_rtz(sum*(float4)wsum+(float4)0.5f);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,16 +144,18 @@ __kernel void __attribute__((reqd_work_group_size(1,HISTOGRAM256_BIN_COUNT,1)))c
|
||||
int rowIndex = mad24(gy, gn, gx);
|
||||
// rowIndex &= (PARTIAL_HISTOGRAM256_COUNT - 1);
|
||||
|
||||
__local int subhist[HISTOGRAM256_LOCAL_MEM_SIZE + 1];
|
||||
__local int subhist[HISTOGRAM256_LOCAL_MEM_SIZE];
|
||||
subhist[lidy] = 0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
gidx = ((gidx>=left_col) ? (gidx+cols) : gidx);
|
||||
int src_index = src_offset + mad24(gidy, src_step, gidx);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int p = (int)src[src_index];
|
||||
p = gidy >= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p;
|
||||
atomic_inc(subhist + p);
|
||||
if(gidy<rows)
|
||||
{
|
||||
int src_index = src_offset + mad24(gidy, src_step, gidx);
|
||||
int p = (int)src[src_index];
|
||||
// p = gidy >= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p;
|
||||
atomic_inc(subhist + p);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
globalHist[mad24(rowIndex, hist_step, lidy)] += subhist[lidy];
|
||||
|
||||
252
modules/ocl/src/kernels/interpolate_frames.cl
Normal file
252
modules/ocl/src/kernels/interpolate_frames.cl
Normal file
@@ -0,0 +1,252 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
//     and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
||||
|
||||
// Image read mode
|
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
|
||||
|
||||
// atomic add for 32bit floating point
|
||||
inline void atomic_addf(volatile __global float *source, const float operand) {
|
||||
union {
|
||||
unsigned int intVal;
|
||||
float floatVal;
|
||||
} newVal;
|
||||
union {
|
||||
unsigned int intVal;
|
||||
float floatVal;
|
||||
} prevVal;
|
||||
do {
|
||||
prevVal.floatVal = *source;
|
||||
newVal.floatVal = prevVal.floatVal + operand;
|
||||
} while (atomic_cmpxchg((volatile __global unsigned int *)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);
|
||||
}
|
||||
|
||||
__kernel void memsetKernel(
|
||||
float val,
|
||||
__global float * image,
|
||||
int width,
|
||||
int height,
|
||||
int step, // in element
|
||||
int offset
|
||||
)
|
||||
{
|
||||
if(get_global_id(0) >= width || get_global_id(1) >= height)
|
||||
{
|
||||
return;
|
||||
}
|
||||
image += offset;
|
||||
image[get_global_id(0) + get_global_id(1) * step] = val;
|
||||
}
|
||||
|
||||
__kernel void normalizeKernel(
|
||||
__global float * buffer,
|
||||
int width,
|
||||
int height,
|
||||
int step,
|
||||
int f_offset,
|
||||
int d_offset
|
||||
)
|
||||
{
|
||||
__global float * factors = buffer + f_offset;
|
||||
__global float * dst = buffer + d_offset;
|
||||
|
||||
int j = get_global_id(0);
|
||||
int i = get_global_id(1);
|
||||
|
||||
if(j >= width || i >= height)
|
||||
{
|
||||
return;
|
||||
}
|
||||
float scale = factors[step * i + j];
|
||||
float invScale = (scale == 0.0f) ? 1.0f : (1.0f / scale);
|
||||
|
||||
dst[step * i + j] *= invScale;
|
||||
}
|
||||
|
||||
__kernel void forwardWarpKernel(
|
||||
__global const float * src,
|
||||
__global float * buffer,
|
||||
__global const float * u,
|
||||
__global const float * v,
|
||||
const int w,
|
||||
const int h,
|
||||
const int flow_stride,
|
||||
const int image_stride,
|
||||
const int factor_offset,
|
||||
const int dst_offset,
|
||||
const float time_scale
|
||||
)
|
||||
{
|
||||
int j = get_global_id(0);
|
||||
int i = get_global_id(1);
|
||||
|
||||
if (i >= h || j >= w) return;
|
||||
|
||||
volatile __global float * normalization_factor = (volatile __global float *) buffer + factor_offset;
|
||||
volatile __global float * dst = (volatile __global float *)buffer + dst_offset;
|
||||
|
||||
int flow_row_offset = i * flow_stride;
|
||||
int image_row_offset = i * image_stride;
|
||||
|
||||
//bottom left corner of a target pixel
|
||||
float cx = u[flow_row_offset + j] * time_scale + (float)j + 1.0f;
|
||||
float cy = v[flow_row_offset + j] * time_scale + (float)i + 1.0f;
|
||||
// pixel containing bottom left corner
|
||||
float px;
|
||||
float py;
|
||||
float dx = modf(cx, &px);
|
||||
float dy = modf(cy, &py);
|
||||
// target pixel integer coords
|
||||
int tx;
|
||||
int ty;
|
||||
tx = (int) px;
|
||||
ty = (int) py;
|
||||
float value = src[image_row_offset + j];
|
||||
float weight;
|
||||
// fill pixel containing bottom right corner
|
||||
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||
{
|
||||
weight = dx * dy;
|
||||
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
||||
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
||||
}
|
||||
|
||||
// fill pixel containing bottom left corner
|
||||
tx -= 1;
|
||||
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||
{
|
||||
weight = (1.0f - dx) * dy;
|
||||
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
||||
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
||||
}
|
||||
|
||||
// fill pixel containing upper left corner
|
||||
ty -= 1;
|
||||
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||
{
|
||||
weight = (1.0f - dx) * (1.0f - dy);
|
||||
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
||||
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
||||
}
|
||||
|
||||
// fill pixel containing upper right corner
|
||||
tx += 1;
|
||||
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||
{
|
||||
weight = dx * (1.0f - dy);
|
||||
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
||||
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
||||
}
|
||||
}
|
||||
|
||||
// define buffer offsets
|
||||
enum
|
||||
{
|
||||
O0_OS = 0,
|
||||
O1_OS,
|
||||
U_OS,
|
||||
V_OS,
|
||||
UR_OS,
|
||||
VR_OS
|
||||
};
|
||||
|
||||
__kernel void blendFramesKernel(
|
||||
image2d_t tex_src0,
|
||||
image2d_t tex_src1,
|
||||
__global float * buffer,
|
||||
__global float * out,
|
||||
int w,
|
||||
int h,
|
||||
int step,
|
||||
float theta
|
||||
)
|
||||
{
|
||||
__global float * u = buffer + h * step * U_OS;
|
||||
__global float * v = buffer + h * step * V_OS;
|
||||
__global float * ur = buffer + h * step * UR_OS;
|
||||
__global float * vr = buffer + h * step * VR_OS;
|
||||
__global float * o0 = buffer + h * step * O0_OS;
|
||||
__global float * o1 = buffer + h * step * O1_OS;
|
||||
|
||||
int ix = get_global_id(0);
|
||||
int iy = get_global_id(1);
|
||||
|
||||
if(ix >= w || iy >= h) return;
|
||||
|
||||
int pos = ix + step * iy;
|
||||
|
||||
float _u = u[pos];
|
||||
float _v = v[pos];
|
||||
|
||||
float _ur = ur[pos];
|
||||
float _vr = vr[pos];
|
||||
|
||||
float x = (float)ix + 0.5f;
|
||||
float y = (float)iy + 0.5f;
|
||||
bool b0 = o0[pos] > 1e-4f;
|
||||
bool b1 = o1[pos] > 1e-4f;
|
||||
|
||||
float2 coord0 = (float2)(x - _u * theta, y - _v * theta);
|
||||
float2 coord1 = (float2)(x + _u * (1.0f - theta), y + _v * (1.0f - theta));
|
||||
|
||||
if (b0 && b1)
|
||||
{
|
||||
// pixel is visible on both frames
|
||||
out[pos] = read_imagef(tex_src0, sampler, coord0).x * (1.0f - theta) +
|
||||
read_imagef(tex_src1, sampler, coord1).x * theta;
|
||||
}
|
||||
else if (b0)
|
||||
{
|
||||
// visible on the first frame only
|
||||
out[pos] = read_imagef(tex_src0, sampler, coord0).x;
|
||||
}
|
||||
else
|
||||
{
|
||||
// visible on the second frame only
|
||||
out[pos] = read_imagef(tex_src1, sampler, coord1).x;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user