fixed and generalized ocl::blendLinear
This commit is contained in:
@@ -49,35 +49,51 @@
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
|
||||
// NOTE(review): this span is a rendered diff of cv::ocl::blendLinear. Two variants of the
// function are interleaved line by line: one written in terms of img1/img2/result that
// launches kernel "BlendLinear", and one written in terms of src1/src2/dst that launches
// kernel "blendLinear". The +/- markers are not visible, so which variant a line belongs
// to is inferred from the identifiers it uses. The "|" and "||||" lines are table residue.
//
// Signature of the img1/img2/result variant.
void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
|
||||
oclMat &result)
|
||||
// Signature of the src1/src2/dst variant (same parameter kinds and order, different names).
void cv::ocl::blendLinear(const oclMat &src1, const oclMat &src2, const oclMat &weights1, const oclMat &weights2,
|
||||
oclMat &dst)
|
||||
{
|
||||
// img1 variant: take the OpenCL context from img1 and assert the other inputs share it.
cv::ocl::Context *ctx = img1.clCxt;
|
||||
assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
|
||||
// img1 variant: geometry and steps are read from img1 / weights1 only.
int channels = img1.oclchannels();
|
||||
int depth = img1.depth();
|
||||
int rows = img1.rows;
|
||||
int cols = img1.cols;
|
||||
int istep = img1.step1();
|
||||
int wstep = weights1.step1();
|
||||
// img1 variant: global size is (cols * channels / 4) x rows, local size is 256 x 1.
size_t globalSize[] = {cols * channels / 4, rows, 1};
|
||||
size_t localSize[] = {256, 1, 1};
|
||||
// src1 variant: input validation. Depth must not exceed CV_32F, both sources must agree in
// size and type, and both weight maps must be CV_32FC1 with the same size as src1.
CV_Assert(src1.depth() <= CV_32F);
|
||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||
CV_Assert(weights1.size() == weights2.size() && weights1.size() == src1.size() &&
|
||||
weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1);
|
||||
|
||||
// src1 variant: dst is created with src1's size and type.
dst.create(src1.size(), src1.type());
|
||||
|
||||
// src1 variant: global size is dst.cols x dst.rows, local size is 16 x 16.
size_t globalSize[] = { dst.cols, dst.rows, 1};
|
||||
size_t localSize[] = { 16, 16, 1 };
|
||||
|
||||
int depth = dst.depth(), ocn = dst.oclchannels();
|
||||
// Steps and offsets are divided by elemSize(), presumably converting byte counts into
// element counts so the kernel can index typed pointers directly -- TODO confirm units.
int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
|
||||
int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize();
|
||||
int weight1_step = weights1.step / weights1.elemSize(), weight1_offset = weights1.offset / weights1.elemSize();
|
||||
int weight2_step = weights2.step / weights2.elemSize(), weight2_offset = weights2.offset / weights2.elemSize();
|
||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
||||
|
||||
// channelMap is indexed by ocn and yields the vector-width suffix ("" / "2" / "4");
// typeMap is indexed by depth and yields the scalar type name.
const char * const channelMap[] = { "", "", "2", "4", "4" };
|
||||
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
||||
// Build options define T (element type), convertToT (conversion back to T, with "_sat_rte"
// appended only when depth < CV_32S), FT (float of matching width) and convertToFT.
std::string buildOptions = format("-D T=%s%s -D convertToT=convert_%s%s%s -D FT=float%s -D convertToFT=convert_float%s",
|
||||
typeMap[depth], channelMap[ocn], typeMap[depth], channelMap[ocn],
|
||||
depth >= CV_32S ? "" : "_sat_rte", channelMap[ocn], channelMap[ocn]);
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
// img1 variant: create the output and, only when globalSize[0] is non-zero, push the
// arguments (result, img1, img2, weights1, weights2, rows, cols, istep, wstep).
result.create(img1.size(), CV_MAKE_TYPE(depth,img1.channels()));
|
||||
if(globalSize[0] != 0)
|
||||
{
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
|
||||
std::string kernelName = "BlendLinear";
|
||||
// src1 variant: for each of src1, src2, weights1, weights2 and dst push the buffer, its
// offset and its step, then dst.rows and dst.cols -- the same order as the parameters of
// the "blendLinear" kernel.
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_step ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_step ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
|
||||
|
||||
// img1 variant: launch "BlendLinear", passing channels and depth as the trailing arguments
// (presumably used to pick the _C<n>_D<n> kernel -- TODO confirm in openCLExecuteKernel).
openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
|
||||
}
|
||||
// src1 variant: launch "blendLinear" on src1's context with -1, -1 and the build options.
openCLExecuteKernel(src1.clCxt, &blend_linear, "blendLinear", globalSize, localSize, args,
|
||||
-1, -1, buildOptions.c_str());
|
||||
}
|
||||
|
||||
@@ -42,99 +42,37 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
// NOTE(review): rendered diff; three things are interleaved in this span:
//   * the parameter list and body of kernel BlendLinear_C1_D0 (idx/idy, pos/wpos lines),
//   * the fp64 extension pragma block,
//   * the signature and bounds guard of kernel blendLinear (x/y lines); its index and
//     blend statements do not appear in this span.
// The "|" and "||||" lines are table residue.
//
// BlendLinear_C1_D0: images are addressed as uchar4 and weights as float4.
// The _C1_D0 suffix presumably encodes channels = 1, depth = 0 -- TODO confirm.
__kernel void BlendLinear_C1_D0(
|
||||
__global uchar4 *dst,
|
||||
__global uchar4 *img1,
|
||||
__global uchar4 *img2,
|
||||
__global float4 *weight1,
|
||||
__global float4 *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
|
||||
// Enable the double-precision extension (AMD or Khronos flavour) when DOUBLE_SUPPORT is defined.
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// blendLinear: T is not declared here and has to come from the build options. Each of
// src1, src2, weight1, weight2 and dst is passed as (pointer, offset, step); weights are
// plain float.
__kernel void blendLinear(__global const T * src1, int src1_offset, int src1_step,
|
||||
__global const T * src2, int src2_offset, int src2_step,
|
||||
__global const float * weight1, int weight1_offset, int weight1_step,
|
||||
__global const float * weight2, int weight2_offset, int weight2_step,
|
||||
__global T * dst, int dst_offset, int dst_step,
|
||||
int rows, int cols)
|
||||
{
|
||||
// BlendLinear_C1_D0 guard: idx is scaled by 4 (idx << 2) before the comparison with cols,
// matching the 4-wide vector pointers.
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
if (idx << 2 < cols && idy < rows)
|
||||
// blendLinear guard: x and y are compared against cols and rows directly.
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
// BlendLinear_C1_D0 body: both steps are shifted right by 2, presumably to convert a step
// counted in scalars into one counted in 4-wide vectors -- TODO confirm step units.
int pos = mad24(idy,istep >> 2,idx);
|
||||
int wpos = mad24(idy,wstep >> 2,idx);
|
||||
float4 w1 = weight1[wpos], w2 = weight2[wpos];
|
||||
// Weighted average (img1 * w1 + img2 * w2) / (w1 + w2 + 1e-5f), computed in float4 and
// converted back with convert_uchar4; the 1e-5f term presumably avoids a zero denominator.
dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 +
|
||||
convert_float4(img2[pos]) * w2) / (w1 + w2 + 1e-5f));
|
||||
}
|
||||
}
|
||||
|
||||
// BlendLinear_C4_D0: blends img1 and img2 into dst using per-position scalar weights.
// Images are addressed as uchar4, weights as plain float, so one weight pair applies to
// all four components of a uchar4 element.
// The _C4_D0 suffix presumably encodes channels = 4, depth = 0 -- TODO confirm.
// ("|" and "||||" lines are residue from the diff rendering.)
__kernel void BlendLinear_C4_D0(
|
||||
__global uchar4 *dst,
|
||||
__global uchar4 *img1,
|
||||
__global uchar4 *img2,
|
||||
__global float *weight1,
|
||||
__global float *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
// Work-items outside cols x rows do nothing.
if (idx < cols && idy < rows)
|
||||
{
|
||||
// Image index uses istep >> 2 (presumably a scalar step converted to uchar4 units --
// TODO confirm); the weight index uses wstep unshifted because weights are scalar floats.
int pos = mad24(idy,istep >> 2,idx);
|
||||
int wpos = mad24(idy,wstep, idx);
|
||||
float w1 = weight1[wpos];
|
||||
float w2 = weight2[wpos];
|
||||
// Weighted average in float4, converted back with convert_uchar4; the 1e-5f term
// presumably avoids a zero denominator when both weights are 0.
dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 +
|
||||
convert_float4(img2[pos]) * w2) / (w1 + w2 + 1e-5f));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// BlendLinear_C1_D5: blends img1 and img2 into dst with element-wise weights.
// Images and weights are all addressed as float4, so each work-item handles four
// consecutive floats with four independent weight pairs.
// The _C1_D5 suffix presumably encodes channels = 1, depth = 5 -- TODO confirm.
// ("|" and "||||" lines are residue from the diff rendering.)
__kernel void BlendLinear_C1_D5(
|
||||
__global float4 *dst,
|
||||
__global float4 *img1,
|
||||
__global float4 *img2,
|
||||
__global float4 *weight1,
|
||||
__global float4 *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
// idx is scaled by 4 before the comparison with cols, matching the float4 addressing.
if (idx << 2 < cols && idy < rows)
|
||||
{
|
||||
// Both steps are shifted right by 2, presumably converting a step counted in floats
// into one counted in float4 elements -- TODO confirm step units.
int pos = mad24(idy,istep >> 2,idx);
|
||||
int wpos = mad24(idy,wstep >> 2,idx);
|
||||
float4 w1 = weight1[wpos], w2 = weight2[wpos];
|
||||
// Component-wise weighted average; the 1e-5f term presumably avoids a zero denominator.
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): rendered diff; the body of BlendLinear_C4_D5 (idx/idy, pos/wpos lines) is
// immediately followed, inside the same braces, by index and blend statements written in
// terms of x, y, src1/src2, T and FT. None of those names are declared in this kernel, so
// those statements belong to a different kernel whose signature is not in this span.
// The "|" and "||||" lines are table residue.
//
// BlendLinear_C4_D5: images are addressed as float4, weights as plain float, so one weight
// pair applies to all four components of a float4 element.
// The _C4_D5 suffix presumably encodes channels = 4, depth = 5 -- TODO confirm.
__kernel void BlendLinear_C4_D5(
|
||||
__global float4 *dst,
|
||||
__global float4 *img1,
|
||||
__global float4 *img2,
|
||||
__global float *weight1,
|
||||
__global float *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
// Work-items outside cols x rows do nothing.
if (idx < cols && idy < rows)
|
||||
{
|
||||
// Image index uses istep >> 2 (presumably a scalar step converted to float4 units --
// TODO confirm); the weight index uses wstep unshifted because weights are scalar floats.
int pos = mad24(idy,istep >> 2,idx);
|
||||
int wpos = mad24(idy,wstep, idx);
|
||||
float w1 = weight1[wpos];
|
||||
float w2 = weight2[wpos];
|
||||
// Weighted average; the 1e-5f term presumably avoids a zero denominator.
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
// Statements in terms of x/y and per-buffer (step, offset) pairs:
// each index is y * step + offset + x for the corresponding buffer.
int src1_index = mad24(y, src1_step, src1_offset + x);
|
||||
int src2_index = mad24(y, src2_step, src2_offset + x);
|
||||
int weight1_index = mad24(y, weight1_step, weight1_offset + x);
|
||||
int weight2_index = mad24(y, weight2_step, weight2_offset + x);
|
||||
int dst_index = mad24(y, dst_step, dst_offset + x);
|
||||
|
||||
// Weights are cast to FT, the sources are converted with convertToFT, and the result
// num / den is converted back with convertToT. den includes 1e-5f, presumably to avoid
// a zero denominator.
FT w1 = (FT)(weight1[weight1_index]), w2 = (FT)(weight2[weight2_index]);
|
||||
FT den = w1 + w2 + (FT)(1e-5f);
|
||||
FT num = w1 * convertToFT(src1[src1_index]) + w2 * convertToFT(src2[src2_index]);
|
||||
|
||||
dst[dst_index] = convertToT(num / den);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user