From 5f81988699035ea16d1cdc7eb6367aaec520d0ee Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 3 Oct 2013 20:04:04 +0400 Subject: [PATCH] refactored arithm binary operations in order to make them more scalable --- modules/ocl/src/arithm.cpp | 16 +-- modules/ocl/src/matrix_operations.cpp | 32 ++--- modules/ocl/src/opencl/arithm_add.cl | 117 +++++++----------- modules/ocl/src/opencl/arithm_add_mask.cl | 20 ++- modules/ocl/src/opencl/arithm_add_scalar.cl | 63 ++++------ .../ocl/src/opencl/arithm_add_scalar_mask.cl | 20 ++- modules/ocl/test/test_arithm.cpp | 2 +- 7 files changed, 135 insertions(+), 135 deletions(-) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index f34e0f730..09d250ae3 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -89,11 +89,11 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const size_t localThreads[3] = { 16, 16, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; - std::string kernelName = op_type == ABS_DIFF ? "arithm_absdiff" : "arithm_binary_op"; + std::string kernelName = "arithm_binary_op"; const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; const char * const WTypeMap[] = { "short", "short", "int", "int", "int", "float", "double" }; - const char operationsMap[] = { '+', '-', '*', '/', '-' }; + const char * const funcMap[] = { "FUNC_ADD", "FUNC_SUB", "FUNC_MUL", "FUNC_DIV", "FUNC_ABS_DIFF" }; const char * const channelMap[] = { "", "", "2", "4", "4" }; bool haveScalar = use_scalar || src2.empty(); @@ -105,12 +105,12 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const else if (op_type == MUL) WDepth = hasDouble && (depth == CV_32S || depth == CV_64F) ? CV_64F : CV_32F; - std::string buildOptions = format("-D T=%s%s -D WT=%s%s -D convertToT=convert_%s%s%s -D Operation=%c" - " -D convertToWT=convert_%s%s", + std::string buildOptions = format("-D T=%s%s -D WT=%s%s -D convertToT=convert_%s%s%s -D %s " + "-D convertToWT=convert_%s%s", typeMap[depth], channelMap[oclChannels], WTypeMap[WDepth], channelMap[oclChannels], typeMap[depth], channelMap[oclChannels], (depth >= CV_32F ? "" : (depth == CV_32S ? "_rte" : "_sat_rte")), - operationsMap[op_type], WTypeMap[WDepth], channelMap[oclChannels]); + funcMap[op_type], WTypeMap[WDepth], channelMap[oclChannels]); vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); @@ -124,6 +124,9 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); kernelName += "_mat"; + + if (haveScalar) + buildOptions += " -D HAVE_SCALAR"; } if (haveScalar) @@ -146,9 +149,6 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const kernelName += "_mask"; } - if (op_type == DIV) - kernelName += "_div"; - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 )); diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 80b2f7d81..3b0e41731 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -366,23 +366,23 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri #ifdef CL_VERSION_1_2 // this enables backwards portability to // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support - if (Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2) && - dst.offset == 0 && dst.cols == dst.wholecols) - { - const int sizeofMap[][7] = - { - { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) }, - { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) }, - { 0 , 0 , 0 , 0 , 0 , 0 , 0 }, - { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) }, - }; - int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()]; +// if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) && +// dst.offset == 0 && dst.cols == dst.wholecols) +// { +// const int sizeofMap[][7] = +// { +// { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) }, +// { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) }, +// { 0 , 0 , 0 , 0 , 0 , 0 , 0 }, +// { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) }, +// }; +// int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()]; - clEnqueueFillBuffer(getClCommandQueue(dst.clCxt), - (cl_mem)dst.data, (void*)mat.data, sizeofGeneric, - 0, dst.step * dst.rows, 0, NULL, NULL); - } - else +// clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), +// (cl_mem)dst.data, (void*)mat.data, sizeofGeneric, +// 0, dst.step * dst.rows, 0, NULL, NULL); +// } +// else #endif { oclMat m(mat); diff --git a/modules/ocl/src/opencl/arithm_add.cl b/modules/ocl/src/opencl/arithm_add.cl index 38834e766..40caba5a9 100644 --- a/modules/ocl/src/opencl/arithm_add.cl +++ b/modules/ocl/src/opencl/arithm_add.cl @@ -52,14 +52,50 @@ #endif #endif +#if defined (FUNC_ADD) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + convertToWT(src2[src2_index])); +#endif + +#if defined (FUNC_SUB) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - convertToWT(src2[src2_index])); +#endif + +#if defined (FUNC_MUL) +#if defined (HAVE_SCALAR) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0] * convertToWT(src2[src2_index])); +#else +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index])); +#endif +#endif + +#if defined (FUNC_DIV) +#if defined (HAVE_SCALAR) +#define EXPRESSION T zero = (T)(0); \ + dst[dst_index] = src2[src2_index] == zero ? zero : \ + convertToT(convertToWT(src1[src1_index]) * scalar[0] / convertToWT(src2[src2_index])); +#else +#define EXPRESSION T zero = (T)(0); \ + dst[dst_index] = src2[src2_index] == zero ? zero : \ + convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index])); +#endif +#endif + +#if defined (FUNC_ABS_DIFF) +#define EXPRESSION WT value = convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]); \ + value = value > (WT)(0) ? value : -value; \ + dst[dst_index] = convertToT(value); +#endif + ////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////// ADD //////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// +#ifndef HAVE_SCALAR + __kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_offset, - __global T *src2, int src2_step, int src2_offset, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) + __global T *src2, int src2_step, int src2_offset, + __global T *dst, int dst_step, int dst_offset, + int cols, int rows) { int x = get_global_id(0); int y = get_global_id(1); @@ -70,55 +106,18 @@ __kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_off int src2_index = mad24(y, src2_step, x + src2_offset); int dst_index = mad24(y, dst_step, x + dst_offset); - dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation convertToWT(src2[src2_index])); + EXPRESSION } } -__kernel void arithm_binary_op_mat_div(__global T *src1, int src1_step, int src1_offset, - __global T *src2, int src2_step, int src2_offset, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) -{ - int x = get_global_id(0); - int y = get_global_id(1); +#else - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, x + src1_offset); - int src2_index = mad24(y, src2_step, x + src2_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); - - T zero = (T)(0); - dst[dst_index] = src2[src2_index] == zero ? zero : convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index])); - } -} - -__kernel void arithm_absdiff_mat(__global T *src1, int src1_step, int src1_offset, - __global T *src2, int src2_step, int src2_offset, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, x + src1_offset); - int src2_index = mad24(y, src2_step, x + src2_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); - - WT value = convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]); - value = value > (WT)(0) ? value : -value; - dst[dst_index] = convertToT(value); - } -} - -// add mat with scale for multiply +// add mat with scale __kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int src1_offset, - __global T *src2, int src2_step, int src2_offset, - __global WT *scalar, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) + __global T *src2, int src2_step, int src2_offset, + __global WT *scalar, + __global T *dst, int dst_step, int dst_offset, + int cols, int rows) { int x = get_global_id(0); int y = get_global_id(1); @@ -129,28 +128,8 @@ __kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int s int src2_index = mad24(y, src2_step, x + src2_offset); int dst_index = mad24(y, dst_step, x + dst_offset); - dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0] * convertToWT(src2[src2_index])); + EXPRESSION } } -// add mat with scale for divide -__kernel void arithm_binary_op_mat_scalar_div(__global T *src1, int src1_step, int src1_offset, - __global T *src2, int src2_step, int src2_offset, - __global WT *scalar, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, x + src1_offset); - int src2_index = mad24(y, src2_step, x + src2_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); - - T zero = (T)(0); - dst[dst_index] = src2[src2_index] == zero ? zero : - convertToT(convertToWT(src1[src1_index]) * scalar[0] / convertToWT(src2[src2_index])); - } -} +#endif diff --git a/modules/ocl/src/opencl/arithm_add_mask.cl b/modules/ocl/src/opencl/arithm_add_mask.cl index 52dbfc455..c3958bf1f 100644 --- a/modules/ocl/src/opencl/arithm_add_mask.cl +++ b/modules/ocl/src/opencl/arithm_add_mask.cl @@ -51,6 +51,24 @@ #endif #endif +#if defined (FUNC_ADD) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + convertToWT(src2[src2_index])); +#endif + +#if defined (FUNC_SUB) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - convertToWT(src2[src2_index])); +#endif + +#if defined (FUNC_MUL) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index])); +#endif + +#if defined (FUNC_DIV) +#define EXPRESSION T zero = (T)(0); \ + dst[dst_index] = src2[src2_index] == zero ? zero : \ + convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index])); +#endif + ////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////// add with mask ////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////// @@ -73,7 +91,7 @@ __kernel void arithm_binary_op_mat_mask(__global T * src1, int src1_step, int sr int src2_index = mad24(y, src2_step, x + src2_offset); int dst_index = mad24(y, dst_step, dst_offset + x); - dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation convertToWT(src2[src2_index])); + EXPRESSION } } } diff --git a/modules/ocl/src/opencl/arithm_add_scalar.cl b/modules/ocl/src/opencl/arithm_add_scalar.cl index 4e0c7fc5f..4a0167fd5 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar.cl @@ -51,6 +51,29 @@ #endif #endif +#if defined (FUNC_ADD) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar[0]); +#endif + +#if defined (FUNC_SUB) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar[0]); +#endif + +#if defined (FUNC_MUL) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0]); +#endif + +#if defined (FUNC_DIV) +#define EXPRESSION T zero = (T)(0); \ + dst[dst_index] = src1[src1_index] == zero ? zero : convertToT(scalar[0] / convertToWT(src1[src1_index])); +#endif + +#if defined (FUNC_ABS_DIFF) +#define EXPRESSION WT value = convertToWT(src1[src1_index]) - scalar[0]; \ + value = value > (WT)(0) ? value : -value; \ + dst[dst_index] = convertToT(value); +#endif + /////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////// Add with scalar ///////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// @@ -68,44 +91,6 @@ __kernel void arithm_binary_op_scalar (__global T *src1, int src1_step, int src1 int src1_index = mad24(y, src1_step, x + src1_offset); int dst_index = mad24(y, dst_step, x + dst_offset); - dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation scalar[0]); - } -} - -__kernel void arithm_absdiff_scalar(__global T *src1, int src1_step, int src1_offset, - __global WT *src2, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, x + src1_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); - - WT value = convertToWT(src1[src1_index]) - src2[0]; - value = value > (WT)(0) ? value : -value; - dst[dst_index] = convertToT(value); - } -} - -// scalar divide to matrix -__kernel void arithm_binary_op_scalar_div(__global T *src1, int src1_step, int src1_offset, - __global WT *scalar, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, x + src1_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); - - T zero = (T)(0); - dst[dst_index] = src1[src1_index] == zero ? zero : convertToT(scalar[0] / convertToWT(src1[src1_index])); + EXPRESSION } } diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl index 5c3408034..d472b3cbf 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl @@ -51,6 +51,24 @@ #endif #endif +#if defined (FUNC_ADD) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar[0]); +#endif + +#if defined (FUNC_SUB) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar[0]); +#endif + +#if defined (FUNC_MUL) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0]); +#endif + +#if defined (FUNC_DIV) +#define EXPRESSION T zero = (T)(0); \ + dst[dst_index] = src2[src2_index] == zero ? zero : \ + convertToT(convertToWT(src1[src1_index]) / scalar[0]); +#endif + /////////////////////////////////////////////////////////////////////////////////// //////////////////////////// Add with scalar with mask //////////////////////////// /////////////////////////////////////////////////////////////////////////////////// @@ -72,7 +90,7 @@ __kernel void arithm_binary_op_scalar_mask(__global T *src1, int src1_step, int int src1_index = mad24(y, src1_step, x + src1_offset); int dst_index = mad24(y, dst_step, dst_offset + x); - dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation scalar[0]); + EXPRESSION } } } diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index db01d9503..f2f13ec41 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -535,7 +535,7 @@ TEST_P(Absdiff, Mat) } } -TEST_P(Absdiff, Mat_Scalar) +TEST_P(Absdiff, Scalar) { for (int j = 0; j < LOOP_TIMES; j++) {