Merge pull request #2490 from ilya-lavrenov:ocl_sep_filters
This commit is contained in:
@@ -774,12 +774,12 @@ static void sepFilter2D_SinglePass(const oclMat &src, oclMat &dst,
|
||||
|
||||
option += " -D KERNEL_MATRIX_X=";
|
||||
for(int i=0; i<row_kernel.rows; i++)
|
||||
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
|
||||
option += cv::format("DIG(0x%x)", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
|
||||
option += "0x0";
|
||||
|
||||
option += " -D KERNEL_MATRIX_Y=";
|
||||
for(int i=0; i<col_kernel.rows; i++)
|
||||
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
|
||||
option += cv::format("DIG(0x%x)", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
|
||||
option += "0x0";
|
||||
|
||||
switch(src.type())
|
||||
@@ -1410,7 +1410,7 @@ Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int
|
||||
//if image size is non-degenerate and large enough
|
||||
//and if filter support is reasonable to satisfy larger local memory requirements,
|
||||
//then we can use the single-pass routine to avoid the overhead of extra runtime calls
|
||||
if( clCxt && clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) &&
|
||||
if( clCxt &&
|
||||
rowKernel.rows <= 21 && columnKernel.rows <= 21 &&
|
||||
(rowKernel.rows & 1) == 1 && (columnKernel.rows & 1) == 1 &&
|
||||
imgSize.width > optimizedSepFilterLocalSize + (rowKernel.rows>>1) &&
|
||||
|
||||
@@ -84,6 +84,8 @@
|
||||
|
||||
// DST(_x,_y): element _x of destination row _y. The row address is computed as
// Dst + DstOffset + (_y)*DstPitch and reinterpreted as a global DSTTYPE pointer
// before being indexed by _x.
// NOTE(review): DstOffset/DstPitch are presumably byte quantities; the
// declaration of Dst is not visible here -- confirm.
#define DST(_x,_y) (((global DSTTYPE*)(Dst+DstOffset+(_y)*DstPitch))[_x])
|
||||
|
||||
// DIG(a) expands to `a,` (the argument followed by a comma). The host wraps
// every filter coefficient as DIG(0x...) in the KERNEL_MATRIX_X/KERNEL_MATRIX_Y
// build options and terminates each list with a plain 0x0, so the option value
// expands to a comma-separated initializer list.
// NOTE(review): presumably this keeps literal commas out of the -D option
// string passed to the OpenCL compiler -- confirm.
#define DIG(a) a,
|
||||
|
||||
//horizontal and vertical filter kernels
|
||||
//should be defined on host during compile time to avoid overhead
|
||||
// Row-kernel coefficients injected by the host through the KERNEL_MATRIX_X
// build option. Each entry is the bit pattern of a float coefficient read as
// an unsigned int and printed in hex by the host; the host also appends one
// trailing 0x0 entry after the real coefficients.
__constant uint mat_kernelX[] = {KERNEL_MATRIX_X};
|
||||
|
||||
Reference in New Issue
Block a user