Merge pull request #2228 from ilya-lavrenov:tapi_experiments

This commit is contained in:
Andrey Pavlenko
2014-01-31 19:14:18 +04:00
committed by OpenCV Buildbot
5 changed files with 72 additions and 17 deletions

View File

@@ -3378,6 +3378,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
btype,
extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");
build_options += ocl::kernelToStr(kernelX, CV_32F);
Size srcWholeSize; Point srcOffset;
src.locateROI(srcWholeSize, srcOffset);
@@ -3390,7 +3391,8 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
strKernel << "_D" << sdepth;
ocl::Kernel kernelRow;
if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, build_options))
if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc,
build_options))
return false;
int idxArg = 0;
@@ -3409,7 +3411,6 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
idxArg = kernelRow.set(idxArg, buf.cols);
idxArg = kernelRow.set(idxArg, buf.rows);
idxArg = kernelRow.set(idxArg, radiusY);
idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ)));
return kernelRow.run(2, globalsize, localsize, sync);
}
@@ -3479,6 +3480,8 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b
}
}
build_options += ocl::kernelToStr(kernelY, CV_32F);
ocl::Kernel kernelCol;
if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options))
return false;
@@ -3494,7 +3497,6 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b
idxArg = kernelCol.set(idxArg, (int)(dst.step / dst.elemSize()));
idxArg = kernelCol.set(idxArg, dst.cols);
idxArg = kernelCol.set(idxArg, dst.rows);
idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ)));
return kernelCol.run(2, globalsize, localsize, sync);
}
@@ -3508,7 +3510,7 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
int type = _src.type();
if ( !( (CV_8UC1 == type || CV_8UC4 == type || CV_32FC1 == type || CV_32FC4 == type) &&
(ddepth == CV_32F || ddepth == CV_8U) ) )
(ddepth == CV_32F || ddepth == CV_8U || ddepth < 0) ) )
return false;
int cn = CV_MAT_CN(type);
@@ -3541,12 +3543,12 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
Size srcSize = src.size();
Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1);
UMat buf; buf.create(bufSize, CV_MAKETYPE(CV_32F, cn));
if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, true))
if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, false))
return false;
_dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
UMat dst = _dst.getUMat();
return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, true);
return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, false);
}
#endif

View File

@@ -60,6 +60,8 @@ Niko
The info above may be obsolete.
***********************************************************************************/
#define DIG(a) a,
__constant float mat_kernel[] = { COEFF };
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
(__global const GENTYPE_SRC * restrict src,
@@ -70,8 +72,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
const int dst_offset_in_pixel,
const int dst_step_in_pixel,
const int dst_cols,
const int dst_rows,
__constant float * mat_kernel)
const int dst_rows)
{
int x = get_global_id(0);
int y = get_global_id(1);

View File

@@ -144,6 +144,9 @@ Niko
The info above may be obsolete.
***********************************************************************************/
#define DIG(a) a,
__constant float mat_kernel[] = { COEFF };
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0
(__global uchar * restrict src,
int src_step_in_pixel,
@@ -153,8 +156,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
__global float * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
int radiusy,
__constant float * mat_kernel)
int radiusy)
{
int x = get_global_id(0)<<2;
int y = get_global_id(1);
@@ -297,8 +299,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
__global float4 * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
int radiusy,
__constant float * mat_kernel)
int radiusy)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -391,8 +392,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
__global float * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
int radiusy,
__constant float * mat_kernel)
int radiusy)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -484,8 +484,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
__global float4 * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
int radiusy,
__constant float * mat_kernel)
int radiusy)
{
int x = get_global_id(0);
int y = get_global_id(1);