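Use float (CV_32F) instead of int (CV_32S) as the working type for CV_8U input in sumTemplate, and remove the wdepth-specific mad24 branches from the OpenCL kernels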
This commit is contained in:
		| @@ -90,11 +90,8 @@ __kernel void calcSum(__global const uchar * srcptr, int src_step, int src_offse | |||||||
|         T src = loadpix(srcptr + src_index); |         T src = loadpix(srcptr + src_index); | ||||||
|  |  | ||||||
|         tmp = convertToWT(src); |         tmp = convertToWT(src); | ||||||
| #if wdepth == 4 |  | ||||||
|         accumulator = mad24(tmp, tmp, accumulator); |  | ||||||
| #else |  | ||||||
|         accumulator = mad(tmp, tmp, accumulator); |         accumulator = mad(tmp, tmp, accumulator); | ||||||
| #endif |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (lid < WGS2_ALIGNED) |     if (lid < WGS2_ALIGNED) | ||||||
| @@ -165,11 +162,9 @@ __kernel void matchTemplate_Naive_CCORR(__global const uchar * srcptr, int src_s | |||||||
|                 { |                 { | ||||||
|                     T temp = (T)(template[j]); |                     T temp = (T)(template[j]); | ||||||
|                     T src = *(__global const T*)(srcptr + ind + j*(int)sizeof(T1)); |                     T src = *(__global const T*)(srcptr + ind + j*(int)sizeof(T1)); | ||||||
| #if wdepth == 4 |  | ||||||
|                         sum = mad24(convertToWT(src), convertToWT(temp), sum); |                     sum = mad(convertToWT(src), convertToWT(temp), sum); | ||||||
| #else |  | ||||||
|                         sum = mad(convertToWT(src), convertToWT(temp), sum); |  | ||||||
| #endif |  | ||||||
|                 } |                 } | ||||||
|             ind += src_step; |             ind += src_step; | ||||||
|             template = (__global const T1 *)((__global const uchar *)template + template_step); |             template = (__global const T1 *)((__global const uchar *)template + template_step); | ||||||
| @@ -195,12 +190,7 @@ __kernel void matchTemplate_Naive_CCORR(__global const uchar * srcptr, int src_s | |||||||
|                     #pragma unroll |                     #pragma unroll | ||||||
|                     for (int cx=0, x = x0; cx < PIX_PER_WI_X && x < dst_cols; ++cx, ++x) |                     for (int cx=0, x = x0; cx < PIX_PER_WI_X && x < dst_cols; ++cx, ++x) | ||||||
|                     { |                     { | ||||||
|  |  | ||||||
| #if wdepth == 4 |  | ||||||
|                         sum[cx] = mad24(convertToWT1(src[j+cx]), convertToWT1(template[j]), sum[cx]); |  | ||||||
| #else |  | ||||||
|                         sum[cx] = mad(convertToWT1(src[j+cx]), convertToWT1(template[j]), sum[cx]); |                         sum[cx] = mad(convertToWT1(src[j+cx]), convertToWT1(template[j]), sum[cx]); | ||||||
| #endif |  | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
| @@ -237,11 +227,8 @@ __kernel void matchTemplate_Naive_CCORR(__global const uchar * srcptr, int src_s | |||||||
|             { |             { | ||||||
|                 T src      = loadpix(srcptr      + mad24(y+i, src_step,    mad24(x+j, TSIZE, src_offset))); |                 T src      = loadpix(srcptr      + mad24(y+i, src_step,    mad24(x+j, TSIZE, src_offset))); | ||||||
|                 T template = loadpix(templateptr + mad24(i, template_step, mad24(j, TSIZE, template_offset))); |                 T template = loadpix(templateptr + mad24(i, template_step, mad24(j, TSIZE, template_offset))); | ||||||
| #if wdepth == 4 |  | ||||||
|                 sum = mad24(convertToWT(src), convertToWT(template), sum); |  | ||||||
| #else |  | ||||||
|                 sum = mad(convertToWT(src), convertToWT(template), sum); |                 sum = mad(convertToWT(src), convertToWT(template), sum); | ||||||
| #endif |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -296,11 +283,8 @@ __kernel void matchTemplate_Naive_SQDIFF(__global const uchar * srcptr, int src_ | |||||||
|                 T template = loadpix(templateptr + mad24(i, template_step, mad24(j, TSIZE, template_offset))); |                 T template = loadpix(templateptr + mad24(i, template_step, mad24(j, TSIZE, template_offset))); | ||||||
|  |  | ||||||
|                 value = convertToWT(src) - convertToWT(template); |                 value = convertToWT(src) - convertToWT(template); | ||||||
| #if wdepth == 4 |  | ||||||
|                 sum = mad24(value, value, sum); |  | ||||||
| #else |  | ||||||
|                 sum = mad(value, value, sum); |                 sum = mad(value, value, sum); | ||||||
| #endif |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -79,7 +79,7 @@ static bool extractFirstChannel_32F(InputArray _image, OutputArray _result, int | |||||||
| static bool sumTemplate(InputArray _src, UMat & result) | static bool sumTemplate(InputArray _src, UMat & result) | ||||||
| { | { | ||||||
|     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); |     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); | ||||||
|     int wdepth = std::max(CV_32S, depth), wtype = CV_MAKE_TYPE(wdepth, cn); |     int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn); | ||||||
|     size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); |     size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); | ||||||
|  |  | ||||||
|     int wgs2_aligned = 1; |     int wgs2_aligned = 1; | ||||||
| @@ -89,10 +89,10 @@ static bool sumTemplate(InputArray _src, UMat & result) | |||||||
|  |  | ||||||
|     char cvt[40]; |     char cvt[40]; | ||||||
|     ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc, |     ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc, | ||||||
|                   format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d -D wdepth=%d", |                   format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d", | ||||||
|                          ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn, |                          ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn, | ||||||
|                          ocl::convertTypeStr(depth, wdepth, cn, cvt), |                          ocl::convertTypeStr(depth, wdepth, cn, cvt), | ||||||
|                          (int)wgs, wgs2_aligned, wdepth)); |                          (int)wgs, wgs2_aligned)); | ||||||
|     if (k.empty()) |     if (k.empty()) | ||||||
|         return false; |         return false; | ||||||
|  |  | ||||||
| @@ -281,8 +281,8 @@ static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, Outpu | |||||||
|     const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1); |     const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1); | ||||||
|  |  | ||||||
|     ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc, |     ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc, | ||||||
|                   format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D wdepth=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype), |                   format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype), | ||||||
|                          convertToWT, convertToWT1, cn, wdepth, pxPerWIx)); |                          convertToWT, convertToWT1, cn, pxPerWIx)); | ||||||
|     if (k.empty()) |     if (k.empty()) | ||||||
|         return false; |         return false; | ||||||
|  |  | ||||||
| @@ -358,8 +358,8 @@ static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, Outp | |||||||
|  |  | ||||||
|     char cvt[40]; |     char cvt[40]; | ||||||
|     ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc, |     ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc, | ||||||
|                   format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d -D wdepth=%d", ocl::typeToStr(type), ocl::typeToStr(depth), |                   format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), | ||||||
|                          ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn, wdepth)); |                          ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn)); | ||||||
|     if (k.empty()) |     if (k.empty()) | ||||||
|         return false; |         return false; | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Elena Gvozdeva
					Elena Gvozdeva