From 26afa49d710603b225fc6cb8b9aa3e2369980a99 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Dec 2014 15:41:14 +0300 Subject: [PATCH] fix cuda match template: use correct types for integral/sum outputs --- .../cudaimgproc/src/cuda/match_template.cu | 146 +++++++++--------- modules/cudaimgproc/src/match_template.cpp | 124 +++++++-------- 2 files changed, 135 insertions(+), 135 deletions(-) diff --git a/modules/cudaimgproc/src/cuda/match_template.cu b/modules/cudaimgproc/src/cuda/match_template.cu index 832878f9f..87ee71e1e 100644 --- a/modules/cudaimgproc/src/cuda/match_template.cu +++ b/modules/cudaimgproc/src/cuda/match_template.cu @@ -218,7 +218,7 @@ namespace cv { namespace cuda { namespace device // Prepared_SQDIFF template - __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result) + __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep image_sqsum, double templ_sqsum, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -234,7 +234,7 @@ namespace cv { namespace cuda { namespace device } template - void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream) + void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz image_sqsum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream) { const dim3 threads(32, 8); const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); @@ -246,10 +246,10 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, int cn, + void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz image_sqsum, double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream) { - typedef void (*caller_t)(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream); + typedef void (*caller_t)(int w, int h, const PtrStepSz image_sqsum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream); static const caller_t callers[] = { @@ -287,8 +287,8 @@ namespace cv { namespace cuda { namespace device template __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U( - int w, int h, const PtrStep image_sqsum, - unsigned long long templ_sqsum, PtrStepSzf result) + int w, int h, const PtrStep image_sqsum, + double templ_sqsum, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -305,7 +305,7 @@ namespace cv { namespace cuda { namespace device } template - void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, + void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz image_sqsum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream) { const dim3 threads(32, 8); @@ -319,10 +319,10 @@ namespace cv { namespace cuda { namespace device } - void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, + void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz image_sqsum, double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream) { - typedef void (*caller_t)(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream); + typedef void (*caller_t)(int w, int h, const PtrStepSz image_sqsum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream); static const caller_t callers[] = { 0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4> @@ -334,7 +334,7 @@ namespace cv { namespace cuda { namespace device ////////////////////////////////////////////////////////////////////// // Prepared_CCOFF - __global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep image_sum, PtrStepSzf result) + __global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep image_sum, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -349,7 +349,7 @@ namespace cv { namespace cuda { namespace device } } - void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream) + void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz image_sum, int templ_sum, PtrStepSzf result, cudaStream_t stream) { dim3 threads(32, 8); dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); @@ -365,8 +365,8 @@ namespace cv { namespace cuda { namespace device __global__ void matchTemplatePreparedKernel_CCOFF_8UC2( int w, int h, float templ_sum_scale_r, float templ_sum_scale_g, - const PtrStep image_sum_r, - const PtrStep image_sum_g, + const PtrStep image_sum_r, + const PtrStep image_sum_g, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -388,9 +388,9 @@ namespace cv { namespace cuda { namespace device void matchTemplatePrepared_CCOFF_8UC2( int w, int h, - const PtrStepSz image_sum_r, - const PtrStepSz image_sum_g, - unsigned int templ_sum_r, unsigned int templ_sum_g, + const PtrStepSz image_sum_r, + const PtrStepSz image_sum_g, + int templ_sum_r, int templ_sum_g, PtrStepSzf result, cudaStream_t stream) { dim3 threads(32, 8); @@ -412,9 +412,9 @@ namespace cv { namespace cuda { namespace device float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b, - const PtrStep image_sum_r, - const PtrStep image_sum_g, - const PtrStep image_sum_b, + const PtrStep image_sum_r, + const PtrStep image_sum_g, + const PtrStep image_sum_b, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -440,12 +440,12 @@ namespace cv { namespace cuda { namespace device void matchTemplatePrepared_CCOFF_8UC3( int w, int h, - const PtrStepSz image_sum_r, - const PtrStepSz image_sum_g, - const PtrStepSz image_sum_b, - unsigned int templ_sum_r, - unsigned int templ_sum_g, - unsigned int templ_sum_b, + const PtrStepSz image_sum_r, + const PtrStepSz image_sum_g, + const PtrStepSz image_sum_b, + int templ_sum_r, + int templ_sum_g, + int templ_sum_b, PtrStepSzf result, cudaStream_t stream) { dim3 threads(32, 8); @@ -471,10 +471,10 @@ namespace cv { namespace cuda { namespace device float templ_sum_scale_g, float templ_sum_scale_b, float templ_sum_scale_a, - const PtrStep image_sum_r, - const PtrStep image_sum_g, - const PtrStep image_sum_b, - const PtrStep image_sum_a, + const PtrStep image_sum_r, + const PtrStep image_sum_g, + const PtrStep image_sum_b, + const PtrStep image_sum_a, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -504,14 +504,14 @@ namespace cv { namespace cuda { namespace device void matchTemplatePrepared_CCOFF_8UC4( int w, int h, - const PtrStepSz image_sum_r, - const PtrStepSz image_sum_g, - const PtrStepSz image_sum_b, - const PtrStepSz image_sum_a, - unsigned int templ_sum_r, - unsigned int templ_sum_g, - unsigned int templ_sum_b, - unsigned int templ_sum_a, + const PtrStepSz image_sum_r, + const PtrStepSz image_sum_g, + const PtrStepSz image_sum_b, + const PtrStepSz image_sum_a, + int templ_sum_r, + int templ_sum_g, + int templ_sum_b, + int templ_sum_a, PtrStepSzf result, cudaStream_t stream) { dim3 threads(32, 8); @@ -537,8 +537,8 @@ namespace cv { namespace cuda { namespace device __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U( int w, int h, float weight, float templ_sum_scale, float templ_sqsum_scale, - const PtrStep image_sum, - const PtrStep image_sqsum, + const PtrStep image_sum, + const PtrStep image_sqsum, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -559,9 +559,9 @@ namespace cv { namespace cuda { namespace device } void matchTemplatePrepared_CCOFF_NORMED_8U( - int w, int h, const PtrStepSz image_sum, - const PtrStepSz image_sqsum, - unsigned int templ_sum, unsigned long long templ_sqsum, + int w, int h, const PtrStepSz image_sum, + const PtrStepSz image_sqsum, + int templ_sum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream) { dim3 threads(32, 8); @@ -586,8 +586,8 @@ namespace cv { namespace cuda { namespace device int w, int h, float weight, float templ_sum_scale_r, float templ_sum_scale_g, float templ_sqsum_scale, - const PtrStep image_sum_r, const PtrStep image_sqsum_r, - const PtrStep image_sum_g, const PtrStep image_sqsum_g, + const PtrStep image_sum_r, const PtrStep image_sqsum_r, + const PtrStep image_sum_g, const PtrStep image_sqsum_g, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -618,10 +618,10 @@ namespace cv { namespace cuda { namespace device void matchTemplatePrepared_CCOFF_NORMED_8UC2( int w, int h, - const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, - const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, - unsigned int templ_sum_r, unsigned long long templ_sqsum_r, - unsigned int templ_sum_g, unsigned long long templ_sqsum_g, + const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, + const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, + int templ_sum_r, double templ_sqsum_r, + int templ_sum_g, double templ_sqsum_g, PtrStepSzf result, cudaStream_t stream) { dim3 threads(32, 8); @@ -652,9 +652,9 @@ namespace cv { namespace cuda { namespace device int w, int h, float weight, float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b, float templ_sqsum_scale, - const PtrStep image_sum_r, const PtrStep image_sqsum_r, - const PtrStep image_sum_g, const PtrStep image_sqsum_g, - const PtrStep image_sum_b, const PtrStep image_sqsum_b, + const PtrStep image_sum_r, const PtrStep image_sqsum_r, + const PtrStep image_sum_g, const PtrStep image_sqsum_g, + const PtrStep image_sum_b, const PtrStep image_sqsum_b, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -693,12 +693,12 @@ namespace cv { namespace cuda { namespace device void matchTemplatePrepared_CCOFF_NORMED_8UC3( int w, int h, - const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, - const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, - const PtrStepSz image_sum_b, const PtrStepSz image_sqsum_b, - unsigned int templ_sum_r, unsigned long long templ_sqsum_r, - unsigned int templ_sum_g, unsigned long long templ_sqsum_g, - unsigned int templ_sum_b, unsigned long long templ_sqsum_b, + const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, + const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, + const PtrStepSz image_sum_b, const PtrStepSz image_sqsum_b, + int templ_sum_r, double templ_sqsum_r, + int templ_sum_g, double templ_sqsum_g, + int templ_sum_b, double templ_sqsum_b, PtrStepSzf result, cudaStream_t stream) { dim3 threads(32, 8); @@ -732,10 +732,10 @@ namespace cv { namespace cuda { namespace device int w, int h, float weight, float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b, float templ_sum_scale_a, float templ_sqsum_scale, - const PtrStep image_sum_r, const PtrStep image_sqsum_r, - const PtrStep image_sum_g, const PtrStep image_sqsum_g, - const PtrStep image_sum_b, const PtrStep image_sqsum_b, - const PtrStep image_sum_a, const PtrStep image_sqsum_a, + const PtrStep image_sum_r, const PtrStep image_sqsum_r, + const PtrStep image_sum_g, const PtrStep image_sqsum_g, + const PtrStep image_sum_b, const PtrStep image_sqsum_b, + const PtrStep image_sum_a, const PtrStep image_sqsum_a, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -780,14 +780,14 @@ namespace cv { namespace cuda { namespace device void matchTemplatePrepared_CCOFF_NORMED_8UC4( int w, int h, - const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, - const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, - const PtrStepSz image_sum_b, const PtrStepSz image_sqsum_b, - const PtrStepSz image_sum_a, const PtrStepSz image_sqsum_a, - unsigned int templ_sum_r, unsigned long long templ_sqsum_r, - unsigned int templ_sum_g, unsigned long long templ_sqsum_g, - unsigned int templ_sum_b, unsigned long long templ_sqsum_b, - unsigned int templ_sum_a, unsigned long long templ_sqsum_a, + const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, + const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, + const PtrStepSz image_sum_b, const PtrStepSz image_sqsum_b, + const PtrStepSz image_sum_a, const PtrStepSz image_sqsum_a, + int templ_sum_r, double templ_sqsum_r, + int templ_sum_g, double templ_sqsum_g, + int templ_sum_b, double templ_sqsum_b, + int templ_sum_a, double templ_sqsum_a, PtrStepSzf result, cudaStream_t stream) { dim3 threads(32, 8); @@ -823,8 +823,8 @@ namespace cv { namespace cuda { namespace device template __global__ void normalizeKernel_8U( - int w, int h, const PtrStep image_sqsum, - unsigned long long templ_sqsum, PtrStepSzf result) + int w, int h, const PtrStep image_sqsum, + double templ_sqsum, PtrStepSzf result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -838,8 +838,8 @@ namespace cv { namespace cuda { namespace device } } - void normalize_8U(int w, int h, const PtrStepSz image_sqsum, - unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream) + void normalize_8U(int w, int h, const PtrStepSz image_sqsum, + double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream) { dim3 threads(32, 8); dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); diff --git a/modules/cudaimgproc/src/match_template.cpp b/modules/cudaimgproc/src/match_template.cpp index 19d091588..c5ab143ec 100644 --- a/modules/cudaimgproc/src/match_template.cpp +++ b/modules/cudaimgproc/src/match_template.cpp @@ -61,77 +61,77 @@ namespace cv { namespace cuda { namespace device void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream); void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream); - void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, + void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz image_sqsum, double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream); - void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, + void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz image_sqsum, double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream); - void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream); + void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz image_sum, int templ_sum, PtrStepSzf result, cudaStream_t stream); void matchTemplatePrepared_CCOFF_8UC2( int w, int h, - const PtrStepSz image_sum_r, - const PtrStepSz image_sum_g, - unsigned int templ_sum_r, - unsigned int templ_sum_g, + const PtrStepSz image_sum_r, + const PtrStepSz image_sum_g, + int templ_sum_r, + int templ_sum_g, PtrStepSzf result, cudaStream_t stream); void matchTemplatePrepared_CCOFF_8UC3( int w, int h, - const PtrStepSz image_sum_r, - const PtrStepSz image_sum_g, - const PtrStepSz image_sum_b, - unsigned int templ_sum_r, - unsigned int templ_sum_g, - unsigned int templ_sum_b, + const PtrStepSz image_sum_r, + const PtrStepSz image_sum_g, + const PtrStepSz image_sum_b, + int templ_sum_r, + int templ_sum_g, + int templ_sum_b, PtrStepSzf result, cudaStream_t stream); void matchTemplatePrepared_CCOFF_8UC4( int w, int h, - const PtrStepSz image_sum_r, - const PtrStepSz image_sum_g, - const PtrStepSz image_sum_b, - const PtrStepSz image_sum_a, - unsigned int templ_sum_r, - unsigned int templ_sum_g, - unsigned int templ_sum_b, - unsigned int templ_sum_a, + const PtrStepSz image_sum_r, + const PtrStepSz image_sum_g, + const PtrStepSz image_sum_b, + const PtrStepSz image_sum_a, + int templ_sum_r, + int templ_sum_g, + int templ_sum_b, + int templ_sum_a, PtrStepSzf result, cudaStream_t stream); void matchTemplatePrepared_CCOFF_NORMED_8U( - int w, int h, const PtrStepSz image_sum, - const PtrStepSz image_sqsum, - unsigned int templ_sum, unsigned long long templ_sqsum, + int w, int h, const PtrStepSz image_sum, + const PtrStepSz image_sqsum, + int templ_sum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream); void matchTemplatePrepared_CCOFF_NORMED_8UC2( int w, int h, - const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, - const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, - unsigned int templ_sum_r, unsigned long long templ_sqsum_r, - unsigned int templ_sum_g, unsigned long long templ_sqsum_g, + const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, + const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, + int templ_sum_r, double templ_sqsum_r, + int templ_sum_g, double templ_sqsum_g, PtrStepSzf result, cudaStream_t stream); void matchTemplatePrepared_CCOFF_NORMED_8UC3( int w, int h, - const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, - const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, - const PtrStepSz image_sum_b, const PtrStepSz image_sqsum_b, - unsigned int templ_sum_r, unsigned long long templ_sqsum_r, - unsigned int templ_sum_g, unsigned long long templ_sqsum_g, - unsigned int templ_sum_b, unsigned long long templ_sqsum_b, + const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, + const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, + const PtrStepSz image_sum_b, const PtrStepSz image_sqsum_b, + int templ_sum_r, double templ_sqsum_r, + int templ_sum_g, double templ_sqsum_g, + int templ_sum_b, double templ_sqsum_b, PtrStepSzf result, cudaStream_t stream); void matchTemplatePrepared_CCOFF_NORMED_8UC4( int w, int h, - const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, - const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, - const PtrStepSz image_sum_b, const PtrStepSz image_sqsum_b, - const PtrStepSz image_sum_a, const PtrStepSz image_sqsum_a, - unsigned int templ_sum_r, unsigned long long templ_sqsum_r, - unsigned int templ_sum_g, unsigned long long templ_sqsum_g, - unsigned int templ_sum_b, unsigned long long templ_sqsum_b, - unsigned int templ_sum_a, unsigned long long templ_sqsum_a, + const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, + const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, + const PtrStepSz image_sum_b, const PtrStepSz image_sqsum_b, + const PtrStepSz image_sum_a, const PtrStepSz image_sqsum_a, + int templ_sum_r, double templ_sqsum_r, + int templ_sum_g, double templ_sqsum_g, + int templ_sum_b, double templ_sqsum_b, + int templ_sum_a, double templ_sqsum_a, PtrStepSzf result, cudaStream_t stream); - void normalize_8U(int w, int h, const PtrStepSz image_sqsum, - unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream); + void normalize_8U(int w, int h, const PtrStepSz image_sqsum, + double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream); void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream); } @@ -290,7 +290,7 @@ namespace cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream); - unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0]; + double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0]; normalize_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream)); } @@ -361,7 +361,7 @@ namespace cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream); - unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0]; + double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0]; match_CCORR_.match(image, templ, _result, stream); GpuMat result = _result.getGpuMat(); @@ -400,7 +400,7 @@ namespace cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream); - unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0]; + double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0]; match_CCORR_.match(image, templ, _result, stream); GpuMat result = _result.getGpuMat(); @@ -446,7 +446,7 @@ namespace image_sums_.resize(1); cuda::integral(image, image_sums_[0], intBuffer_, stream); - unsigned int templ_sum = (unsigned int) cuda::sum(templ)[0]; + int templ_sum = (int) cuda::sum(templ)[0]; matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sums_[0], templ_sum, result, StreamAccessor::getStream(stream)); } @@ -465,19 +465,19 @@ namespace case 2: matchTemplatePrepared_CCOFF_8UC2( templ.cols, templ.rows, image_sums_[0], image_sums_[1], - (unsigned int) templ_sum[0], (unsigned int) templ_sum[1], + (int) templ_sum[0], (int) templ_sum[1], result, StreamAccessor::getStream(stream)); break; case 3: matchTemplatePrepared_CCOFF_8UC3( templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2], - (unsigned int) templ_sum[0], (unsigned int) templ_sum[1], (unsigned int) templ_sum[2], + (int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2], result, StreamAccessor::getStream(stream)); break; case 4: matchTemplatePrepared_CCOFF_8UC4( templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2], image_sums_[3], - (unsigned int) templ_sum[0], (unsigned int) templ_sum[1], (unsigned int) templ_sum[2], (unsigned int) templ_sum[3], + (int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2], (int) templ_sum[3], result, StreamAccessor::getStream(stream)); break; default: @@ -532,8 +532,8 @@ namespace image_sqsums_.resize(1); cuda::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream); - unsigned int templ_sum = (unsigned int) cuda::sum(templ)[0]; - unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ)[0]; + int templ_sum = (int) cuda::sum(templ)[0]; + double templ_sqsum = cuda::sqrSum(templ)[0]; matchTemplatePrepared_CCOFF_NORMED_8U( templ.cols, templ.rows, image_sums_[0], image_sqsums_[0], @@ -561,8 +561,8 @@ namespace templ.cols, templ.rows, image_sums_[0], image_sqsums_[0], image_sums_[1], image_sqsums_[1], - (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0], - (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1], + (int)templ_sum[0], templ_sqsum[0], + (int)templ_sum[1], templ_sqsum[1], result, StreamAccessor::getStream(stream)); break; case 3: @@ -571,9 +571,9 @@ namespace image_sums_[0], image_sqsums_[0], image_sums_[1], image_sqsums_[1], image_sums_[2], image_sqsums_[2], - (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0], - (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1], - (unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2], + (int)templ_sum[0], templ_sqsum[0], + (int)templ_sum[1], templ_sqsum[1], + (int)templ_sum[2], templ_sqsum[2], result, StreamAccessor::getStream(stream)); break; case 4: @@ -583,10 +583,10 @@ namespace image_sums_[1], image_sqsums_[1], image_sums_[2], image_sqsums_[2], image_sums_[3], image_sqsums_[3], - (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0], - (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1], - (unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2], - (unsigned int)templ_sum[3], (unsigned long long)templ_sqsum[3], + (int)templ_sum[0], templ_sqsum[0], + (int)templ_sum[1], templ_sqsum[1], + (int)templ_sum[2], templ_sqsum[2], + (int)templ_sum[3], templ_sqsum[3], result, StreamAccessor::getStream(stream)); break; default: