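Fix CUDA matchTemplate:
use the correct types for integral/sum outputs (int instead of unsigned int for cuda::integral / cuda::sum results, double instead of unsigned long long for cuda::sqrIntegral / cuda::sqrSum results)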
This commit is contained in:
parent
9b8c3fd675
commit
26afa49d71
@ -218,7 +218,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
// Prepared_SQDIFF
|
// Prepared_SQDIFF
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result)
|
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<double> image_sqsum, double templ_sqsum, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -234,7 +234,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream)
|
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 threads(32, 8);
|
const dim3 threads(32, 8);
|
||||||
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -246,10 +246,10 @@ namespace cv { namespace cuda { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, int cn,
|
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result, int cn,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
|
typedef void (*caller_t)(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -287,8 +287,8 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
__global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(
|
__global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(
|
||||||
int w, int h, const PtrStep<unsigned long long> image_sqsum,
|
int w, int h, const PtrStep<double> image_sqsum,
|
||||||
unsigned long long templ_sqsum, PtrStepSzf result)
|
double templ_sqsum, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -305,7 +305,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
|
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum,
|
||||||
PtrStepSzf result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 threads(32, 8);
|
const dim3 threads(32, 8);
|
||||||
@ -319,10 +319,10 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
|
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum,
|
||||||
PtrStepSzf result, int cn, cudaStream_t stream)
|
PtrStepSzf result, int cn, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
|
typedef void (*caller_t)(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream);
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>
|
0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>
|
||||||
@ -334,7 +334,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// Prepared_CCOFF
|
// Prepared_CCOFF
|
||||||
|
|
||||||
__global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep<unsigned int> image_sum, PtrStepSzf result)
|
__global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep<int> image_sum, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -349,7 +349,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<unsigned int> image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream)
|
void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<int> image_sum, int templ_sum, PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -365,8 +365,8 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
__global__ void matchTemplatePreparedKernel_CCOFF_8UC2(
|
__global__ void matchTemplatePreparedKernel_CCOFF_8UC2(
|
||||||
int w, int h, float templ_sum_scale_r, float templ_sum_scale_g,
|
int w, int h, float templ_sum_scale_r, float templ_sum_scale_g,
|
||||||
const PtrStep<unsigned int> image_sum_r,
|
const PtrStep<int> image_sum_r,
|
||||||
const PtrStep<unsigned int> image_sum_g,
|
const PtrStep<int> image_sum_g,
|
||||||
PtrStepSzf result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -388,9 +388,9 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8UC2(
|
void matchTemplatePrepared_CCOFF_8UC2(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r,
|
const PtrStepSz<int> image_sum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g,
|
const PtrStepSz<int> image_sum_g,
|
||||||
unsigned int templ_sum_r, unsigned int templ_sum_g,
|
int templ_sum_r, int templ_sum_g,
|
||||||
PtrStepSzf result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
@ -412,9 +412,9 @@ namespace cv { namespace cuda { namespace device
|
|||||||
float templ_sum_scale_r,
|
float templ_sum_scale_r,
|
||||||
float templ_sum_scale_g,
|
float templ_sum_scale_g,
|
||||||
float templ_sum_scale_b,
|
float templ_sum_scale_b,
|
||||||
const PtrStep<unsigned int> image_sum_r,
|
const PtrStep<int> image_sum_r,
|
||||||
const PtrStep<unsigned int> image_sum_g,
|
const PtrStep<int> image_sum_g,
|
||||||
const PtrStep<unsigned int> image_sum_b,
|
const PtrStep<int> image_sum_b,
|
||||||
PtrStepSzf result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -440,12 +440,12 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8UC3(
|
void matchTemplatePrepared_CCOFF_8UC3(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r,
|
const PtrStepSz<int> image_sum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g,
|
const PtrStepSz<int> image_sum_g,
|
||||||
const PtrStepSz<unsigned int> image_sum_b,
|
const PtrStepSz<int> image_sum_b,
|
||||||
unsigned int templ_sum_r,
|
int templ_sum_r,
|
||||||
unsigned int templ_sum_g,
|
int templ_sum_g,
|
||||||
unsigned int templ_sum_b,
|
int templ_sum_b,
|
||||||
PtrStepSzf result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
@ -471,10 +471,10 @@ namespace cv { namespace cuda { namespace device
|
|||||||
float templ_sum_scale_g,
|
float templ_sum_scale_g,
|
||||||
float templ_sum_scale_b,
|
float templ_sum_scale_b,
|
||||||
float templ_sum_scale_a,
|
float templ_sum_scale_a,
|
||||||
const PtrStep<unsigned int> image_sum_r,
|
const PtrStep<int> image_sum_r,
|
||||||
const PtrStep<unsigned int> image_sum_g,
|
const PtrStep<int> image_sum_g,
|
||||||
const PtrStep<unsigned int> image_sum_b,
|
const PtrStep<int> image_sum_b,
|
||||||
const PtrStep<unsigned int> image_sum_a,
|
const PtrStep<int> image_sum_a,
|
||||||
PtrStepSzf result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -504,14 +504,14 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8UC4(
|
void matchTemplatePrepared_CCOFF_8UC4(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r,
|
const PtrStepSz<int> image_sum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g,
|
const PtrStepSz<int> image_sum_g,
|
||||||
const PtrStepSz<unsigned int> image_sum_b,
|
const PtrStepSz<int> image_sum_b,
|
||||||
const PtrStepSz<unsigned int> image_sum_a,
|
const PtrStepSz<int> image_sum_a,
|
||||||
unsigned int templ_sum_r,
|
int templ_sum_r,
|
||||||
unsigned int templ_sum_g,
|
int templ_sum_g,
|
||||||
unsigned int templ_sum_b,
|
int templ_sum_b,
|
||||||
unsigned int templ_sum_a,
|
int templ_sum_a,
|
||||||
PtrStepSzf result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
@ -537,8 +537,8 @@ namespace cv { namespace cuda { namespace device
|
|||||||
__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U(
|
__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U(
|
||||||
int w, int h, float weight,
|
int w, int h, float weight,
|
||||||
float templ_sum_scale, float templ_sqsum_scale,
|
float templ_sum_scale, float templ_sqsum_scale,
|
||||||
const PtrStep<unsigned int> image_sum,
|
const PtrStep<int> image_sum,
|
||||||
const PtrStep<unsigned long long> image_sqsum,
|
const PtrStep<double> image_sqsum,
|
||||||
PtrStepSzf result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -559,9 +559,9 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8U(
|
void matchTemplatePrepared_CCOFF_NORMED_8U(
|
||||||
int w, int h, const PtrStepSz<unsigned int> image_sum,
|
int w, int h, const PtrStepSz<int> image_sum,
|
||||||
const PtrStepSz<unsigned long long> image_sqsum,
|
const PtrStepSz<double> image_sqsum,
|
||||||
unsigned int templ_sum, unsigned long long templ_sqsum,
|
int templ_sum, double templ_sqsum,
|
||||||
PtrStepSzf result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
@ -586,8 +586,8 @@ namespace cv { namespace cuda { namespace device
|
|||||||
int w, int h, float weight,
|
int w, int h, float weight,
|
||||||
float templ_sum_scale_r, float templ_sum_scale_g,
|
float templ_sum_scale_r, float templ_sum_scale_g,
|
||||||
float templ_sqsum_scale,
|
float templ_sqsum_scale,
|
||||||
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
|
const PtrStep<int> image_sum_r, const PtrStep<double> image_sqsum_r,
|
||||||
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
const PtrStep<int> image_sum_g, const PtrStep<double> image_sqsum_g,
|
||||||
PtrStepSzf result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -618,10 +618,10 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
int templ_sum_r, double templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
int templ_sum_g, double templ_sqsum_g,
|
||||||
PtrStepSzf result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
@ -652,9 +652,9 @@ namespace cv { namespace cuda { namespace device
|
|||||||
int w, int h, float weight,
|
int w, int h, float weight,
|
||||||
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
|
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
|
||||||
float templ_sqsum_scale,
|
float templ_sqsum_scale,
|
||||||
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
|
const PtrStep<int> image_sum_r, const PtrStep<double> image_sqsum_r,
|
||||||
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
const PtrStep<int> image_sum_g, const PtrStep<double> image_sqsum_g,
|
||||||
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
|
const PtrStep<int> image_sum_b, const PtrStep<double> image_sqsum_b,
|
||||||
PtrStepSzf result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -693,12 +693,12 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
|
||||||
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
|
const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
int templ_sum_r, double templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
int templ_sum_g, double templ_sqsum_g,
|
||||||
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
int templ_sum_b, double templ_sqsum_b,
|
||||||
PtrStepSzf result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
@ -732,10 +732,10 @@ namespace cv { namespace cuda { namespace device
|
|||||||
int w, int h, float weight,
|
int w, int h, float weight,
|
||||||
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
|
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
|
||||||
float templ_sum_scale_a, float templ_sqsum_scale,
|
float templ_sum_scale_a, float templ_sqsum_scale,
|
||||||
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
|
const PtrStep<int> image_sum_r, const PtrStep<double> image_sqsum_r,
|
||||||
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
const PtrStep<int> image_sum_g, const PtrStep<double> image_sqsum_g,
|
||||||
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
|
const PtrStep<int> image_sum_b, const PtrStep<double> image_sqsum_b,
|
||||||
const PtrStep<unsigned int> image_sum_a, const PtrStep<unsigned long long> image_sqsum_a,
|
const PtrStep<int> image_sum_a, const PtrStep<double> image_sqsum_a,
|
||||||
PtrStepSzf result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -780,14 +780,14 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
|
||||||
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
|
const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
|
||||||
const PtrStepSz<unsigned int> image_sum_a, const PtrStepSz<unsigned long long> image_sqsum_a,
|
const PtrStepSz<int> image_sum_a, const PtrStepSz<double> image_sqsum_a,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
int templ_sum_r, double templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
int templ_sum_g, double templ_sqsum_g,
|
||||||
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
int templ_sum_b, double templ_sqsum_b,
|
||||||
unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
|
int templ_sum_a, double templ_sqsum_a,
|
||||||
PtrStepSzf result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
@ -823,8 +823,8 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
__global__ void normalizeKernel_8U(
|
__global__ void normalizeKernel_8U(
|
||||||
int w, int h, const PtrStep<unsigned long long> image_sqsum,
|
int w, int h, const PtrStep<double> image_sqsum,
|
||||||
unsigned long long templ_sqsum, PtrStepSzf result)
|
double templ_sqsum, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -838,8 +838,8 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void normalize_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum,
|
void normalize_8U(int w, int h, const PtrStepSz<double> image_sqsum,
|
||||||
unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream)
|
double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
|
@ -61,77 +61,77 @@ namespace cv { namespace cuda { namespace device
|
|||||||
void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
|
void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
|
||||||
void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
|
void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result,
|
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result,
|
||||||
int cn, cudaStream_t stream);
|
int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result,
|
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result,
|
||||||
int cn, cudaStream_t stream);
|
int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<unsigned int> image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream);
|
void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<int> image_sum, int templ_sum, PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_8UC2(
|
void matchTemplatePrepared_CCOFF_8UC2(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r,
|
const PtrStepSz<int> image_sum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g,
|
const PtrStepSz<int> image_sum_g,
|
||||||
unsigned int templ_sum_r,
|
int templ_sum_r,
|
||||||
unsigned int templ_sum_g,
|
int templ_sum_g,
|
||||||
PtrStepSzf result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_8UC3(
|
void matchTemplatePrepared_CCOFF_8UC3(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r,
|
const PtrStepSz<int> image_sum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g,
|
const PtrStepSz<int> image_sum_g,
|
||||||
const PtrStepSz<unsigned int> image_sum_b,
|
const PtrStepSz<int> image_sum_b,
|
||||||
unsigned int templ_sum_r,
|
int templ_sum_r,
|
||||||
unsigned int templ_sum_g,
|
int templ_sum_g,
|
||||||
unsigned int templ_sum_b,
|
int templ_sum_b,
|
||||||
PtrStepSzf result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_8UC4(
|
void matchTemplatePrepared_CCOFF_8UC4(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r,
|
const PtrStepSz<int> image_sum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g,
|
const PtrStepSz<int> image_sum_g,
|
||||||
const PtrStepSz<unsigned int> image_sum_b,
|
const PtrStepSz<int> image_sum_b,
|
||||||
const PtrStepSz<unsigned int> image_sum_a,
|
const PtrStepSz<int> image_sum_a,
|
||||||
unsigned int templ_sum_r,
|
int templ_sum_r,
|
||||||
unsigned int templ_sum_g,
|
int templ_sum_g,
|
||||||
unsigned int templ_sum_b,
|
int templ_sum_b,
|
||||||
unsigned int templ_sum_a,
|
int templ_sum_a,
|
||||||
PtrStepSzf result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
|
|
||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8U(
|
void matchTemplatePrepared_CCOFF_NORMED_8U(
|
||||||
int w, int h, const PtrStepSz<unsigned int> image_sum,
|
int w, int h, const PtrStepSz<int> image_sum,
|
||||||
const PtrStepSz<unsigned long long> image_sqsum,
|
const PtrStepSz<double> image_sqsum,
|
||||||
unsigned int templ_sum, unsigned long long templ_sqsum,
|
int templ_sum, double templ_sqsum,
|
||||||
PtrStepSzf result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
int templ_sum_r, double templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
int templ_sum_g, double templ_sqsum_g,
|
||||||
PtrStepSzf result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
|
||||||
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
|
const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
int templ_sum_r, double templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
int templ_sum_g, double templ_sqsum_g,
|
||||||
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
int templ_sum_b, double templ_sqsum_b,
|
||||||
PtrStepSzf result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
|
||||||
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
|
||||||
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
|
const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
|
||||||
const PtrStepSz<unsigned int> image_sum_a, const PtrStepSz<unsigned long long> image_sqsum_a,
|
const PtrStepSz<int> image_sum_a, const PtrStepSz<double> image_sqsum_a,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
int templ_sum_r, double templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
int templ_sum_g, double templ_sqsum_g,
|
||||||
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
int templ_sum_b, double templ_sqsum_b,
|
||||||
unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
|
int templ_sum_a, double templ_sqsum_a,
|
||||||
PtrStepSzf result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
|
|
||||||
void normalize_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum,
|
void normalize_8U(int w, int h, const PtrStepSz<double> image_sqsum,
|
||||||
unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream);
|
double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream);
|
void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
@ -290,7 +290,7 @@ namespace
|
|||||||
|
|
||||||
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
|
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
|
||||||
|
|
||||||
unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
|
double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
|
||||||
|
|
||||||
normalize_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
normalize_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
@ -361,7 +361,7 @@ namespace
|
|||||||
|
|
||||||
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
|
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
|
||||||
|
|
||||||
unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
|
double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
|
||||||
|
|
||||||
match_CCORR_.match(image, templ, _result, stream);
|
match_CCORR_.match(image, templ, _result, stream);
|
||||||
GpuMat result = _result.getGpuMat();
|
GpuMat result = _result.getGpuMat();
|
||||||
@ -400,7 +400,7 @@ namespace
|
|||||||
|
|
||||||
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
|
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
|
||||||
|
|
||||||
unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
|
double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
|
||||||
|
|
||||||
match_CCORR_.match(image, templ, _result, stream);
|
match_CCORR_.match(image, templ, _result, stream);
|
||||||
GpuMat result = _result.getGpuMat();
|
GpuMat result = _result.getGpuMat();
|
||||||
@ -446,7 +446,7 @@ namespace
|
|||||||
image_sums_.resize(1);
|
image_sums_.resize(1);
|
||||||
cuda::integral(image, image_sums_[0], intBuffer_, stream);
|
cuda::integral(image, image_sums_[0], intBuffer_, stream);
|
||||||
|
|
||||||
unsigned int templ_sum = (unsigned int) cuda::sum(templ)[0];
|
int templ_sum = (int) cuda::sum(templ)[0];
|
||||||
|
|
||||||
matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sums_[0], templ_sum, result, StreamAccessor::getStream(stream));
|
matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sums_[0], templ_sum, result, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
@ -465,19 +465,19 @@ namespace
|
|||||||
case 2:
|
case 2:
|
||||||
matchTemplatePrepared_CCOFF_8UC2(
|
matchTemplatePrepared_CCOFF_8UC2(
|
||||||
templ.cols, templ.rows, image_sums_[0], image_sums_[1],
|
templ.cols, templ.rows, image_sums_[0], image_sums_[1],
|
||||||
(unsigned int) templ_sum[0], (unsigned int) templ_sum[1],
|
(int) templ_sum[0], (int) templ_sum[1],
|
||||||
result, StreamAccessor::getStream(stream));
|
result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
matchTemplatePrepared_CCOFF_8UC3(
|
matchTemplatePrepared_CCOFF_8UC3(
|
||||||
templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2],
|
templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2],
|
||||||
(unsigned int) templ_sum[0], (unsigned int) templ_sum[1], (unsigned int) templ_sum[2],
|
(int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2],
|
||||||
result, StreamAccessor::getStream(stream));
|
result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
matchTemplatePrepared_CCOFF_8UC4(
|
matchTemplatePrepared_CCOFF_8UC4(
|
||||||
templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2], image_sums_[3],
|
templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2], image_sums_[3],
|
||||||
(unsigned int) templ_sum[0], (unsigned int) templ_sum[1], (unsigned int) templ_sum[2], (unsigned int) templ_sum[3],
|
(int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2], (int) templ_sum[3],
|
||||||
result, StreamAccessor::getStream(stream));
|
result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -532,8 +532,8 @@ namespace
|
|||||||
image_sqsums_.resize(1);
|
image_sqsums_.resize(1);
|
||||||
cuda::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream);
|
cuda::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream);
|
||||||
|
|
||||||
unsigned int templ_sum = (unsigned int) cuda::sum(templ)[0];
|
int templ_sum = (int) cuda::sum(templ)[0];
|
||||||
unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ)[0];
|
double templ_sqsum = cuda::sqrSum(templ)[0];
|
||||||
|
|
||||||
matchTemplatePrepared_CCOFF_NORMED_8U(
|
matchTemplatePrepared_CCOFF_NORMED_8U(
|
||||||
templ.cols, templ.rows, image_sums_[0], image_sqsums_[0],
|
templ.cols, templ.rows, image_sums_[0], image_sqsums_[0],
|
||||||
@ -561,8 +561,8 @@ namespace
|
|||||||
templ.cols, templ.rows,
|
templ.cols, templ.rows,
|
||||||
image_sums_[0], image_sqsums_[0],
|
image_sums_[0], image_sqsums_[0],
|
||||||
image_sums_[1], image_sqsums_[1],
|
image_sums_[1], image_sqsums_[1],
|
||||||
(unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
|
(int)templ_sum[0], templ_sqsum[0],
|
||||||
(unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
|
(int)templ_sum[1], templ_sqsum[1],
|
||||||
result, StreamAccessor::getStream(stream));
|
result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
@ -571,9 +571,9 @@ namespace
|
|||||||
image_sums_[0], image_sqsums_[0],
|
image_sums_[0], image_sqsums_[0],
|
||||||
image_sums_[1], image_sqsums_[1],
|
image_sums_[1], image_sqsums_[1],
|
||||||
image_sums_[2], image_sqsums_[2],
|
image_sums_[2], image_sqsums_[2],
|
||||||
(unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
|
(int)templ_sum[0], templ_sqsum[0],
|
||||||
(unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
|
(int)templ_sum[1], templ_sqsum[1],
|
||||||
(unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
|
(int)templ_sum[2], templ_sqsum[2],
|
||||||
result, StreamAccessor::getStream(stream));
|
result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
@ -583,10 +583,10 @@ namespace
|
|||||||
image_sums_[1], image_sqsums_[1],
|
image_sums_[1], image_sqsums_[1],
|
||||||
image_sums_[2], image_sqsums_[2],
|
image_sums_[2], image_sqsums_[2],
|
||||||
image_sums_[3], image_sqsums_[3],
|
image_sums_[3], image_sqsums_[3],
|
||||||
(unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
|
(int)templ_sum[0], templ_sqsum[0],
|
||||||
(unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
|
(int)templ_sum[1], templ_sqsum[1],
|
||||||
(unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
|
(int)templ_sum[2], templ_sqsum[2],
|
||||||
(unsigned int)templ_sum[3], (unsigned long long)templ_sqsum[3],
|
(int)templ_sum[3], templ_sqsum[3],
|
||||||
result, StreamAccessor::getStream(stream));
|
result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
Loading…
Reference in New Issue
Block a user