Fixed gpu::matchTemplate for correct handling of big templates (templ_sqsum is now passed as unsigned long long instead of unsigned int). Added tests.
This commit is contained in:
parent
098fc1a62e
commit
5434a9a5ec
@ -216,7 +216,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Prepared_SQDIFF
|
// Prepared_SQDIFF
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result)
|
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -232,7 +232,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream)
|
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 threads(32, 8);
|
const dim3 threads(32, 8);
|
||||||
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -244,10 +244,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, int cn,
|
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, int cn,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream);
|
typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -284,7 +284,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
__global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result)
|
__global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(
|
||||||
|
int w, int h, const PtrStep<unsigned long long> image_sqsum,
|
||||||
|
unsigned long long templ_sqsum, DevMem2Df result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -301,7 +303,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum,
|
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
|
||||||
DevMem2Df result, cudaStream_t stream)
|
DevMem2Df result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 threads(32, 8);
|
const dim3 threads(32, 8);
|
||||||
@ -315,10 +317,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum,
|
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
|
||||||
DevMem2Df result, int cn, cudaStream_t stream)
|
DevMem2Df result, int cn, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream);
|
typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>
|
0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>
|
||||||
|
@ -1739,7 +1739,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void sqrSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void sqrSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
typedef typename SumType<T>::R R;
|
typedef double R;
|
||||||
|
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
||||||
|
@ -62,10 +62,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
void matchTemplateNaive_SQDIFF_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
void matchTemplateNaive_SQDIFF_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
||||||
void matchTemplateNaive_SQDIFF_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
void matchTemplateNaive_SQDIFF_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result,
|
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result,
|
||||||
int cn, cudaStream_t stream);
|
int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result,
|
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result,
|
||||||
int cn, cudaStream_t stream);
|
int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8U(int w, int h, const DevMem2D_<unsigned int> image_sum, unsigned int templ_sum, DevMem2Df result, cudaStream_t stream);
|
void matchTemplatePrepared_CCOFF_8U(int w, int h, const DevMem2D_<unsigned int> image_sum, unsigned int templ_sum, DevMem2Df result, cudaStream_t stream);
|
||||||
@ -248,7 +248,7 @@ namespace
|
|||||||
GpuMat img_sqsum;
|
GpuMat img_sqsum;
|
||||||
sqrIntegral(image.reshape(1), img_sqsum, stream);
|
sqrIntegral(image.reshape(1), img_sqsum, stream);
|
||||||
|
|
||||||
unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];
|
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||||
|
|
||||||
matchTemplate_CCORR_8U(image, templ, result, stream);
|
matchTemplate_CCORR_8U(image, templ, result, stream);
|
||||||
matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
@ -260,7 +260,7 @@ namespace
|
|||||||
GpuMat img_sqsum;
|
GpuMat img_sqsum;
|
||||||
sqrIntegral(image.reshape(1), img_sqsum, stream);
|
sqrIntegral(image.reshape(1), img_sqsum, stream);
|
||||||
|
|
||||||
unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];
|
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||||
|
|
||||||
matchTemplate_CCORR_8U(image, templ, result, stream);
|
matchTemplate_CCORR_8U(image, templ, result, stream);
|
||||||
matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
|
@ -2775,6 +2775,52 @@ INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, Combine(
|
|||||||
ALL_DEVICES,
|
ALL_DEVICES,
|
||||||
Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")))));
|
Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")))));
|
||||||
|
|
||||||
|
|
||||||
|
// Test fixture for the "big template" matchTemplate cases.
// It is parameterized by the GPU device the tests should run on.
class MatchTemplate_CanFindBigTemplate : public TestWithParam<cv::gpu::DeviceInfo>
{
    // Makes the device described by the test parameter the current one
    // before each test body executes.
    virtual void SetUp()
    {
        const cv::gpu::DeviceInfo& devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
    }
};
|
||||||
|
|
||||||
|
// Checks that gpu::matchTemplate with CV_TM_SQDIFF_NORMED locates the template
// inside the scene: the minimum of the response map must lie at (344, 0) and
// its value must be in [0, 1e-3).
TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF_NORMED)
{
    // Fail early with a clear message if the test data is missing, instead of
    // passing empty matrices on to the GPU upload / matchTemplate call.
    cv::Mat scene = readImage("matchtemplate/scene.jpg");
    ASSERT_FALSE(scene.empty()) << "Can't load matchtemplate/scene.jpg";

    cv::Mat templ = readImage("matchtemplate/template.jpg");
    ASSERT_FALSE(templ.empty()) << "Can't load matchtemplate/template.jpg";

    cv::gpu::GpuMat d_scene(scene), d_templ(templ), d_result;
    cv::gpu::matchTemplate(d_scene, d_templ, d_result, CV_TM_SQDIFF_NORMED);

    // Only the minimum (best match for squared-difference methods) is queried.
    double minVal;
    cv::Point minLoc;
    cv::gpu::minMaxLoc(d_result, &minVal, 0, &minLoc, 0);

    ASSERT_GE(minVal, 0);
    ASSERT_LT(minVal, 1e-3);
    ASSERT_EQ(344, minLoc.x);
    ASSERT_EQ(0, minLoc.y);
}
|
||||||
|
|
||||||
|
// Checks that gpu::matchTemplate with CV_TM_SQDIFF locates the template inside
// the scene: the minimum of the response map must be non-negative and lie at
// (344, 0).
TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF)
{
    // Fail early with a clear message if the test data is missing, instead of
    // passing empty matrices on to the GPU upload / matchTemplate call.
    cv::Mat scene = readImage("matchtemplate/scene.jpg");
    ASSERT_FALSE(scene.empty()) << "Can't load matchtemplate/scene.jpg";

    cv::Mat templ = readImage("matchtemplate/template.jpg");
    ASSERT_FALSE(templ.empty()) << "Can't load matchtemplate/template.jpg";

    cv::gpu::GpuMat d_scene(scene), d_templ(templ), d_result;
    cv::gpu::matchTemplate(d_scene, d_templ, d_result, CV_TM_SQDIFF);

    // Only the minimum (best match for squared-difference methods) is queried.
    double minVal;
    cv::Point minLoc;
    cv::gpu::minMaxLoc(d_result, &minVal, 0, &minLoc, 0);

    ASSERT_GE(minVal, 0);
    ASSERT_EQ(344, minLoc.x);
    ASSERT_EQ(0, minLoc.y);
}
|
||||||
|
|
||||||
|
// Instantiates the MatchTemplate_CanFindBigTemplate tests under the "ImgProc"
// prefix, using ALL_DEVICES (defined outside this excerpt) as the parameter
// generator; each parameter is a cv::gpu::DeviceInfo, per the fixture's type.
INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CanFindBigTemplate, ALL_DEVICES);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// MulSpectrums
|
// MulSpectrums
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user