Merge release 2.4.4

This commit is contained in:
Andrey Kamaev
2013-03-04 19:29:40 +04:00
64 changed files with 11575 additions and 3169 deletions

View File

@@ -3,15 +3,14 @@
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// StereoBM
typedef std::tr1::tuple<string, string> pair_string;
DEF_PARAM_TEST_1(ImagePair, pair_string);
PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
PERF_TEST_P(ImagePair, Calib3D_StereoBM,
Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
{
declare.time(5.0);
@@ -28,18 +27,13 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png",
{
cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_imgLeft(imgLeft);
const cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat dst;
d_bm(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE() d_bm(d_imgLeft, d_imgRight, dst);
TEST_CYCLE()
{
d_bm(d_imgLeft, d_imgRight, d_dst);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
@@ -47,12 +41,7 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png",
cv::Mat dst;
bm(imgLeft, imgRight, dst);
TEST_CYCLE()
{
bm(imgLeft, imgRight, dst);
}
TEST_CYCLE() bm(imgLeft, imgRight, dst);
CPU_SANITY_CHECK(dst);
}
@@ -61,7 +50,8 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png",
//////////////////////////////////////////////////////////////////////
// StereoBeliefPropagation
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation,
Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
{
declare.time(10.0);
@@ -77,29 +67,25 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(pair_string("gpu/
{
cv::gpu::StereoBeliefPropagation d_bp(ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_imgLeft(imgLeft);
const cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat dst;
d_bp(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE() d_bp(d_imgLeft, d_imgRight, dst);
TEST_CYCLE()
{
d_bp(d_imgLeft, d_imgRight, d_dst);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
FAIL_NO_CPU();
}
}
//////////////////////////////////////////////////////////////////////
// StereoConstantSpaceBP
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP,
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
{
declare.time(10.0);
@@ -115,29 +101,25 @@ PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(pair_string("gpu/st
{
cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_imgLeft(imgLeft);
const cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat dst;
d_csbp(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE() d_csbp(d_imgLeft, d_imgRight, dst);
TEST_CYCLE()
{
d_csbp(d_imgLeft, d_imgRight, d_dst);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
FAIL_NO_CPU();
}
}
//////////////////////////////////////////////////////////////////////
// DisparityBilateralFilter
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter,
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
{
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
@@ -151,22 +133,17 @@ PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu
{
cv::gpu::DisparityBilateralFilter d_filter(ndisp);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_disp(disp);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_img(img);
const cv::gpu::GpuMat d_disp(disp);
cv::gpu::GpuMat dst;
d_filter(d_disp, d_img, d_dst);
TEST_CYCLE() d_filter(d_disp, d_img, dst);
TEST_CYCLE()
{
d_filter(d_disp, d_img, d_dst);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
FAIL_NO_CPU();
}
}
@@ -175,45 +152,42 @@ PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu
DEF_PARAM_TEST_1(Count, int);
PERF_TEST_P(Count, Calib3D_TransformPoints, Values(5000, 10000, 20000))
PERF_TEST_P(Count, Calib3D_TransformPoints,
Values(5000, 10000, 20000))
{
const int count = GetParam();
cv::Mat src(1, count, CV_32FC3);
fillRandom(src, -100, 100);
declare.in(src, WARMUP_RNG);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
TEST_CYCLE() cv::gpu::transformPoints(d_src, rvec, tvec, dst);
TEST_CYCLE()
{
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
FAIL_NO_CPU();
}
}
//////////////////////////////////////////////////////////////////////
// ProjectPoints
PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
PERF_TEST_P(Count, Calib3D_ProjectPoints,
Values(5000, 10000, 20000))
{
const int count = GetParam();
cv::Mat src(1, count, CV_32FC3);
fillRandom(src, -100, 100);
declare.in(src, WARMUP_RNG);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
@@ -221,28 +195,18 @@ PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
TEST_CYCLE() cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), dst);
TEST_CYCLE()
{
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
TEST_CYCLE()
{
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
}
TEST_CYCLE() cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
CPU_SANITY_CHECK(dst);
}
@@ -251,17 +215,18 @@ PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
//////////////////////////////////////////////////////////////////////
// SolvePnPRansac
PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
PERF_TEST_P(Count, Calib3D_SolvePnPRansac,
Values(5000, 10000, 20000))
{
declare.time(10.0);
const int count = GetParam();
cv::Mat object(1, count, CV_32FC3);
fillRandom(object, -100, 100);
declare.in(object, WARMUP_RNG);
cv::Mat camera_mat(3, 3, CV_32FC1);
fillRandom(camera_mat, 0.5, 1);
cv::randu(camera_mat, 0.5, 1);
camera_mat.at<float>(0, 1) = 0.f;
camera_mat.at<float>(1, 0) = 0.f;
camera_mat.at<float>(2, 0) = 0.f;
@@ -269,79 +234,66 @@ PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
std::vector<cv::Point2f> image_vec;
cv::Mat rvec_gold(1, 3, CV_32FC1);
fillRandom(rvec_gold, 0, 1);
cv::randu(rvec_gold, 0, 1);
cv::Mat tvec_gold(1, 3, CV_32FC1);
fillRandom(tvec_gold, 0, 1);
cv::randu(tvec_gold, 0, 1);
std::vector<cv::Point2f> image_vec;
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
const cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
cv::Mat rvec;
cv::Mat tvec;
if (PERF_RUN_GPU())
{
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
TEST_CYCLE() cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
TEST_CYCLE()
{
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
GPU_SANITY_CHECK(rvec, 1e-3);
GPU_SANITY_CHECK(tvec, 1e-3);
}
else
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
TEST_CYCLE() cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
TEST_CYCLE()
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
CPU_SANITY_CHECK(rvec, 1e-6);
CPU_SANITY_CHECK(tvec, 1e-6);
}
CPU_SANITY_CHECK(rvec);
CPU_SANITY_CHECK(tvec);
}
//////////////////////////////////////////////////////////////////////
// ReprojectImageTo3D
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16S)))
{
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
cv::Mat src(size, depth);
fillRandom(src, 5.0, 30.0);
declare.in(src, WARMUP_RNG);
cv::Mat Q(4, 4, CV_32FC1);
fillRandom(Q, 0.1, 1.0);
cv::randu(Q, 0.1, 1.0);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
TEST_CYCLE() cv::gpu::reprojectImageTo3D(d_src, dst, Q);
TEST_CYCLE()
{
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::reprojectImageTo3D(src, dst, Q);
TEST_CYCLE()
{
cv::reprojectImageTo3D(src, dst, Q);
}
TEST_CYCLE() cv::reprojectImageTo3D(src, dst, Q);
CPU_SANITY_CHECK(dst);
}
@@ -350,32 +302,27 @@ PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES,
//////////////////////////////////////////////////////////////////////
// DrawColorDisp
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16S)))
{
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src, 0, 255);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::drawColorDisp(d_src, d_dst, 255);
TEST_CYCLE() cv::gpu::drawColorDisp(d_src, dst, 255);
TEST_CYCLE()
{
cv::gpu::drawColorDisp(d_src, d_dst, 255);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
FAIL_NO_CPU();
}
}
} // namespace

File diff suppressed because it is too large Load Diff

View File

@@ -3,8 +3,7 @@
using namespace std;
using namespace testing;
#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::szXGA, perf::sz720p, perf::sz1080p)
#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::sz720p)
//////////////////////////////////////////////////////////////////////
// BilateralFilter
@@ -12,96 +11,86 @@ using namespace testing;
DEF_PARAM_TEST(Sz_Depth_Cn_KernelSz, cv::Size, MatDepth, MatCn, int);
PERF_TEST_P(Sz_Depth_Cn_KernelSz, Denoising_BilateralFilter,
Combine(GPU_DENOISING_IMAGE_SIZES, Values(CV_8U, CV_32F), GPU_CHANNELS_1_3, Values(3, 5, 9)))
Combine(GPU_DENOISING_IMAGE_SIZES,
Values(CV_8U, CV_32F),
GPU_CHANNELS_1_3,
Values(3, 5, 9)))
{
declare.time(60.0);
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
int kernel_size = GET_PARAM(3);
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
const int channels = GET_PARAM(2);
const int kernel_size = GET_PARAM(3);
float sigma_color = 7;
float sigma_spatial = 5;
int borderMode = cv::BORDER_REFLECT101;
const float sigma_color = 7;
const float sigma_spatial = 5;
const int borderMode = cv::BORDER_REFLECT101;
int type = CV_MAKE_TYPE(depth, channels);
const int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::bilateralFilter(d_src, d_dst, kernel_size, sigma_color, sigma_spatial, borderMode);
TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
TEST_CYCLE()
{
cv::gpu::bilateralFilter(d_src, d_dst, kernel_size, sigma_color, sigma_spatial, borderMode);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
TEST_CYCLE()
{
cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
}
TEST_CYCLE() cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// nonLocalMeans
DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), GPU_CHANNELS_1_3, Values(21), Values(5, 7)))
Combine(GPU_DENOISING_IMAGE_SIZES,
Values<MatDepth>(CV_8U),
GPU_CHANNELS_1_3,
Values(21),
Values(5)))
{
declare.time(60.0);
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
const int channels = GET_PARAM(2);
const int search_widow_size = GET_PARAM(3);
const int block_size = GET_PARAM(4);
int search_widow_size = GET_PARAM(3);
int block_size = GET_PARAM(4);
const float h = 10;
const int borderMode = cv::BORDER_REFLECT101;
float h = 10;
int borderMode = cv::BORDER_REFLECT101;
int type = CV_MAKE_TYPE(depth, channels);
const int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::nonLocalMeans(d_src, d_dst, h, search_widow_size, block_size, borderMode);
TEST_CYCLE() cv::gpu::nonLocalMeans(d_src, dst, h, search_widow_size, block_size, borderMode);
TEST_CYCLE()
{
cv::gpu::nonLocalMeans(d_src, d_dst, h, search_widow_size, block_size, borderMode);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL() << "No such CPU implementation analogy";
FAIL_NO_CPU();
}
}
@@ -112,46 +101,41 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), GPU_CHANNELS_1_3, Values(21), Values(7)))
Combine(GPU_DENOISING_IMAGE_SIZES,
Values<MatDepth>(CV_8U),
GPU_CHANNELS_1_3,
Values(21),
Values(7)))
{
declare.time(150.0);
declare.time(60.0);
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
const int search_widow_size = GET_PARAM(2);
const int block_size = GET_PARAM(3);
int search_widow_size = GET_PARAM(2);
int block_size = GET_PARAM(3);
float h = 10;
int type = CV_MAKE_TYPE(depth, 1);
const float h = 10;
const int type = CV_MAKE_TYPE(depth, 1);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::FastNonLocalMeansDenoising fnlmd;
fnlmd.simpleMethod(d_src, d_dst, h, search_widow_size, block_size);
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
TEST_CYCLE()
{
fnlmd.simpleMethod(d_src, d_dst, h, search_widow_size, block_size);
}
TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size);
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
TEST_CYCLE()
{
cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
}
TEST_CYCLE() cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
CPU_SANITY_CHECK(dst);
}
@@ -163,47 +147,41 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
DEF_PARAM_TEST(Sz_Depth_WinSz_BlockSz, cv::Size, MatDepth, int, int);
PERF_TEST_P(Sz_Depth_WinSz_BlockSz, Denoising_FastNonLocalMeansColored,
Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), Values(21), Values(7)))
Combine(GPU_DENOISING_IMAGE_SIZES,
Values<MatDepth>(CV_8U),
Values(21),
Values(7)))
{
declare.time(350.0);
declare.time(60.0);
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
const int search_widow_size = GET_PARAM(2);
const int block_size = GET_PARAM(3);
int search_widow_size = GET_PARAM(2);
int block_size = GET_PARAM(3);
float h = 10;
int type = CV_MAKE_TYPE(depth, 3);
const float h = 10;
const int type = CV_MAKE_TYPE(depth, 3);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::FastNonLocalMeansDenoising fnlmd;
fnlmd.labMethod(d_src, d_dst, h, h, search_widow_size, block_size);
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
TEST_CYCLE()
{
fnlmd.labMethod(d_src, d_dst, h, h, search_widow_size, block_size);
}
TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size);
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
TEST_CYCLE()
{
cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
}
TEST_CYCLE() cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
CPU_SANITY_CHECK(dst);
}
}
}

View File

@@ -3,139 +3,194 @@
using namespace std;
using namespace testing;
namespace {
struct KeypointIdxCompare
{
std::vector<cv::KeyPoint>* keypoints;
explicit KeypointIdxCompare(std::vector<cv::KeyPoint>* _keypoints) : keypoints(_keypoints) {}
bool operator ()(size_t i1, size_t i2) const
{
cv::KeyPoint kp1 = (*keypoints)[i1];
cv::KeyPoint kp2 = (*keypoints)[i2];
if (kp1.pt.x != kp2.pt.x)
return kp1.pt.x < kp2.pt.x;
if (kp1.pt.y != kp2.pt.y)
return kp1.pt.y < kp2.pt.y;
if (kp1.response != kp2.response)
return kp1.response < kp2.response;
return kp1.octave < kp2.octave;
}
};
// Sorts `keypoints` in place into the order defined by KeypointIdxCompare.
// When a descriptor array is supplied, its rows are permuted the same way so
// that row i still belongs to keypoint i after sorting.
// NOTE(review): assumes the descriptor matrix has one row per keypoint — confirm at call sites.
static void sortKeyPoints(std::vector<cv::KeyPoint>& keypoints, cv::InputOutputArray _descriptors = cv::noArray())
{
    const size_t total = keypoints.size();

    // Sort a permutation of indices instead of the keypoints themselves, so
    // the same permutation can be applied to the descriptor rows afterwards.
    std::vector<size_t> order(total);
    for (size_t pos = 0; pos < total; ++pos)
        order[pos] = pos;

    std::sort(order.begin(), order.end(), KeypointIdxCompare(&keypoints));

    const bool withDescriptors = _descriptors.needed();

    cv::Mat srcRows;
    cv::Mat sortedRows;
    if (withDescriptors)
    {
        srcRows = _descriptors.getMat();
        sortedRows.create(srcRows.size(), srcRows.type());
    }

    std::vector<cv::KeyPoint> sortedKeypoints(total);
    for (size_t pos = 0; pos < total; ++pos)
    {
        const size_t from = order[pos];

        sortedKeypoints[pos] = keypoints[from];

        // Skipped when no descriptors were supplied (or the matrix is empty).
        if (!sortedRows.empty())
            srcRows.row(static_cast<int>(from)).copyTo(sortedRows.row(static_cast<int>(pos)));
    }

    keypoints.swap(sortedKeypoints);

    if (withDescriptors)
        sortedRows.copyTo(_descriptors);
}
//////////////////////////////////////////////////////////////////////
// SURF
DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, Features2D_SURF, Values<string>("gpu/perf/aloe.png"))
PERF_TEST_P(Image, Features2D_SURF,
Values<string>("gpu/perf/aloe.png"))
{
declare.time(50.0);
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::SURF_GPU d_surf;
cv::gpu::GpuMat d_img(img);
const cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE() d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE()
{
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
}
std::vector<cv::KeyPoint> gpu_keypoints;
d_surf.downloadKeypoints(d_keypoints, gpu_keypoints);
GPU_SANITY_CHECK(d_descriptors, 1e-4);
GPU_SANITY_CHECK_KEYPOINTS(SURF, d_keypoints);
cv::Mat gpu_descriptors(d_descriptors);
sortKeyPoints(gpu_keypoints, gpu_descriptors);
SANITY_CHECK_KEYPOINTS(gpu_keypoints);
SANITY_CHECK(gpu_descriptors, 1e-3);
}
else
{
cv::SURF surf;
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
std::vector<cv::KeyPoint> cpu_keypoints;
cv::Mat cpu_descriptors;
surf(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE() surf(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
TEST_CYCLE()
{
keypoints.clear();
surf(img, cv::noArray(), keypoints, descriptors);
}
SANITY_CHECK_KEYPOINTS(keypoints);
SANITY_CHECK(descriptors, 1e-4);
SANITY_CHECK_KEYPOINTS(cpu_keypoints);
SANITY_CHECK(cpu_descriptors);
}
}
//////////////////////////////////////////////////////////////////////
// FAST
PERF_TEST_P(Image, Features2D_FAST, Values<string>("gpu/perf/aloe.png"))
DEF_PARAM_TEST(Image_Threshold_NonMaxSupression, string, int, bool);
PERF_TEST_P(Image_Threshold_NonMaxSupression, Features2D_FAST,
Combine(Values<string>("gpu/perf/aloe.png"),
Values(20),
Bool()))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
const int threshold = GET_PARAM(1);
const bool nonMaxSuppersion = GET_PARAM(2);
if (PERF_RUN_GPU())
{
cv::gpu::FAST_GPU d_fast(20);
cv::gpu::FAST_GPU d_fast(threshold, nonMaxSuppersion, 0.5);
cv::gpu::GpuMat d_img(img);
const cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints;
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
TEST_CYCLE() d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
TEST_CYCLE()
{
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
}
std::vector<cv::KeyPoint> gpu_keypoints;
d_fast.downloadKeypoints(d_keypoints, gpu_keypoints);
GPU_SANITY_CHECK_RESPONSE(FAST, d_keypoints);
sortKeyPoints(gpu_keypoints);
SANITY_CHECK_KEYPOINTS(gpu_keypoints);
}
else
{
std::vector<cv::KeyPoint> keypoints;
std::vector<cv::KeyPoint> cpu_keypoints;
cv::FAST(img, keypoints, 20);
TEST_CYCLE() cv::FAST(img, cpu_keypoints, threshold, nonMaxSuppersion);
TEST_CYCLE()
{
keypoints.clear();
cv::FAST(img, keypoints, 20);
}
SANITY_CHECK_KEYPOINTS(keypoints);
SANITY_CHECK_KEYPOINTS(cpu_keypoints);
}
}
//////////////////////////////////////////////////////////////////////
// ORB
PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.png"))
DEF_PARAM_TEST(Image_NFeatures, string, int);
PERF_TEST_P(Image_NFeatures, Features2D_ORB,
Combine(Values<string>("gpu/perf/aloe.png"),
Values(4000)))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
const int nFeatures = GET_PARAM(1);
if (PERF_RUN_GPU())
{
cv::gpu::ORB_GPU d_orb(4000);
cv::gpu::ORB_GPU d_orb(nFeatures);
cv::gpu::GpuMat d_img(img);
const cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE() d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE()
{
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
}
std::vector<cv::KeyPoint> gpu_keypoints;
d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
GPU_SANITY_CHECK_KEYPOINTS(ORB, d_keypoints);
GPU_SANITY_CHECK(d_descriptors);
cv::Mat gpu_descriptors(d_descriptors);
gpu_keypoints.resize(10);
gpu_descriptors = gpu_descriptors.rowRange(0, 10);
sortKeyPoints(gpu_keypoints, gpu_descriptors);
SANITY_CHECK_KEYPOINTS(gpu_keypoints);
SANITY_CHECK(gpu_descriptors);
}
else
{
cv::ORB orb(4000);
cv::ORB orb(nFeatures);
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
std::vector<cv::KeyPoint> cpu_keypoints;
cv::Mat cpu_descriptors;
orb(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE() orb(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
TEST_CYCLE()
{
keypoints.clear();
orb(img, cv::noArray(), keypoints, descriptors);
}
SANITY_CHECK_KEYPOINTS(keypoints);
SANITY_CHECK(descriptors);
SANITY_CHECK_KEYPOINTS(cpu_keypoints);
SANITY_CHECK(cpu_descriptors);
}
}
@@ -144,166 +199,165 @@ PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.png"))
DEF_PARAM_TEST(DescSize_Norm, int, NormType);
PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
PERF_TEST_P(DescSize_Norm, Features2D_BFMatch,
Combine(Values(64, 128, 256),
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
declare.time(20.0);
int desc_size = GET_PARAM(0);
int normType = GET_PARAM(1);
const int desc_size = GET_PARAM(0);
const int normType = GET_PARAM(1);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
const int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fillRandom(query);
declare.in(query, WARMUP_RNG);
cv::Mat train(3000, desc_size, type);
fillRandom(train);
declare.in(train, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
const cv::gpu::GpuMat d_query(query);
const cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_distance;
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
TEST_CYCLE()
{
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
}
std::vector<cv::DMatch> gpu_matches;
d_matcher.matchDownload(d_trainIdx, d_distance, gpu_matches);
GPU_SANITY_CHECK(d_trainIdx);
GPU_SANITY_CHECK(d_distance);
SANITY_CHECK_MATCHES(gpu_matches);
}
else
{
cv::BFMatcher matcher(normType);
std::vector<cv::DMatch> matches;
std::vector<cv::DMatch> cpu_matches;
matcher.match(query, train, matches);
TEST_CYCLE() matcher.match(query, train, cpu_matches);
TEST_CYCLE()
{
matcher.match(query, train, matches);
}
SANITY_CHECK(matches);
SANITY_CHECK_MATCHES(cpu_matches);
}
}
//////////////////////////////////////////////////////////////////////
// BFKnnMatch
static void toOneRowMatches(const std::vector< std::vector<cv::DMatch> >& src, std::vector<cv::DMatch>& dst)
{
dst.clear();
for (size_t i = 0; i < src.size(); ++i)
for (size_t j = 0; j < src[i].size(); ++j)
dst.push_back(src[i][j]);
}
DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType);
PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine(
Values(64, 128, 256),
Values(2, 3),
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch,
Combine(Values(64, 128, 256),
Values(2, 3),
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
{
declare.time(30.0);
int desc_size = GET_PARAM(0);
int k = GET_PARAM(1);
int normType = GET_PARAM(2);
const int desc_size = GET_PARAM(0);
const int k = GET_PARAM(1);
const int normType = GET_PARAM(2);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
const int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fillRandom(query);
declare.in(query, WARMUP_RNG);
cv::Mat train(3000, desc_size, type);
fillRandom(train);
declare.in(train, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
const cv::gpu::GpuMat d_query(query);
const cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_distance, d_allDist;
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
TEST_CYCLE()
{
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
}
std::vector< std::vector<cv::DMatch> > matchesTbl;
d_matcher.knnMatchDownload(d_trainIdx, d_distance, matchesTbl);
GPU_SANITY_CHECK(d_trainIdx);
GPU_SANITY_CHECK(d_distance);
std::vector<cv::DMatch> gpu_matches;
toOneRowMatches(matchesTbl, gpu_matches);
SANITY_CHECK_MATCHES(gpu_matches);
}
else
{
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
std::vector< std::vector<cv::DMatch> > matchesTbl;
matcher.knnMatch(query, train, matches, k);
TEST_CYCLE() matcher.knnMatch(query, train, matchesTbl, k);
TEST_CYCLE()
{
matcher.knnMatch(query, train, matches, k);
}
std::vector<cv::DMatch> cpu_matches;
toOneRowMatches(matchesTbl, cpu_matches);
SANITY_CHECK(matches);
SANITY_CHECK_MATCHES(cpu_matches);
}
}
//////////////////////////////////////////////////////////////////////
// BFRadiusMatch
PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch,
Combine(Values(64, 128, 256),
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
{
declare.time(30.0);
int desc_size = GET_PARAM(0);
int normType = GET_PARAM(1);
const int desc_size = GET_PARAM(0);
const int normType = GET_PARAM(1);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
const int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
const float maxDistance = 10000;
cv::Mat query(3000, desc_size, type);
fillRandom(query, 0.0, 1.0);
declare.in(query, WARMUP_RNG);
cv::Mat train(3000, desc_size, type);
fillRandom(train, 0.0, 1.0);
declare.in(train, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
const cv::gpu::GpuMat d_query(query);
const cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_nMatches, d_distance;
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance);
TEST_CYCLE()
{
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
}
std::vector< std::vector<cv::DMatch> > matchesTbl;
d_matcher.radiusMatchDownload(d_trainIdx, d_distance, d_nMatches, matchesTbl);
GPU_SANITY_CHECK(d_trainIdx);
GPU_SANITY_CHECK(d_distance);
std::vector<cv::DMatch> gpu_matches;
toOneRowMatches(matchesTbl, gpu_matches);
SANITY_CHECK_MATCHES(gpu_matches);
}
else
{
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
std::vector< std::vector<cv::DMatch> > matchesTbl;
matcher.radiusMatch(query, train, matches, 2.0);
TEST_CYCLE() matcher.radiusMatch(query, train, matchesTbl, maxDistance);
TEST_CYCLE()
{
matcher.radiusMatch(query, train, matches, 2.0);
}
std::vector<cv::DMatch> cpu_matches;
toOneRowMatches(matchesTbl, cpu_matches);
SANITY_CHECK(matches);
SANITY_CHECK_MATCHES(cpu_matches);
}
}
} // namespace

View File

@@ -3,48 +3,39 @@
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// Blur
DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);
PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), Values(3, 5, 7)))
PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8UC1, CV_8UC4),
Values(3, 5, 7)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
const int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize));
TEST_CYCLE()
{
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::blur(src, dst, cv::Size(ksize, ksize));
TEST_CYCLE()
{
cv::blur(src, dst, cv::Size(ksize, ksize));
}
TEST_CYCLE() cv::blur(src, dst, cv::Size(ksize, ksize));
CPU_SANITY_CHECK(dst);
}
@@ -57,38 +48,28 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Valu
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
const int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat d_buf;
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
TEST_CYCLE() cv::gpu::Sobel(d_src, dst, -1, 1, 1, d_buf, ksize);
TEST_CYCLE()
{
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::Sobel(src, dst, -1, 1, 1, ksize);
TEST_CYCLE()
{
cv::Sobel(src, dst, -1, 1, 1, ksize);
}
TEST_CYCLE() cv::Sobel(src, dst, -1, 1, 1, ksize);
CPU_SANITY_CHECK(dst);
}
@@ -101,37 +82,27 @@ PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat d_buf;
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
TEST_CYCLE() cv::gpu::Scharr(d_src, dst, -1, 1, 0, d_buf);
TEST_CYCLE()
{
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::Scharr(src, dst, -1, 1, 0);
TEST_CYCLE()
{
cv::Scharr(src, dst, -1, 1, 0);
}
TEST_CYCLE() cv::Scharr(src, dst, -1, 1, 0);
CPU_SANITY_CHECK(dst);
}
@@ -144,38 +115,28 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZE
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
const int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat d_buf;
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
TEST_CYCLE() cv::gpu::GaussianBlur(d_src, dst, cv::Size(ksize, ksize), d_buf, 0.5);
TEST_CYCLE()
{
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
TEST_CYCLE()
{
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
}
TEST_CYCLE() cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
CPU_SANITY_CHECK(dst);
}
@@ -188,37 +149,27 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES,
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
const int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
TEST_CYCLE() cv::gpu::Laplacian(d_src, dst, -1, ksize);
TEST_CYCLE()
{
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::Laplacian(src, dst, -1, ksize);
TEST_CYCLE()
{
cv::Laplacian(src, dst, -1, ksize);
}
TEST_CYCLE() cv::Laplacian(src, dst, -1, ksize);
CPU_SANITY_CHECK(dst);
}
@@ -231,39 +182,29 @@ PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
const cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat d_buf;
cv::gpu::erode(d_src, d_dst, ker, d_buf);
TEST_CYCLE() cv::gpu::erode(d_src, dst, ker, d_buf);
TEST_CYCLE()
{
cv::gpu::erode(d_src, d_dst, ker, d_buf);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::erode(src, dst, ker);
TEST_CYCLE()
{
cv::erode(src, dst, ker);
}
TEST_CYCLE() cv::erode(src, dst, ker);
CPU_SANITY_CHECK(dst);
}
@@ -276,39 +217,29 @@ PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
const cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat d_buf;
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
TEST_CYCLE() cv::gpu::dilate(d_src, dst, ker, d_buf);
TEST_CYCLE()
{
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::dilate(src, dst, ker);
TEST_CYCLE()
{
cv::dilate(src, dst, ker);
}
TEST_CYCLE() cv::dilate(src, dst, ker);
CPU_SANITY_CHECK(dst);
}
@@ -326,41 +257,31 @@ PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Val
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int morphOp = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
const int morphOp = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
const cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat d_buf1;
cv::gpu::GpuMat d_buf2;
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
TEST_CYCLE() cv::gpu::morphologyEx(d_src, dst, morphOp, ker, d_buf1, d_buf2);
TEST_CYCLE()
{
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::morphologyEx(src, dst, morphOp, ker);
TEST_CYCLE()
{
cv::morphologyEx(src, dst, morphOp, ker);
}
TEST_CYCLE() cv::morphologyEx(src, dst, morphOp, ker);
CPU_SANITY_CHECK(dst);
}
@@ -373,43 +294,31 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, V
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
const int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
declare.in(src, WARMUP_RNG);
cv::Mat kernel(ksize, ksize, CV_32FC1);
fillRandom(kernel, 0.0, 1.0);
declare.in(kernel, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
TEST_CYCLE() cv::gpu::filter2D(d_src, dst, -1, kernel);
TEST_CYCLE()
{
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
cv::filter2D(src, dst, -1, kernel);
TEST_CYCLE()
{
cv::filter2D(src, dst, -1, kernel);
}
TEST_CYCLE() cv::filter2D(src, dst, -1, kernel);
CPU_SANITY_CHECK(dst);
}
}
} // namespace

File diff suppressed because it is too large Load Diff

View File

@@ -3,8 +3,6 @@
using namespace std;
using namespace testing;
namespace {
DEF_PARAM_TEST_1(Image, string);
struct GreedyLabeling
@@ -100,28 +98,45 @@ struct GreedyLabeling
dot* stack;
};
PERF_TEST_P(Image, Labeling_ConnectedComponents, Values<string>("gpu/labeling/aloe-disp.png"))
// Perf test for cv::gpu::connectivityMask on the grayscale image
// "gpu/labeling/aloe-disp.png". The test name carries the DISABLED_ prefix.
// Only a GPU path exists: when PERF_RUN_GPU() is false the test calls
// FAIL_NO_CPU().
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped -- `image` is declared twice below (with and without const).
// Presumably only one of each such pair is live; confirm against the real file.
PERF_TEST_P(Image, DISABLED_Labeling_ConnectivityMask,
Values<string>("gpu/labeling/aloe-disp.png"))
{
declare.time(1.0);
// Load the parameterized image as grayscale; abort the test if it is empty.
cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_image(image);
cv::gpu::GpuMat mask;
// Output mask is allocated with the image's dimensions as CV_8UC1.
mask.create(image.rows, image.cols, CV_8UC1);
// Measured call: connectivityMask with Scalar::all(0) and Scalar::all(2)
// as the two threshold arguments.
TEST_CYCLE() cv::gpu::connectivityMask(d_image, mask, cv::Scalar::all(0), cv::Scalar::all(2));
GPU_SANITY_CHECK(mask);
}
else
{
FAIL_NO_CPU();
}
}
PERF_TEST_P(Image, DISABLED_Labeling_ConnectedComponents,
Values<string>("gpu/labeling/aloe-disp.png"))
{
declare.time(1.0);
const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_mask;
cv::gpu::connectivityMask(cv::gpu::GpuMat(image), d_mask, cv::Scalar::all(0), cv::Scalar::all(2));
cv::gpu::GpuMat components;
components.create(image.rows, image.cols, CV_32SC1);
cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2));
ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components));
TEST_CYCLE()
{
cv::gpu::labelComponents(mask, components);
}
TEST_CYCLE() cv::gpu::labelComponents(d_mask, components);
GPU_SANITY_CHECK(components);
}
@@ -129,17 +144,9 @@ PERF_TEST_P(Image, Labeling_ConnectedComponents, Values<string>("gpu/labeling/al
{
GreedyLabeling host(image);
host(host._labels);
TEST_CYCLE() host(host._labels);
declare.time(1.0);
TEST_CYCLE()
{
host(host._labels);
}
CPU_SANITY_CHECK(host._labels);
cv::Mat components = host._labels;
CPU_SANITY_CHECK(components);
}
}
} // namespace

View File

@@ -1,7 +1,5 @@
#include "perf_precomp.hpp"
namespace{
static void printOsInfo()
{
#if defined _WIN32
@@ -69,6 +67,4 @@ static void printCudaInfo()
#endif
}
}
CV_PERF_TEST_MAIN(gpu, printCudaInfo())
CV_PERF_TEST_MAIN(gpu, printCudaInfo())

View File

@@ -3,137 +3,112 @@
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// SetTo
// Perf test for setTo(val) with val = Scalar(1, 2, 3, 4) on a matrix of the
// parameterized size and type CV_MAKE_TYPE(depth, channels). Uses
// cv::gpu::GpuMat when PERF_RUN_GPU() is true and cv::Mat otherwise.
// Parameters: sizes from GPU_TYPICAL_MAT_SIZES, depths CV_8U/16U/32F/64F,
// channel counts from GPU_CHANNELS_1_3_4.
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped -- the header, the parameter locals and the timing code each appear
// twice (a plain form and a const / one-line TEST_CYCLE() form). Presumably
// only one of each pair is live; confirm against the real file before editing.
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16U, CV_32F, CV_64F),
GPU_CHANNELS_1_3_4))
{
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
const int channels = GET_PARAM(2);
// Combine depth and channel count into a full matrix type.
int type = CV_MAKE_TYPE(depth, channels);
const int type = CV_MAKE_TYPE(depth, channels);
cv::Scalar val(1, 2, 3, 4);
const cv::Scalar val(1, 2, 3, 4);
if (PERF_RUN_GPU())
{
// GPU path: one variant times setTo on `d_src` (with a call before the
// cycle), the other times it on `dst` directly inside TEST_CYCLE().
cv::gpu::GpuMat d_src(size, type);
cv::gpu::GpuMat dst(size, type);
d_src.setTo(val);
TEST_CYCLE() dst.setTo(val);
TEST_CYCLE()
{
d_src.setTo(val);
}
GPU_SANITY_CHECK(d_src);
GPU_SANITY_CHECK(dst);
}
else
{
// CPU path: same operation on cv::Mat, again in two variants (src / dst).
cv::Mat src(size, type);
cv::Mat dst(size, type);
src.setTo(val);
TEST_CYCLE() dst.setTo(val);
TEST_CYCLE()
{
src.setTo(val);
}
CPU_SANITY_CHECK(src);
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// SetToMasked
// Perf test for masked setTo(val, mask) with val = Scalar(1, 2, 3, 4).
// `src` is filled by fillRandom(src); `mask` is CV_8UC1 filled by
// fillRandom(mask, 0, 2). Both are declared as inputs with WARMUP_RNG.
// Uses cv::gpu::GpuMat when PERF_RUN_GPU() is true and cv::Mat otherwise.
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped -- the header, the parameter locals and the timing code each appear
// twice (a plain form and a const / one-line TEST_CYCLE() form). Presumably
// only one of each pair is live; confirm against the real file before editing.
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16U, CV_32F, CV_64F),
GPU_CHANNELS_1_3_4))
{
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
const int channels = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
const int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat mask(size, CV_8UC1);
fillRandom(mask, 0, 2);
declare.in(src, mask, WARMUP_RNG);
cv::Scalar val(1, 2, 3, 4);
const cv::Scalar val(1, 2, 3, 4);
if (PERF_RUN_GPU())
{
// GPU path: the matrix under test is constructed from `src`, the mask from
// `mask`; one variant names the matrix `d_src`, the other `dst`.
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_mask(mask);
cv::gpu::GpuMat dst(src);
const cv::gpu::GpuMat d_mask(mask);
d_src.setTo(val, d_mask);
TEST_CYCLE() dst.setTo(val, d_mask);
TEST_CYCLE()
{
d_src.setTo(val, d_mask);
}
GPU_SANITY_CHECK(d_src);
// The `dst` variant passes an extra 1e-10 argument through to SANITY_CHECK.
GPU_SANITY_CHECK(dst, 1e-10);
}
else
{
src.setTo(val, mask);
// NOTE(review): `cv::Mat dst = src` copies the header only (per the cv::Mat
// documentation), so dst.setTo presumably writes into src's buffer -- confirm
// that aliasing is intended here.
cv::Mat dst = src;
TEST_CYCLE()
{
src.setTo(val, mask);
}
TEST_CYCLE() dst.setTo(val, mask);
CPU_SANITY_CHECK(src);
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// CopyToMasked
PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16U, CV_32F, CV_64F),
GPU_CHANNELS_1_3_4))
{
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
const int channels = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
const int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat mask(size, CV_8UC1);
fillRandom(mask, 0, 2);
declare.in(src, mask, WARMUP_RNG);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_mask(mask);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
const cv::gpu::GpuMat d_mask(mask);
cv::gpu::GpuMat dst(d_src.size(), d_src.type(), cv::Scalar::all(0));
d_src.copyTo(d_dst, d_mask);
TEST_CYCLE() d_src.copyTo(dst, d_mask);
TEST_CYCLE()
{
d_src.copyTo(d_dst, d_mask);
}
GPU_SANITY_CHECK(d_dst);
GPU_SANITY_CHECK(dst, 1e-10);
}
else
{
cv::Mat dst;
cv::Mat dst(src.size(), src.type(), cv::Scalar::all(0));
src.copyTo(dst, mask);
TEST_CYCLE()
{
src.copyTo(dst, mask);
}
TEST_CYCLE() src.copyTo(dst, mask);
CPU_SANITY_CHECK(dst);
}
@@ -144,42 +119,36 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Valu
// Parameter tuple for the conversion test: (size, source depth, target depth).
DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth);
// Perf test for convertTo(dst, depth2, 0.5, 1.0): converts a random matrix
// of depth `depth1` to `depth2`, passing 0.5 and 1.0 (named `a` and `b` in one
// variant) as the third and fourth arguments. Uses cv::gpu::GpuMat when
// PERF_RUN_GPU() is true and cv::Mat otherwise.
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped -- the header, the parameter locals and the timing code each appear
// twice (a plain form and a const / one-line TEST_CYCLE() form). Presumably
// only one of each pair is live; confirm against the real file before editing.
PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F)))
PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16U, CV_32F, CV_64F),
Values(CV_8U, CV_16U, CV_32F, CV_64F)))
{
cv::Size size = GET_PARAM(0);
int depth1 = GET_PARAM(1);
int depth2 = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int depth1 = GET_PARAM(1);
const int depth2 = GET_PARAM(2);
// Source matrix is created with the depth value alone as its type argument.
cv::Mat src(size, depth1);
fillRandom(src);
declare.in(src, WARMUP_RNG);
// Named forms of the 0.5 / 1.0 literals used by the other variant.
const double a = 0.5;
const double b = 1.0;
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
TEST_CYCLE() d_src.convertTo(dst, depth2, a, b);
TEST_CYCLE()
{
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
}
GPU_SANITY_CHECK(d_dst);
// The `dst` variant passes an extra 1e-10 argument through to SANITY_CHECK.
GPU_SANITY_CHECK(dst, 1e-10);
}
else
{
cv::Mat dst;
src.convertTo(dst, depth2, 0.5, 1.0);
TEST_CYCLE()
{
src.convertTo(dst, depth2, 0.5, 1.0);
}
TEST_CYCLE() src.convertTo(dst, depth2, a, b);
CPU_SANITY_CHECK(dst);
}
}
} // namespace

View File

@@ -3,90 +3,47 @@
using namespace std;
using namespace testing;
namespace {
///////////////////////////////////////////////////////////////
// HOG
// Single-string parameter fixture: the relative path of the test image.
DEF_PARAM_TEST_1(Image, string);
// Perf test for HOG detectMultiScale with the default people detector
// (cv::gpu::HOGDescriptor::getDefaultPeopleDetector() is used for both the GPU
// and the CPU descriptor). One header variant runs on "gpu/hog/road.png" only;
// the other adds seven "gpu/caltech/..." images to the parameter list.
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped -- the header, `img`, `d_img`, the result vectors and the timing
// code each appear in two forms. Presumably only one of each pair is live;
// confirm against the real file before editing.
PERF_TEST_P(Image, ObjDetect_HOG, Values<string>("gpu/hog/road.png"))
PERF_TEST_P(Image, ObjDetect_HOG,
Values<string>("gpu/hog/road.png",
"gpu/caltech/image_00000009_0.png",
"gpu/caltech/image_00000032_0.png",
"gpu/caltech/image_00000165_0.png",
"gpu/caltech/image_00000261_0.png",
"gpu/caltech/image_00000469_0.png",
"gpu/caltech/image_00000527_0.png",
"gpu/caltech/image_00000574_0.png"))
{
// Load the parameterized image as grayscale; abort the test if it is empty.
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
// Shared result vector used by the variant that checks after the if/else.
std::vector<cv::Rect> found_locations;
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_img(img);
const cv::gpu::GpuMat d_img(img);
// Branch-local result vector used by the variant that checks inside the branch.
std::vector<cv::Rect> gpu_found_locations;
cv::gpu::HOGDescriptor d_hog;
d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
d_hog.detectMultiScale(d_img, found_locations);
TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
TEST_CYCLE()
{
d_hog.detectMultiScale(d_img, found_locations);
}
SANITY_CHECK(gpu_found_locations);
}
else
{
std::vector<cv::Rect> cpu_found_locations;
cv::HOGDescriptor hog;
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
hog.detectMultiScale(img, found_locations);
TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);
TEST_CYCLE()
{
hog.detectMultiScale(img, found_locations);
}
SANITY_CHECK(cpu_found_locations);
}
SANITY_CHECK(found_locations);
}
//===========test for CalTech data =============//
// Perf test (HOG, CalTech): runs HOG detectMultiScale with the default people
// detector on seven "gpu/caltech/image_*.png" images, on the GPU when
// PERF_RUN_GPU() is true and on the CPU otherwise. Both branches write into the
// same `found_locations` vector, which is checked once after the if/else.
// NOTE(review): the same seven image paths also appear in the ObjDetect_HOG
// parameter list elsewhere in this file; this test may be the removed side of
// a diff -- confirm against the real file.
DEF_PARAM_TEST_1(HOG, string);
PERF_TEST_P(HOG, CalTech, Values<string>("gpu/caltech/image_00000009_0.png", "gpu/caltech/image_00000032_0.png",
"gpu/caltech/image_00000165_0.png", "gpu/caltech/image_00000261_0.png", "gpu/caltech/image_00000469_0.png",
"gpu/caltech/image_00000527_0.png", "gpu/caltech/image_00000574_0.png"))
{
// Load the parameterized image as grayscale; abort the test if it is empty.
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
std::vector<cv::Rect> found_locations;
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_img(img);
cv::gpu::HOGDescriptor d_hog;
d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
// One call before the measured loop, then the same call inside TEST_CYCLE().
d_hog.detectMultiScale(d_img, found_locations);
TEST_CYCLE()
{
d_hog.detectMultiScale(d_img, found_locations);
}
}
else
{
cv::HOGDescriptor hog;
// The CPU descriptor is configured with the detector returned by the
// cv::gpu::HOGDescriptor class, same as the GPU branch.
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
hog.detectMultiScale(img, found_locations);
TEST_CYCLE()
{
hog.detectMultiScale(img, found_locations);
}
}
SANITY_CHECK(found_locations);
}
///////////////////////////////////////////////////////////////
@@ -96,9 +53,9 @@ typedef pair<string, string> pair_string;
DEF_PARAM_TEST_1(ImageAndCascade, pair_string);
PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
{
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
@@ -106,33 +63,28 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_objects_buffer;
const cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat objects_buffer;
int detections_num = 0;
d_cascade.detectMultiScale(d_img, d_objects_buffer);
TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
TEST_CYCLE()
{
d_cascade.detectMultiScale(d_img, d_objects_buffer);
}
GPU_SANITY_CHECK(d_objects_buffer);
std::vector<cv::Rect> gpu_rects(detections_num);
cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
cv::groupRectangles(gpu_rects, 3, 0.2);
SANITY_CHECK(gpu_rects);
}
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
std::vector<cv::Rect> rects;
std::vector<cv::Rect> cpu_rects;
cascade.detectMultiScale(img, rects);
TEST_CYCLE() cascade.detectMultiScale(img, cpu_rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, rects);
}
CPU_SANITY_CHECK(rects);
SANITY_CHECK(cpu_rects);
}
}
@@ -140,9 +92,9 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
// LBP cascade
PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
{
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
@@ -150,34 +102,27 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_gpu_rects;
const cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat objects_buffer;
int detections_num = 0;
d_cascade.detectMultiScale(d_img, d_gpu_rects);
TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
TEST_CYCLE()
{
d_cascade.detectMultiScale(d_img, d_gpu_rects);
}
GPU_SANITY_CHECK(d_gpu_rects);
std::vector<cv::Rect> gpu_rects(detections_num);
cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
cv::groupRectangles(gpu_rects, 3, 0.2);
SANITY_CHECK(gpu_rects);
}
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
std::vector<cv::Rect> rects;
std::vector<cv::Rect> cpu_rects;
cascade.detectMultiScale(img, rects);
TEST_CYCLE() cascade.detectMultiScale(img, cpu_rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, rects);
}
CPU_SANITY_CHECK(rects);
SANITY_CHECK(cpu_rects);
}
}
} // namespace

File diff suppressed because it is too large Load Diff

View File

@@ -2,13 +2,6 @@
using namespace std;
using namespace cv;
using namespace cv::gpu;
// Fills `m` with uniformly distributed values, using `a` and `b` (broadcast to
// every channel via Scalar::all) as the two range bounds handed to RNG::fill.
// The generator is seeded with a fixed constant on every call, so each call
// starts from the same generator state.
void fillRandom(Mat& m, double a, double b)
{
    const Scalar lowerBound = Scalar::all(a);
    const Scalar upperBound = Scalar::all(b);

    RNG generator(123456789);
    generator.fill(m, RNG::UNIFORM, lowerBound, upperBound);
}
Mat readImage(const string& fileName, int flags)
{
@@ -188,4 +181,4 @@ void PrintTo(const CvtColorInfo& info, ostream* os)
};
*os << str[info.code];
}
}

View File

@@ -2,11 +2,9 @@
#define __OPENCV_PERF_GPU_UTILITY_HPP__
#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/ts/ts_perf.hpp"
void fillRandom(cv::Mat& m, double a = 0.0, double b = 255.0);
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
using perf::MatType;
@@ -17,12 +15,13 @@ CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONS
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
#define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING, cv::NORM_MINMAX)
const int Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4;
enum { Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4 };
CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA)
#define GPU_CHANNELS_1_3_4 testing::Values(Gray, BGR, BGRA)
#define GPU_CHANNELS_1_3 testing::Values(Gray, BGR)
#define GPU_CHANNELS_1_3_4 testing::Values(MatCn(Gray), MatCn(BGR), MatCn(BGRA))
#define GPU_CHANNELS_1_3 testing::Values(MatCn(Gray), MatCn(BGR))
struct CvtColorInfo
{
@@ -30,7 +29,8 @@ struct CvtColorInfo
int dcn;
int code;
explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
CvtColorInfo() {}
explicit CvtColorInfo(int scn_, int dcn_, int code_) : scn(scn_), dcn(dcn_), code(code_) {}
};
void PrintTo(const CvtColorInfo& info, std::ostream* os);
@@ -46,39 +46,18 @@ DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn);
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p)
// Sanity-check helper macros for the GPU perf tests.
//  - GPU_SANITY_CHECK(x, ...): declares a local cv::Mat constructed from `x`
//    (named d<x> in one variant, gpu_<x> in the other) and forwards it, plus
//    any extra arguments, to SANITY_CHECK.
//  - CPU_SANITY_CHECK(x, ...): one variant forwards `x` straight to
//    SANITY_CHECK; the other first wraps it in a cv::Mat named cpu_<x>.
//  - FAIL_NO_CPU(): FAIL() with the message "No such CPU implementation analogy".
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped -- each macro header appears twice (different parameter names) and
// the bodies mix lines from both variants, and FAIL_NO_CPU is defined both here
// and at the end of the span. Presumably only one variant of each is live;
// confirm against the real header before editing.
#define GPU_SANITY_CHECK(dmat, ...) \
#define FAIL_NO_CPU() FAIL() << "No such CPU implementation analogy"
#define GPU_SANITY_CHECK(mat, ...) \
do{ \
cv::Mat d##dmat(dmat); \
SANITY_CHECK(d##dmat, ## __VA_ARGS__); \
cv::Mat gpu_##mat(mat); \
SANITY_CHECK(gpu_##mat, ## __VA_ARGS__); \
} while(0)
#define CPU_SANITY_CHECK(cmat, ...) \
#define CPU_SANITY_CHECK(mat, ...) \
do{ \
SANITY_CHECK(cmat, ## __VA_ARGS__); \
cv::Mat cpu_##mat(mat); \
SANITY_CHECK(cpu_##mat, ## __VA_ARGS__); \
} while(0)
// GPU_SANITY_CHECK_KEYPOINTS(alg, dmat, ...): copies `dmat` into a cv::Mat,
// takes the rows indexed by cv::gpu::<alg>_GPU::X_ROW / Y_ROW / ANGLE_ROW /
// OCTAVE_ROW / SIZE_ROW and registers each with ::perf::Regression::add under
// a name built from #dmat plus a per-row suffix.
// NOTE(review): the octave suffix is "octave-row" without the leading dash the
// other suffixes have -- it is a runtime string, so it is left untouched here.
#define GPU_SANITY_CHECK_KEYPOINTS(alg, dmat, ...) \
do{ \
cv::Mat d##dmat(dmat); \
cv::Mat __pt_x = d##dmat.row(cv::gpu::alg##_GPU::X_ROW); \
cv::Mat __pt_y = d##dmat.row(cv::gpu::alg##_GPU::Y_ROW); \
cv::Mat __angle = d##dmat.row(cv::gpu::alg##_GPU::ANGLE_ROW); \
cv::Mat __octave = d##dmat.row(cv::gpu::alg##_GPU::OCTAVE_ROW); \
cv::Mat __size = d##dmat.row(cv::gpu::alg##_GPU::SIZE_ROW); \
::perf::Regression::add(this, std::string(#dmat) + "-pt-x-row", __pt_x, ## __VA_ARGS__); \
::perf::Regression::add(this, std::string(#dmat) + "-pt-y-row", __pt_y, ## __VA_ARGS__); \
::perf::Regression::add(this, std::string(#dmat) + "-angle-row", __angle, ## __VA_ARGS__); \
::perf::Regression::add(this, std::string(#dmat) + "octave-row", __octave, ## __VA_ARGS__); \
::perf::Regression::add(this, std::string(#dmat) + "-pt-size-row", __size, ## __VA_ARGS__); \
} while(0)
// GPU_SANITY_CHECK_RESPONSE(alg, dmat, ...): same pattern for the single row
// indexed by cv::gpu::<alg>_GPU::RESPONSE_ROW.
#define GPU_SANITY_CHECK_RESPONSE(alg, dmat, ...) \
do{ \
cv::Mat d##dmat(dmat); \
cv::Mat __response = d##dmat.row(cv::gpu::alg##_GPU::RESPONSE_ROW); \
::perf::Regression::add(this, std::string(#dmat) + "-response-row", __response, ## __VA_ARGS__); \
} while(0)
#define FAIL_NO_CPU() FAIL() << "No such CPU implementation analogy"
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__

View File

@@ -648,7 +648,7 @@ namespace cv { namespace gpu { namespace device
tWeight += gmm_weight(mode * frame.rows + y, x);
if (tWeight > c_TB)
break;
};
}
}
fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
@@ -761,4 +761,4 @@ namespace cv { namespace gpu { namespace device
}}}
#endif /* CUDA_DISABLER */
#endif /* CUDA_DISABLER */

View File

@@ -194,10 +194,10 @@ namespace cv { namespace gpu { namespace device
if ( y > 0 && connected(intensity, image(y - 1, x)))
c |= UP;
if ( x - 1 < image.cols && connected(intensity, image(y, x + 1)))
if ( x + 1 < image.cols && connected(intensity, image(y, x + 1)))
c |= RIGHT;
if ( y - 1 < image.rows && connected(intensity, image(y + 1, x)))
if ( y + 1 < image.rows && connected(intensity, image(y + 1, x)))
c |= DOWN;
components(y, x) = c;

View File

@@ -2284,15 +2284,18 @@ namespace arithm
template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarAnd<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarOr<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
}
//////////////////////////////////////////////////////////////////////////

View File

@@ -2280,11 +2280,11 @@ namespace
{
// Signature shared by the bitScalarAnd/Or/Xor device wrappers: source image,
// the scalar operand as an unsigned int, destination image, CUDA stream.
typedef void (*bit_scalar_func_t)(PtrStepSzb src1, unsigned int src2, PtrStepSzb dst, cudaStream_t stream);
// Adapter that exposes a bit_scalar_func_t as a (GpuMat, Scalar, GpuMat, stream)
// call: it forwards only the first scalar component, sc.val[0], to `func`.
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped -- the template header and the forwarding call each appear twice.
// One variant takes only `func` and converts with static_cast<unsigned int>;
// the other adds an element type T and converts with saturate_cast<T>.
// Presumably only one pair is live; confirm against the real file.
template <bit_scalar_func_t func> struct BitScalar
template <typename T, bit_scalar_func_t func> struct BitScalar
{
static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream)
{
func(src, static_cast<unsigned int>(sc.val[0]), dst, stream);
func(src, saturate_cast<T>(sc.val[0]), dst, stream);
}
};
@@ -2292,14 +2292,12 @@ namespace
{
static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream)
{
Scalar_<unsigned int> isc = sc;
unsigned int packedVal = 0;
packedVal |= (isc.val[0] & 0xffff);
packedVal |= (isc.val[1] & 0xffff) << 8;
packedVal |= (isc.val[2] & 0xffff) << 16;
packedVal |= (isc.val[3] & 0xffff) << 24;
packedVal |= (saturate_cast<unsigned char>(sc.val[0]) & 0xffff);
packedVal |= (saturate_cast<unsigned char>(sc.val[1]) & 0xffff) << 8;
packedVal |= (saturate_cast<unsigned char>(sc.val[2]) & 0xffff) << 16;
packedVal |= (saturate_cast<unsigned char>(sc.val[3]) & 0xffff) << 24;
func(src, packedVal, dst, stream);
}
@@ -2330,7 +2328,7 @@ namespace
oSizeROI.width = src.cols;
oSizeROI.height = src.rows;
const npp_t pConstants[] = {static_cast<npp_t>(sc.val[0]), static_cast<npp_t>(sc.val[1]), static_cast<npp_t>(sc.val[2]), static_cast<npp_t>(sc.val[3])};
const npp_t pConstants[] = {saturate_cast<npp_t>(sc.val[0]), saturate_cast<npp_t>(sc.val[1]), saturate_cast<npp_t>(sc.val[2]), saturate_cast<npp_t>(sc.val[3])};
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
@@ -2350,7 +2348,7 @@ namespace
oSizeROI.width = src.cols;
oSizeROI.height = src.rows;
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), static_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
@@ -2365,11 +2363,11 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] =
{
{BitScalar< bitScalarAnd<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call},
{BitScalar<unsigned char, bitScalarAnd<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call},
{0,0,0,0},
{BitScalar< bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
{BitScalar<unsigned short, bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
{0,0,0,0},
{BitScalar< bitScalarAnd<unsigned int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
{BitScalar<int, bitScalarAnd<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
};
const int depth = src.depth();
@@ -2390,11 +2388,11 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] =
{
{BitScalar< bitScalarOr<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call},
{BitScalar<unsigned char, bitScalarOr<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call},
{0,0,0,0},
{BitScalar< bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
{BitScalar<unsigned short, bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
{0,0,0,0},
{BitScalar< bitScalarOr<unsigned int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
{BitScalar<int, bitScalarOr<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
};
const int depth = src.depth();
@@ -2415,11 +2413,11 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] =
{
{BitScalar< bitScalarXor<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call},
{BitScalar<unsigned char, bitScalarXor<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call},
{0,0,0,0},
{BitScalar< bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
{BitScalar<unsigned short, bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
{0,0,0,0},
{BitScalar< bitScalarXor<unsigned int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
{BitScalar<int, bitScalarXor<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
};
const int depth = src.depth();

View File

@@ -104,12 +104,12 @@ void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scal
void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s)
{
if (!TargetArchs::builtWith(SHARED_ATOMICS) || !DeviceInfo().supports(SHARED_ATOMICS))
CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!");
CV_Assert(!mask.empty() && mask.type() == CV_8U);
if (mask.size() != components.size() || components.type() != CV_32SC1)
components.create(mask.size(), CV_32SC1);
if (!deviceSupports(SHARED_ATOMICS))
CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!");
components.create(mask.size(), CV_32SC1);
cudaStream_t stream = StreamAccessor::getStream(s);
device::ccl::labelComponents(mask, components, flags, stream);

View File

@@ -517,6 +517,7 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
dst.create(dsize, src.type());
dst.setTo(Scalar::all(0));
funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
}

View File

@@ -380,6 +380,7 @@ void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr,
dstcol[0] = static_cast<uchar>(sumcol[0] / comps.size[parent]);
dstcol[1] = static_cast<uchar>(sumcol[1] / comps.size[parent]);
dstcol[2] = static_cast<uchar>(sumcol[2] / comps.size[parent]);
dstcol[3] = 255;
}
}
}

View File

@@ -206,6 +206,8 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
uPyr_[0].setTo(Scalar::all(0));
vPyr_[0].setTo(Scalar::all(0));
uPyr_[1].setTo(Scalar::all(0));
vPyr_[1].setTo(Scalar::all(0));

View File

@@ -232,10 +232,8 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
};
bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
#ifdef linux
// NPP bug on float data
useNpp = useNpp && src.depth() != CV_32F;
#endif
// NPP bug on float data
useNpp = useNpp && src.depth() != CV_32F;
if (useNpp)
{
@@ -372,10 +370,8 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
};
bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
#ifdef linux
// NPP bug on float data
useNpp = useNpp && src.depth() != CV_32F;
#endif
// NPP bug on float data
useNpp = useNpp && src.depth() != CV_32F;
if (useNpp)
{

View File

@@ -207,11 +207,17 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, MOG, testing::Combine(
//////////////////////////////////////////////////////
// MOG2
PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, UseRoi)
namespace
{
IMPLEMENT_PARAM_CLASS(DetectShadow, bool)
}
PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, DetectShadow, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
std::string inputFile;
bool useGray;
bool detectShadow;
bool useRoi;
virtual void SetUp()
@@ -220,10 +226,9 @@ PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, UseRoi)
cv::gpu::setDevice(devInfo.deviceID());
inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
useGray = GET_PARAM(2);
useRoi = GET_PARAM(3);
detectShadow = GET_PARAM(3);
useRoi = GET_PARAM(4);
}
};
@@ -237,9 +242,11 @@ GPU_TEST_P(MOG2, Update)
ASSERT_FALSE(frame.empty());
cv::gpu::MOG2_GPU mog2;
mog2.bShadowDetection = detectShadow;
cv::gpu::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
cv::BackgroundSubtractorMOG2 mog2_gold;
mog2_gold.set("detectShadows", detectShadow);
cv::Mat foreground_gold;
for (int i = 0; i < 10; ++i)
@@ -258,11 +265,14 @@ GPU_TEST_P(MOG2, Update)
mog2_gold(frame, foreground_gold);
double norm = cv::norm(foreground_gold, cv::Mat(foreground), cv::NORM_L1);
norm /= foreground_gold.size().area();
ASSERT_LE(norm, 0.09);
if (detectShadow)
{
ASSERT_MAT_SIMILAR(foreground_gold, foreground, 1e-2);
}
else
{
ASSERT_MAT_NEAR(foreground_gold, foreground, 0);
}
}
}
@@ -277,9 +287,11 @@ GPU_TEST_P(MOG2, getBackgroundImage)
cv::Mat frame;
cv::gpu::MOG2_GPU mog2;
mog2.bShadowDetection = detectShadow;
cv::gpu::GpuMat foreground;
cv::BackgroundSubtractorMOG2 mog2_gold;
mog2_gold.set("detectShadows", detectShadow);
cv::Mat foreground_gold;
for (int i = 0; i < 10; ++i)
@@ -305,6 +317,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, MOG2, testing::Combine(
ALL_DEVICES,
testing::Values(std::string("768x576.avi")),
testing::Values(UseGray(true), UseGray(false)),
testing::Values(DetectShadow(true), DetectShadow(false)),
WHOLE_SUBMAT));
//////////////////////////////////////////////////////

View File

@@ -1873,7 +1873,7 @@ PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channel
cv::gpu::setDevice(devInfo.deviceID());
src = randomMat(size, CV_MAKE_TYPE(depth, channels));
cv::Scalar_<int> ival = randomScalar(0.0, 255.0);
cv::Scalar_<int> ival = randomScalar(0.0, std::numeric_limits<int>::max());
val = ival;
}
};

View File

@@ -252,6 +252,8 @@ PARAM_TEST_CASE(WarpAffineNPP, cv::gpu::DeviceInfo, MatType, Inverse, Interpolat
GPU_TEST_P(WarpAffineNPP, Accuracy)
{
cv::Mat src = readImageType("stereobp/aloe-L.png", type);
ASSERT_FALSE(src.empty());
cv::Mat M = createTransfomMatrix(src.size(), CV_PI / 4);
int flags = interpolation;
if (inverse)

View File

@@ -255,6 +255,8 @@ PARAM_TEST_CASE(WarpPerspectiveNPP, cv::gpu::DeviceInfo, MatType, Inverse, Inter
GPU_TEST_P(WarpPerspectiveNPP, Accuracy)
{
cv::Mat src = readImageType("stereobp/aloe-L.png", type);
ASSERT_FALSE(src.empty());
cv::Mat M = createTransfomMatrix(src.size(), CV_PI / 4);
int flags = interpolation;
if (inverse)

View File

@@ -275,7 +275,7 @@ if(WIN32 AND WITH_FFMPEG)
COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/Release/${ffmpeg_bare_name_ver}"
COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/Debug/${ffmpeg_bare_name_ver}"
COMMENT "Copying ${ffmpeg_path} to the output directory")
elseif(MSVC)
elseif(MSVC AND (CMAKE_GENERATOR MATCHES "Visual"))
add_custom_command(TARGET ${the_module} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/${CMAKE_BUILD_TYPE}/${ffmpeg_bare_name_ver}"
COMMENT "Copying ${ffmpeg_path} to the output directory")

View File

@@ -217,6 +217,12 @@ endif(ANDROID AND ANDROID_EXECUTABLE)
set(step3_depends ${step2_depends} ${step3_input_files} ${copied_files})
if(ANDROID)
set(LIB_NAME_SUFIX "")
else()
set(LIB_NAME_SUFIX "${OPENCV_VERSION_MAJOR}${OPENCV_VERSION_MINOR}${OPENCV_VERSION_PATCH}")
endif()
# step 4: build jar
if(ANDROID)
set(JAR_FILE "${OpenCV_BINARY_DIR}/bin/classes.jar")
@@ -241,7 +247,7 @@ if(ANDROID)
)
endif()
else(ANDROID)
set(JAR_NAME opencv-${OPENCV_VERSION}.jar)
set(JAR_NAME opencv-${LIB_NAME_SUFIX}.jar)
set(JAR_FILE "${OpenCV_BINARY_DIR}/bin/${JAR_NAME}")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build.xml.in" "${OpenCV_BINARY_DIR}/build.xml" IMMEDIATE @ONLY)
list(APPEND step3_depends "${OpenCV_BINARY_DIR}/build.xml")
@@ -294,8 +300,8 @@ endif()
# Additional target properties
set_target_properties(${the_module} PROPERTIES
OUTPUT_NAME "${the_module}${OPENCV_DLLVERSION}"
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
OUTPUT_NAME "${the_module}${LIB_NAME_SUFIX}"
#DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
INSTALL_NAME_DIR ${OPENCV_LIB_INSTALL_PATH}

View File

@@ -557,6 +557,15 @@ func_arg_fix = {
}, # '', i.e. no class
} # func_arg_fix
def getLibVersion(version_hpp_path):
    """Read the OpenCV version components from a version.hpp header.

    Searches the header text for the lines
    ``#define CV_VERSION_EPOCH|MAJOR|MINOR|REVISION <digits>`` and returns
    the four numbers, as strings, in that order.

    version_hpp_path -- path of the header file to parse.

    Returns a tuple of strings: (epoch, major, minor, revision).
    Raises AttributeError if one of the defines is not found (``re.search``
    returns None and ``.group`` is called on it), as before.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(version_hpp_path, "rt") as version_file:
        version_text = version_file.read()

    def find_component(name):
        # The pattern must start with "\W*" (any non-word characters), not a
        # literal "W*": with the literal form, whitespace before '#' made the
        # search fail. Raw strings keep "\W" and "\d" from being treated as
        # string escapes.
        pattern = r"^\W*#\W*define\W+CV_VERSION_" + name + r"\W+(\d+)\W*$"
        return re.search(pattern, version_text, re.MULTILINE).group(1)

    return tuple(find_component(name) for name in ("EPOCH", "MAJOR", "MINOR", "REVISION"))
class ConstInfo(object):
def __init__(self, cname, name, val, addedManually=False):
self.cname = cname
@@ -719,13 +728,16 @@ $imports
public class %(jc)s {
""" % { 'm' : self.module, 'jc' : jname } )
# self.java_code[class_name]["jn_code"].write("""
# //
# // native stuff
# //
# static { System.loadLibrary("opencv_java"); }
#""" )
if class_name == 'Core':
(epoch, major, minor, revision) = getLibVersion(
(os.path.dirname(__file__) or '.') + '/../../core/include/opencv2/core/version.hpp')
version_str = '.'.join( (epoch, major, minor, revision) )
version_suffix = ''.join( (epoch, major, minor) )
self.classes[class_name].imports.add("java.lang.String")
self.java_code[class_name]["j_code"].write("""
public static final String VERSION = "%(v)s", NATIVE_LIBRARY_NAME = "opencv_java%(vs)s";
public static final int VERSION_EPOCH = %(ep)s, VERSION_MAJOR = %(ma)s, VERSION_MINOR = %(mi)s, VERSION_REVISION = %(re)s;
""" % { 'v' : version_str, 'vs' : version_suffix, 'ep' : epoch, 'ma' : major, 'mi' : minor, 're' : revision } )
def add_class(self, decl):

View File

@@ -2122,12 +2122,16 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
};
int dst_step1 = dst.cols * dst.elemSize();
int src1_step = (int) src1.step;
int src2_step = (int) src2.step;
int dst_step = (int) dst.step;
float alpha_f = alpha, beta_f = beta, gama_f = gama;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset));
if(src1.clCxt -> impl -> double_support != 0)
@@ -2138,14 +2142,13 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
}
else
{
float alpha_f = alpha, beta_f = beta, gama_f = gama;
args.push_back( std::make_pair( sizeof(cl_float), (void *)&alpha_f ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&beta_f ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&gama_f ));
}
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));

View File

@@ -73,7 +73,7 @@ void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &
size_t localSize[] = {256, 1, 1};
std::vector< std::pair<size_t, const void *> > args;
result.create(img1.size(), CV_MAKE_TYPE(depth,img1.channels()));
if(globalSize[0] != 0)
{
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data ));

View File

@@ -60,7 +60,7 @@ namespace cv
}
template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &mask,
void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, int distType)
{
cv::ocl::Context *ctx = query.clCxt;
@@ -75,7 +75,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
{
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( std::make_pair( smemSize, (void *)NULL));
@@ -101,7 +101,7 @@ void matchUnrolledCached(const oclMat /*query*/, const oclMat * /*trains*/, int
}
template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
void match(const oclMat &query, const oclMat &train, const oclMat &mask,
void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, int distType)
{
cv::ocl::Context *ctx = query.clCxt;
@@ -115,7 +115,7 @@ void match(const oclMat &query, const oclMat &train, const oclMat &mask,
{
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( std::make_pair( smemSize, (void *)NULL));
@@ -141,7 +141,7 @@ void match(const oclMat /*query*/, const oclMat * /*trains*/, int /*n*/, const o
//radius_matchUnrolledCached
template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
{
cv::ocl::Context *ctx = query.clCxt;
@@ -157,7 +157,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&maxDistance ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
@@ -181,7 +181,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
//radius_match
template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
{
cv::ocl::Context *ctx = query.clCxt;
@@ -196,7 +196,7 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&maxDistance ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
@@ -470,7 +470,7 @@ void matchDispatcher(const oclMat &query, const oclMat &train, int n, float maxD
//knn match Dispatcher
template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &mask,
void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, int distType)
{
cv::ocl::Context *ctx = query.clCxt;
@@ -485,7 +485,7 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
{
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( std::make_pair( smemSize, (void *)NULL));
@@ -505,7 +505,7 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
}
template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask,
void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, int distType)
{
cv::ocl::Context *ctx = query.clCxt;
@@ -519,7 +519,7 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask,
{
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( std::make_pair( smemSize, (void *)NULL));
@@ -538,7 +538,7 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask,
}
template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &allDist, int distType)
void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &allDist, int distType)
{
cv::ocl::Context *ctx = query.clCxt;
size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
@@ -552,7 +552,7 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
{
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&allDist.data ));
args.push_back( std::make_pair( smemSize, (void *)NULL));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_size ));
@@ -571,7 +571,7 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
}
template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
void calcDistance(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &allDist, int distType)
void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &allDist, int distType)
{
cv::ocl::Context *ctx = query.clCxt;
size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
@@ -584,7 +584,7 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &mask,
{
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&allDist.data ));
args.push_back( std::make_pair( smemSize, (void *)NULL));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_size ));
@@ -1005,6 +1005,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, cons
void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask)
{
assert(mask.empty()); // mask is not supported at the moment
oclMat trainIdx, distance;
matchSingle(query, train, trainIdx, distance, mask);
matchDownload(trainIdx, distance, matches);
@@ -1448,7 +1449,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, std::vec
// radiusMatchSingle
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train,
oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
{
if (query.empty() || train.empty())
return;
@@ -1694,4 +1695,4 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, std::
oclMat trainIdx, imgIdx, distance, nMatches;
radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks);
radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult);
}
}

View File

@@ -288,13 +288,14 @@ namespace cv
args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&cols));
if(src.clCxt -> impl -> double_support != 0)
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
if(src.clCxt -> impl -> double_support != 0)
{
args.push_back( std::make_pair(sizeof(cl_double4), (void *)&borderValue));
}
else
{
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
args.push_back( std::make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
}

View File

@@ -5,11 +5,13 @@ int bit1Count(float x)
{
int c = 0;
int ix = (int)x;
for (int i = 0 ; i < 32 ; i++)
{
c += ix & 0x1;
ix >>= 1;
}
return (float)c;
}
/* 2dim launch, global size: dim0 is (query rows + block_size - 1) / block_size * block_size, dim1 is block_size
@@ -18,7 +20,7 @@ local size: dim0 is block_size, dim1 is block_size.
__kernel void BruteForceMatch_UnrollMatch(
__global float *query,
__global float *train,
__global float *mask,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__local float *sharebuffer,
@@ -30,7 +32,7 @@ __kernel void BruteForceMatch_UnrollMatch(
int train_cols,
int step,
int distType
)
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
@@ -40,6 +42,7 @@ __kernel void BruteForceMatch_UnrollMatch(
__local float *s_train = sharebuffer + block_size * max_desc_len;
int queryIdx = groupidx * block_size + lidy;
// load the query into local memory.
for (int i = 0 ; i < max_desc_len / block_size; i ++)
{
@@ -52,9 +55,11 @@ __kernel void BruteForceMatch_UnrollMatch(
// loopUnrolledCached to find the best trainIdx and best distance.
volatile int imgIdx = 0;
for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
{
float result = 0;
for (int i = 0 ; i < max_desc_len / block_size ; i++)
{
//load a block_size * block_size block into local train.
@@ -67,28 +72,34 @@ __kernel void BruteForceMatch_UnrollMatch(
/* There are three distance types in the reducer. The first is L1Dist, which sums abs(v1 - v2); the second is L2Dist, which
sums (v1 - v2) * (v1 - v2); the third is Hamming, which sums popc(v1 ^ v2), where popc counts the bits that are set to 1. */
switch(distType)
switch (distType)
{
case 0:
for (int j = 0 ; j < block_size ; j++)
{
result += fabs(s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; j++)
{
float qr = s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; j++)
{
//result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
}
break;
case 0:
for (int j = 0 ; j < block_size ; j++)
{
result += fabs(s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; j++)
{
float qr = s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; j++)
{
//result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^(uint)s_train[j * block_size + lidx]);
}
break;
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -105,8 +116,8 @@ __kernel void BruteForceMatch_UnrollMatch(
}
barrier(CLK_LOCAL_MEM_FENCE);
__local float *s_distance = (__local float*)(sharebuffer);
__local int* s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
__local float *s_distance = (__local float *)(sharebuffer);
__local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
//find BestMatch
s_distance += lidy * block_size;
@@ -136,7 +147,7 @@ __kernel void BruteForceMatch_UnrollMatch(
__kernel void BruteForceMatch_Match(
__global float *query,
__global float *train,
__global float *mask,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__local float *sharebuffer,
@@ -147,7 +158,7 @@ __kernel void BruteForceMatch_Match(
int train_cols,
int step,
int distType
)
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
@@ -166,6 +177,7 @@ __kernel void BruteForceMatch_Match(
{
//Dist dist;
float result = 0;
for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; i++)
{
const int loadx = lidx + i * block_size;
@@ -184,28 +196,34 @@ __kernel void BruteForceMatch_Match(
/* There are three distance types in the reducer. The first is L1Dist, which sums abs(v1 - v2); the second is L2Dist, which
sums (v1 - v2) * (v1 - v2); the third is Hamming, which sums popc(v1 ^ v2), where popc counts the bits that are set to 1. */
switch(distType)
switch (distType)
{
case 0:
for (int j = 0 ; j < block_size ; j++)
{
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; j++)
{
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; j++)
{
//result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[(uint)j * block_size + lidx]);
}
break;
case 0:
for (int j = 0 ; j < block_size ; j++)
{
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; j++)
{
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; j++)
{
//result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[(uint)j * block_size + lidx]);
}
break;
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -256,7 +274,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
__global float *query,
__global float *train,
float maxDistance,
__global float *mask,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__global int *nMatches,
@@ -271,7 +289,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
int step,
int ostep,
int distType
)
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
@@ -285,6 +303,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
__local float *s_train = sharebuffer + block_size * block_size;
float result = 0;
for (int i = 0 ; i < max_desc_len / block_size ; ++i)
{
//load a block_size * block_size block into local train.
@@ -299,27 +318,33 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
/* There are three distance types in the reducer. The first is L1Dist, which sums abs(v1 - v2); the second is L2Dist, which
sums (v1 - v2) * (v1 - v2); the third is Hamming, which sums popc(v1 ^ v2), where popc counts the bits that are set to 1. */
switch(distType)
switch (distType)
{
case 0:
for (int j = 0 ; j < block_size ; ++j)
{
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; ++j)
{
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; ++j)
{
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
}
break;
case 0:
for (int j = 0 ; j < block_size ; ++j)
{
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; ++j)
{
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; ++j)
{
result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[j * block_size + lidx]);
}
break;
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -329,7 +354,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
{
unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
if(ind < bestTrainIdx_cols)
if (ind < bestTrainIdx_cols)
{
//bestImgIdx = imgIdx;
bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
@@ -343,7 +368,7 @@ __kernel void BruteForceMatch_RadiusMatch(
__global float *query,
__global float *train,
float maxDistance,
__global float *mask,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__global int *nMatches,
@@ -357,7 +382,7 @@ __kernel void BruteForceMatch_RadiusMatch(
int step,
int ostep,
int distType
)
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
@@ -371,6 +396,7 @@ __kernel void BruteForceMatch_RadiusMatch(
__local float *s_train = sharebuffer + block_size * block_size;
float result = 0;
for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; ++i)
{
//load a block_size * block_size block into local train.
@@ -385,27 +411,33 @@ __kernel void BruteForceMatch_RadiusMatch(
/* There are three distance types in the reducer: L1Dist sums abs(v1 - v2), L2Dist sums
(v1 - v2) * (v1 - v2), and Hamming sums popc(v1 ^ v2), where popc counts the bits set to 1. */
switch(distType)
switch (distType)
{
case 0:
for (int j = 0 ; j < block_size ; ++j)
{
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; ++j)
{
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; ++j)
{
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
}
break;
case 0:
for (int j = 0 ; j < block_size ; ++j)
{
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; ++j)
{
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; ++j)
{
result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[j * block_size + lidx]);
}
break;
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -415,7 +447,7 @@ __kernel void BruteForceMatch_RadiusMatch(
{
unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
if(ind < bestTrainIdx_cols)
if (ind < bestTrainIdx_cols)
{
//bestImgIdx = imgIdx;
bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
@@ -428,7 +460,7 @@ __kernel void BruteForceMatch_RadiusMatch(
__kernel void BruteForceMatch_knnUnrollMatch(
__global float *query,
__global float *train,
__global float *mask,
//__global float *mask,
__global int2 *bestTrainIdx,
__global float2 *bestDistance,
__local float *sharebuffer,
@@ -440,7 +472,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
int train_cols,
int step,
int distType
)
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
@@ -464,9 +496,11 @@ __kernel void BruteForceMatch_knnUnrollMatch(
//loopUnrolledCached
volatile int imgIdx = 0;
for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
{
float result = 0;
for (int i = 0 ; i < max_desc_len / block_size ; i++)
{
const int loadX = lidx + i * block_size;
@@ -480,28 +514,34 @@ __kernel void BruteForceMatch_knnUnrollMatch(
/* There are three distance types in the reducer: L1Dist sums abs(v1 - v2), L2Dist sums
(v1 - v2) * (v1 - v2), and Hamming sums popc(v1 ^ v2), where popc counts the bits set to 1. */
switch(distType)
switch (distType)
{
case 0:
for (int j = 0 ; j < block_size ; j++)
{
result += fabs(s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; j++)
{
float qr = s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; j++)
{
//result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
}
break;
case 0:
for (int j = 0 ; j < block_size ; j++)
{
result += fabs(s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; j++)
{
float qr = s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; j++)
{
//result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^(uint)s_train[j * block_size + lidx]);
}
break;
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -549,6 +589,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
for (int i = 0 ; i < block_size ; i++)
{
float val = s_distance[i];
if (val < bestDistance1)
{
bestDistance2 = bestDistance1;
@@ -602,7 +643,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
__kernel void BruteForceMatch_knnMatch(
__global float *query,
__global float *train,
__global float *mask,
//__global float *mask,
__global int2 *bestTrainIdx,
__global float2 *bestDistance,
__local float *sharebuffer,
@@ -613,7 +654,7 @@ __kernel void BruteForceMatch_knnMatch(
int train_cols,
int step,
int distType
)
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
@@ -632,7 +673,8 @@ __kernel void BruteForceMatch_knnMatch(
for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
{
float result = 0.0f;
for (int i = 0 ; i < (query_cols + block_size -1) / block_size ; i++)
for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; i++)
{
const int loadx = lidx + i * block_size;
//load query and train into local memory
@@ -650,28 +692,34 @@ __kernel void BruteForceMatch_knnMatch(
/* There are three distance types in the reducer: L1Dist sums abs(v1 - v2), L2Dist sums
(v1 - v2) * (v1 - v2), and Hamming sums popc(v1 ^ v2), where popc counts the bits set to 1. */
switch(distType)
switch (distType)
{
case 0:
for (int j = 0 ; j < block_size ; j++)
{
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; j++)
{
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; j++)
{
//result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[(uint)j * block_size + lidx]);
}
break;
case 0:
for (int j = 0 ; j < block_size ; j++)
{
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
}
break;
case 1:
for (int j = 0 ; j < block_size ; j++)
{
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
result += qr * qr;
}
break;
case 2:
for (int j = 0 ; j < block_size ; j++)
{
//result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[(uint)j * block_size + lidx]);
}
break;
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -719,6 +767,7 @@ __kernel void BruteForceMatch_knnMatch(
for (int i = 0 ; i < block_size ; i++)
{
float val = s_distance[i];
if (val < bestDistance1)
{
bestDistance2 = bestDistance1;
@@ -772,7 +821,7 @@ __kernel void BruteForceMatch_knnMatch(
kernel void BruteForceMatch_calcDistanceUnrolled(
__global float *query,
__global float *train,
__global float *mask,
//__global float *mask,
__global float *allDist,
__local float *sharebuffer,
int block_size,
@@ -790,7 +839,7 @@ kernel void BruteForceMatch_calcDistanceUnrolled(
kernel void BruteForceMatch_calcDistance(
__global float *query,
__global float *train,
__global float *mask,
//__global float *mask,
__global float *allDist,
__local float *sharebuffer,
int block_size,
@@ -808,9 +857,9 @@ kernel void BruteForceMatch_findBestMatch(
__global float *allDist,
__global int *bestTrainIdx,
__global float *bestDistance,
int k,
int block_size
)
int k,
int block_size
)
{
/* Todo */
}

View File

@@ -78,7 +78,7 @@ uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int col
// dynamically change the precision used for floating type
#if defined (__ATI__) || defined (__NVIDIA__)
#if defined DOUBLE_SUPPORT
#define F double
#else
#define F float
@@ -299,7 +299,7 @@ __kernel
__global const float * det,
__global const float * trace,
__global int4 * maxPosBuffer,
volatile __global unsigned int* maxCounter,
volatile __global int* maxCounter,
int counter_offset,
int det_step, // the step of det in bytes
int trace_step, // the step of trace in bytes
@@ -408,7 +408,7 @@ __kernel
if(condmax)
{
unsigned int ind = atomic_inc(maxCounter);
int ind = atomic_inc(maxCounter);
if (ind < c_max_candidates)
{
@@ -427,7 +427,7 @@ __kernel
__global float * det,
__global float * trace,
__global int4 * maxPosBuffer,
volatile __global unsigned int* maxCounter,
volatile __global int* maxCounter,
int counter_offset,
int det_step, // the step of det in bytes
int trace_step, // the step of trace in bytes
@@ -525,7 +525,7 @@ __kernel
if(condmax)
{
unsigned int ind = atomic_inc(maxCounter);
int ind = atomic_inc(maxCounter);
if (ind < c_max_candidates)
{
@@ -585,7 +585,7 @@ __kernel
__global const float * det,
__global const int4 * maxPosBuffer,
__global float * keypoints,
volatile __global unsigned int * featureCounter,
volatile __global int * featureCounter,
int det_step,
int keypoints_step,
int c_img_rows,
@@ -684,7 +684,7 @@ __kernel
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
{
// Get a new feature index.
unsigned int ind = atomic_inc(featureCounter);
int ind = atomic_inc(featureCounter);
if (ind < c_max_features)
{
@@ -737,19 +737,19 @@ __constant float c_aptW[ORI_SAMPLES] = {0.001455130288377404f, 0.001707611023448
__constant float c_NX[2][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
__constant float c_NY[2][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
void reduce_32_sum(volatile __local float * data, float partial_reduction, int tid)
void reduce_32_sum(volatile __local float * data, volatile float* partial_reduction, int tid)
{
#define op(A, B) (A)+(B)
data[tid] = partial_reduction;
#define op(A, B) (*A)+(B)
data[tid] = *partial_reduction;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]);
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 16]);
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 8 ]);
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 4 ]);
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 2 ]);
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 1 ]);
}
#undef op
}
@@ -831,7 +831,7 @@ __kernel
{
const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC;
float sumx = 0.0f, sumy = 0.0f;
volatile float sumx = 0.0f, sumy = 0.0f;
int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
@@ -856,8 +856,8 @@ __kernel
sumx += s_X[get_local_id(0) + 96];
sumy += s_Y[get_local_id(0) + 96];
}
reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0));
reduce_32_sum(s_sumx + get_local_id(1) * 32, &sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32, &sumy, get_local_id(0));
const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
@@ -892,14 +892,32 @@ __kernel
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
kp_dir = 360.0f - kp_dir;
if (fabs(kp_dir - 360.f) < FLT_EPSILON)
kp_dir = 0.f;
//kp_dir = 360.0f - kp_dir;
//if (fabs(kp_dir - 360.f) < FLT_EPSILON)
// kp_dir = 0.f;
featureDir[get_group_id(0)] = kp_dir;
}
}
// Writes a fixed orientation of 90 degrees for every keypoint, one
// work-item per keypoint, into row ANGLE_ROW of the keypoint matrix.
//
// keypoints      - base pointer of the keypoint matrix.
// keypoints_step - row pitch of `keypoints` in bytes (converted to a pitch
//                  in float elements below).
// nFeatures      - number of keypoints; valid indices are [0, nFeatures).
__kernel
void icvSetUpright(
    __global float * keypoints,
    int keypoints_step,
    int nFeatures
)
{
    keypoints_step /= sizeof(*keypoints);
    __global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;

    // Strict bound: index nFeatures is already one past the last keypoint,
    // so any surplus work-item in the launch must not write.
    const int featureIdx = get_global_id(0);
    if (featureIdx < nFeatures)
    {
        featureDir[featureIdx] = 90.0f;
    }
}
#undef ORI_SEARCH_INC
#undef ORI_WIN
#undef ORI_SAMPLES
@@ -993,10 +1011,7 @@ void calc_dx_dy(
const float centerX = featureX[get_group_id(0)];
const float centerY = featureY[get_group_id(0)];
const float size = featureSize[get_group_id(0)];
float descriptor_dir = 360.0f - featureDir[get_group_id(0)];
if (fabs(descriptor_dir - 360.f) < FLT_EPSILON)
descriptor_dir = 0.f;
descriptor_dir *= (float)(CV_PI_F / 180.0f);
float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f);
/* The sampling intervals and wavelet sized for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */
@@ -1125,11 +1140,15 @@ __kernel
{
sdxabs[tid] = fabs(sdx[tid]); // |dx| array
sdyabs[tid] = fabs(sdy[tid]); // |dy| array
//barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
//barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
// write dx, dy, |dx|, |dy|

View File

@@ -140,6 +140,10 @@ float reduce_smem(volatile __local float* smem, int size)
if (tid < 32)
{
if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
@@ -224,6 +228,11 @@ __kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr
{
volatile __local float* smem = products;
smem[tid] = product = product + smem[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
volatile __local float* smem = products;
smem[tid] = product = product + smem[tid + 16];
smem[tid] = product = product + smem[tid + 8];
smem[tid] = product = product + smem[tid + 4];

View File

@@ -56,6 +56,21 @@ namespace cv
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *nonfree_surf;
const char* noImage2dOption = "-D DISABLE_IMAGE2D";
// Runs a SURF kernel through openCLExecuteKernel, forwarding every argument
// unchanged. When support_image2d() returns false, noImage2dOption
// ("-D DISABLE_IMAGE2D") is passed as an additional trailing argument.
static void openCLExecuteKernelSURF(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
                                    size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{
    if(!support_image2d())
    {
        // image2d is unavailable: use the call that carries the extra option.
        openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, noImage2dOption);
        return;
    }

    openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth);
}
}
}
@@ -79,10 +94,6 @@ static inline int calcSize(int octave, int layer)
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
}
namespace
{
const char* noImage2dOption = "-D DISABLE_IMAGE2D";
}
class SURF_OCL_Invoker
{
@@ -99,15 +110,16 @@ public:
void icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
void icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter,
void icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures);
void icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures);
void icvSetUpright_gpu(const oclMat &keypoints, int nFeatures);
void compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures);
// end of kernel callers declarations
SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
surf_(surf),
img_cols(img.cols), img_rows(img.rows),
@@ -181,8 +193,8 @@ public:
icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
int maxCounter = ((Mat)counters).at<int>(1 + octave);
maxCounter = std::min(maxCounter, static_cast<int>(maxCandidates));
if (maxCounter > 0)
{
@@ -190,15 +202,29 @@ public:
keypoints, counters, octave, layer_rows, maxFeatures);
}
}
unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));
int featureCounter = Mat(counters).at<int>(0);
featureCounter = std::min(featureCounter, static_cast<int>(maxFeatures));
keypoints.cols = featureCounter;
if (surf_.upright)
keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
{
//keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
setUpright(keypoints);
}
else
{
findOrientation(keypoints);
}
}
// Gives every keypoint the upright orientation by invoking
// icvSetUpright_gpu; a keypoint matrix with no columns is left untouched.
void setUpright(oclMat &keypoints)
{
    const int featureCount = keypoints.cols;
    if (featureCount <= 0)
    {
        return; // nothing to launch
    }

    icvSetUpright_gpu(keypoints, featureCount);
}
void findOrientation(oclMat &keypoints)
@@ -483,14 +509,7 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
1
};
if(support_image2d())
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
}
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
@@ -536,17 +555,10 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat
1
};
if(support_image2d())
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
}
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter,
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures)
{
Context *clCxt = det.clCxt;
@@ -568,14 +580,7 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa
size_t localThreads[3] = {3, 3, 3};
size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1};
if(support_image2d())
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
}
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures)
@@ -602,16 +607,27 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
size_t localThreads[3] = {32, 4, 1};
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
if(support_image2d())
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
}
// Host-side launcher for the "icvSetUpright" kernel.
// Binds keypoints.data, keypoints.step and nFeatures as the three kernel
// arguments and requests a 1-D launch of nFeatures work-items with a
// work-group size of 256, using the context taken from `counters`.
void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
{
    // NOTE(review): the global size is nFeatures itself, not a multiple of the
    // local size as in the other launches in this file -- confirm that
    // openCLExecuteKernel rounds it up before enqueueing.
    size_t localThreads[3]  = {256, 1, 1};
    size_t globalThreads[3] = {static_cast<size_t>(nFeatures), 1, 1};

    typedef std::pair<size_t, const void *> KernelArg;
    std::vector<KernelArg> args;
    args.push_back(KernelArg(sizeof(cl_mem), (void *)&keypoints.data));
    args.push_back(KernelArg(sizeof(cl_int), (void *)&keypoints.step));
    args.push_back(KernelArg(sizeof(cl_int), (void *)&nFeatures));

    Context *clCxt = counters.clCxt;
    openCLExecuteKernelSURF(clCxt, &nonfree_surf, std::string("icvSetUpright"), globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures)
{
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
@@ -647,14 +663,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
if(support_image2d())
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
}
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
kernelName = "normalize_descriptors64";
@@ -667,14 +677,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.clear();
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
if(support_image2d())
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
}
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
@@ -702,14 +706,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
if(support_image2d())
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
}
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
kernelName = "normalize_descriptors128";
@@ -722,14 +720,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.clear();
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
if(support_image2d())
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
}
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
}