Merge release 2.4.4
This commit is contained in:
@@ -3,15 +3,14 @@
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoBM
|
||||
|
||||
typedef std::tr1::tuple<string, string> pair_string;
|
||||
DEF_PARAM_TEST_1(ImagePair, pair_string);
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoBM,
|
||||
Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
|
||||
{
|
||||
declare.time(5.0);
|
||||
|
||||
@@ -28,18 +27,13 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png",
|
||||
{
|
||||
cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
|
||||
|
||||
cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
const cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
d_bm(d_imgLeft, d_imgRight, d_dst);
|
||||
TEST_CYCLE() d_bm(d_imgLeft, d_imgRight, dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_bm(d_imgLeft, d_imgRight, d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -47,12 +41,7 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png",
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
bm(imgLeft, imgRight, dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
bm(imgLeft, imgRight, dst);
|
||||
}
|
||||
TEST_CYCLE() bm(imgLeft, imgRight, dst);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -61,7 +50,8 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png",
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoBeliefPropagation
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation,
|
||||
Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
|
||||
{
|
||||
declare.time(10.0);
|
||||
|
||||
@@ -77,29 +67,25 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(pair_string("gpu/
|
||||
{
|
||||
cv::gpu::StereoBeliefPropagation d_bp(ndisp);
|
||||
|
||||
cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
const cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
d_bp(d_imgLeft, d_imgRight, d_dst);
|
||||
TEST_CYCLE() d_bp(d_imgLeft, d_imgRight, dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_bp(d_imgLeft, d_imgRight, d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "No such CPU implementation analogy.";
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoConstantSpaceBP
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP,
|
||||
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
|
||||
{
|
||||
declare.time(10.0);
|
||||
|
||||
@@ -115,29 +101,25 @@ PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(pair_string("gpu/st
|
||||
{
|
||||
cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
|
||||
|
||||
cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
const cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
d_csbp(d_imgLeft, d_imgRight, d_dst);
|
||||
TEST_CYCLE() d_csbp(d_imgLeft, d_imgRight, dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_csbp(d_imgLeft, d_imgRight, d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "No such CPU implementation analogy.";
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// DisparityBilateralFilter
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
|
||||
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter,
|
||||
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
|
||||
{
|
||||
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
@@ -151,22 +133,17 @@ PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu
|
||||
{
|
||||
cv::gpu::DisparityBilateralFilter d_filter(ndisp);
|
||||
|
||||
cv::gpu::GpuMat d_img(img);
|
||||
cv::gpu::GpuMat d_disp(disp);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_img(img);
|
||||
const cv::gpu::GpuMat d_disp(disp);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
d_filter(d_disp, d_img, d_dst);
|
||||
TEST_CYCLE() d_filter(d_disp, d_img, dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_filter(d_disp, d_img, d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "No such CPU implementation analogy.";
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -175,45 +152,42 @@ PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu
|
||||
|
||||
DEF_PARAM_TEST_1(Count, int);
|
||||
|
||||
PERF_TEST_P(Count, Calib3D_TransformPoints, Values(5000, 10000, 20000))
|
||||
PERF_TEST_P(Count, Calib3D_TransformPoints,
|
||||
Values(5000, 10000, 20000))
|
||||
{
|
||||
const int count = GetParam();
|
||||
|
||||
cv::Mat src(1, count, CV_32FC3);
|
||||
fillRandom(src, -100, 100);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
|
||||
TEST_CYCLE() cv::gpu::transformPoints(d_src, rvec, tvec, dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "No such CPU implementation analogy.";
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// ProjectPoints
|
||||
|
||||
PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
|
||||
PERF_TEST_P(Count, Calib3D_ProjectPoints,
|
||||
Values(5000, 10000, 20000))
|
||||
{
|
||||
const int count = GetParam();
|
||||
|
||||
cv::Mat src(1, count, CV_32FC3);
|
||||
fillRandom(src, -100, 100);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
@@ -221,28 +195,18 @@ PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
|
||||
TEST_CYCLE() cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
|
||||
}
|
||||
TEST_CYCLE() cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -251,17 +215,18 @@ PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SolvePnPRansac
|
||||
|
||||
PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
|
||||
PERF_TEST_P(Count, Calib3D_SolvePnPRansac,
|
||||
Values(5000, 10000, 20000))
|
||||
{
|
||||
declare.time(10.0);
|
||||
|
||||
const int count = GetParam();
|
||||
|
||||
cv::Mat object(1, count, CV_32FC3);
|
||||
fillRandom(object, -100, 100);
|
||||
declare.in(object, WARMUP_RNG);
|
||||
|
||||
cv::Mat camera_mat(3, 3, CV_32FC1);
|
||||
fillRandom(camera_mat, 0.5, 1);
|
||||
cv::randu(camera_mat, 0.5, 1);
|
||||
camera_mat.at<float>(0, 1) = 0.f;
|
||||
camera_mat.at<float>(1, 0) = 0.f;
|
||||
camera_mat.at<float>(2, 0) = 0.f;
|
||||
@@ -269,79 +234,66 @@ PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
|
||||
|
||||
const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
|
||||
|
||||
std::vector<cv::Point2f> image_vec;
|
||||
cv::Mat rvec_gold(1, 3, CV_32FC1);
|
||||
fillRandom(rvec_gold, 0, 1);
|
||||
cv::randu(rvec_gold, 0, 1);
|
||||
|
||||
cv::Mat tvec_gold(1, 3, CV_32FC1);
|
||||
fillRandom(tvec_gold, 0, 1);
|
||||
cv::randu(tvec_gold, 0, 1);
|
||||
|
||||
std::vector<cv::Point2f> image_vec;
|
||||
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
|
||||
|
||||
cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
|
||||
const cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
|
||||
|
||||
cv::Mat rvec;
|
||||
cv::Mat tvec;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
|
||||
TEST_CYCLE() cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
|
||||
}
|
||||
GPU_SANITY_CHECK(rvec, 1e-3);
|
||||
GPU_SANITY_CHECK(tvec, 1e-3);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
|
||||
TEST_CYCLE() cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
|
||||
}
|
||||
CPU_SANITY_CHECK(rvec, 1e-6);
|
||||
CPU_SANITY_CHECK(tvec, 1e-6);
|
||||
}
|
||||
|
||||
CPU_SANITY_CHECK(rvec);
|
||||
CPU_SANITY_CHECK(tvec);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// ReprojectImageTo3D
|
||||
|
||||
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
|
||||
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8U, CV_16S)))
|
||||
{
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
|
||||
cv::Mat src(size, depth);
|
||||
fillRandom(src, 5.0, 30.0);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
cv::Mat Q(4, 4, CV_32FC1);
|
||||
fillRandom(Q, 0.1, 1.0);
|
||||
cv::randu(Q, 0.1, 1.0);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
|
||||
TEST_CYCLE() cv::gpu::reprojectImageTo3D(d_src, dst, Q);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::reprojectImageTo3D(src, dst, Q);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::reprojectImageTo3D(src, dst, Q);
|
||||
}
|
||||
TEST_CYCLE() cv::reprojectImageTo3D(src, dst, Q);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -350,32 +302,27 @@ PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// DrawColorDisp
|
||||
|
||||
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
|
||||
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8U, CV_16S)))
|
||||
{
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src, 0, 255);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
cv::gpu::drawColorDisp(d_src, d_dst, 255);
|
||||
TEST_CYCLE() cv::gpu::drawColorDisp(d_src, dst, 255);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::drawColorDisp(d_src, d_dst, 255);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "No such CPU implementation analogy.";
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -3,8 +3,7 @@
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::szXGA, perf::sz720p, perf::sz1080p)
|
||||
|
||||
#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::sz720p)
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// BilateralFilter
|
||||
@@ -12,96 +11,86 @@ using namespace testing;
|
||||
DEF_PARAM_TEST(Sz_Depth_Cn_KernelSz, cv::Size, MatDepth, MatCn, int);
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn_KernelSz, Denoising_BilateralFilter,
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES, Values(CV_8U, CV_32F), GPU_CHANNELS_1_3, Values(3, 5, 9)))
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES,
|
||||
Values(CV_8U, CV_32F),
|
||||
GPU_CHANNELS_1_3,
|
||||
Values(3, 5, 9)))
|
||||
{
|
||||
declare.time(60.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth = GET_PARAM(1);
|
||||
int channels = GET_PARAM(2);
|
||||
int kernel_size = GET_PARAM(3);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
const int channels = GET_PARAM(2);
|
||||
const int kernel_size = GET_PARAM(3);
|
||||
|
||||
float sigma_color = 7;
|
||||
float sigma_spatial = 5;
|
||||
int borderMode = cv::BORDER_REFLECT101;
|
||||
const float sigma_color = 7;
|
||||
const float sigma_spatial = 5;
|
||||
const int borderMode = cv::BORDER_REFLECT101;
|
||||
|
||||
int type = CV_MAKE_TYPE(depth, channels);
|
||||
const int type = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
cv::gpu::bilateralFilter(d_src, d_dst, kernel_size, sigma_color, sigma_spatial, borderMode);
|
||||
TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::bilateralFilter(d_src, d_dst, kernel_size, sigma_color, sigma_spatial, borderMode);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
|
||||
}
|
||||
TEST_CYCLE() cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// nonLocalMeans
|
||||
|
||||
DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), GPU_CHANNELS_1_3, Values(21), Values(5, 7)))
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES,
|
||||
Values<MatDepth>(CV_8U),
|
||||
GPU_CHANNELS_1_3,
|
||||
Values(21),
|
||||
Values(5)))
|
||||
{
|
||||
declare.time(60.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth = GET_PARAM(1);
|
||||
int channels = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
const int channels = GET_PARAM(2);
|
||||
const int search_widow_size = GET_PARAM(3);
|
||||
const int block_size = GET_PARAM(4);
|
||||
|
||||
int search_widow_size = GET_PARAM(3);
|
||||
int block_size = GET_PARAM(4);
|
||||
const float h = 10;
|
||||
const int borderMode = cv::BORDER_REFLECT101;
|
||||
|
||||
float h = 10;
|
||||
int borderMode = cv::BORDER_REFLECT101;
|
||||
|
||||
int type = CV_MAKE_TYPE(depth, channels);
|
||||
const int type = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
cv::gpu::nonLocalMeans(d_src, d_dst, h, search_widow_size, block_size, borderMode);
|
||||
TEST_CYCLE() cv::gpu::nonLocalMeans(d_src, dst, h, search_widow_size, block_size, borderMode);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::nonLocalMeans(d_src, d_dst, h, search_widow_size, block_size, borderMode);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "No such CPU implementation analogy";
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,46 +101,41 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
|
||||
DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), GPU_CHANNELS_1_3, Values(21), Values(7)))
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES,
|
||||
Values<MatDepth>(CV_8U),
|
||||
GPU_CHANNELS_1_3,
|
||||
Values(21),
|
||||
Values(7)))
|
||||
{
|
||||
declare.time(150.0);
|
||||
declare.time(60.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth = GET_PARAM(1);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
const int search_widow_size = GET_PARAM(2);
|
||||
const int block_size = GET_PARAM(3);
|
||||
|
||||
int search_widow_size = GET_PARAM(2);
|
||||
int block_size = GET_PARAM(3);
|
||||
|
||||
float h = 10;
|
||||
int type = CV_MAKE_TYPE(depth, 1);
|
||||
const float h = 10;
|
||||
const int type = CV_MAKE_TYPE(depth, 1);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
cv::gpu::FastNonLocalMeansDenoising fnlmd;
|
||||
|
||||
fnlmd.simpleMethod(d_src, d_dst, h, search_widow_size, block_size);
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
fnlmd.simpleMethod(d_src, d_dst, h, search_widow_size, block_size);
|
||||
}
|
||||
TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size);
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
|
||||
}
|
||||
TEST_CYCLE() cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -163,47 +147,41 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
|
||||
DEF_PARAM_TEST(Sz_Depth_WinSz_BlockSz, cv::Size, MatDepth, int, int);
|
||||
|
||||
PERF_TEST_P(Sz_Depth_WinSz_BlockSz, Denoising_FastNonLocalMeansColored,
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), Values(21), Values(7)))
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES,
|
||||
Values<MatDepth>(CV_8U),
|
||||
Values(21),
|
||||
Values(7)))
|
||||
{
|
||||
declare.time(350.0);
|
||||
declare.time(60.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth = GET_PARAM(1);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
const int search_widow_size = GET_PARAM(2);
|
||||
const int block_size = GET_PARAM(3);
|
||||
|
||||
int search_widow_size = GET_PARAM(2);
|
||||
int block_size = GET_PARAM(3);
|
||||
|
||||
float h = 10;
|
||||
int type = CV_MAKE_TYPE(depth, 3);
|
||||
const float h = 10;
|
||||
const int type = CV_MAKE_TYPE(depth, 3);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
cv::gpu::FastNonLocalMeansDenoising fnlmd;
|
||||
|
||||
fnlmd.labMethod(d_src, d_dst, h, h, search_widow_size, block_size);
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
fnlmd.labMethod(d_src, d_dst, h, h, search_widow_size, block_size);
|
||||
}
|
||||
TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size);
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
|
||||
}
|
||||
TEST_CYCLE() cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -3,139 +3,194 @@
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
struct KeypointIdxCompare
|
||||
{
|
||||
std::vector<cv::KeyPoint>* keypoints;
|
||||
|
||||
explicit KeypointIdxCompare(std::vector<cv::KeyPoint>* _keypoints) : keypoints(_keypoints) {}
|
||||
|
||||
bool operator ()(size_t i1, size_t i2) const
|
||||
{
|
||||
cv::KeyPoint kp1 = (*keypoints)[i1];
|
||||
cv::KeyPoint kp2 = (*keypoints)[i2];
|
||||
if (kp1.pt.x != kp2.pt.x)
|
||||
return kp1.pt.x < kp2.pt.x;
|
||||
if (kp1.pt.y != kp2.pt.y)
|
||||
return kp1.pt.y < kp2.pt.y;
|
||||
if (kp1.response != kp2.response)
|
||||
return kp1.response < kp2.response;
|
||||
return kp1.octave < kp2.octave;
|
||||
}
|
||||
};
|
||||
|
||||
// Sorts `keypoints` into the order defined by KeypointIdxCompare.
// When a descriptor array is passed, its rows are permuted the same way, so
// row i of the result still belongs to keypoints[i]; with the default
// cv::noArray() only the key points are reordered.
static void sortKeyPoints(std::vector<cv::KeyPoint>& keypoints, cv::InputOutputArray _descriptors = cv::noArray())
{
    const size_t total = keypoints.size();

    // order[k] is the original position of the key point that ends up at k.
    std::vector<size_t> order(total);
    for (size_t k = 0; k < total; ++k)
        order[k] = k;

    std::sort(order.begin(), order.end(), KeypointIdxCompare(&keypoints));

    const bool withDescriptors = _descriptors.needed();

    cv::Mat sourceRows;
    cv::Mat sortedRows;
    if (withDescriptors)
    {
        sourceRows = _descriptors.getMat();
        sortedRows.create(sourceRows.size(), sourceRows.type());
    }

    std::vector<cv::KeyPoint> sortedKeypoints(total);
    for (size_t k = 0; k < total; ++k)
    {
        const size_t from = order[k];

        sortedKeypoints[k] = keypoints[from];

        // Rows are moved only when a non-empty descriptor buffer was allocated above.
        if (!sortedRows.empty())
            sourceRows.row(static_cast<int>(from)).copyTo(sortedRows.row(static_cast<int>(k)));
    }

    keypoints.swap(sortedKeypoints);

    if (withDescriptors)
        sortedRows.copyTo(_descriptors);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SURF
|
||||
|
||||
DEF_PARAM_TEST_1(Image, string);
|
||||
|
||||
PERF_TEST_P(Image, Features2D_SURF, Values<string>("gpu/perf/aloe.png"))
|
||||
PERF_TEST_P(Image, Features2D_SURF,
|
||||
Values<string>("gpu/perf/aloe.png"))
|
||||
{
|
||||
declare.time(50.0);
|
||||
|
||||
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
|
||||
const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::SURF_GPU d_surf;
|
||||
|
||||
cv::gpu::GpuMat d_img(img);
|
||||
const cv::gpu::GpuMat d_img(img);
|
||||
cv::gpu::GpuMat d_keypoints, d_descriptors;
|
||||
|
||||
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
|
||||
TEST_CYCLE() d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
|
||||
}
|
||||
std::vector<cv::KeyPoint> gpu_keypoints;
|
||||
d_surf.downloadKeypoints(d_keypoints, gpu_keypoints);
|
||||
|
||||
GPU_SANITY_CHECK(d_descriptors, 1e-4);
|
||||
GPU_SANITY_CHECK_KEYPOINTS(SURF, d_keypoints);
|
||||
cv::Mat gpu_descriptors(d_descriptors);
|
||||
|
||||
sortKeyPoints(gpu_keypoints, gpu_descriptors);
|
||||
|
||||
SANITY_CHECK_KEYPOINTS(gpu_keypoints);
|
||||
SANITY_CHECK(gpu_descriptors, 1e-3);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::SURF surf;
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
cv::Mat descriptors;
|
||||
std::vector<cv::KeyPoint> cpu_keypoints;
|
||||
cv::Mat cpu_descriptors;
|
||||
|
||||
surf(img, cv::noArray(), keypoints, descriptors);
|
||||
TEST_CYCLE() surf(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
keypoints.clear();
|
||||
surf(img, cv::noArray(), keypoints, descriptors);
|
||||
}
|
||||
|
||||
SANITY_CHECK_KEYPOINTS(keypoints);
|
||||
SANITY_CHECK(descriptors, 1e-4);
|
||||
SANITY_CHECK_KEYPOINTS(cpu_keypoints);
|
||||
SANITY_CHECK(cpu_descriptors);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// FAST
|
||||
|
||||
PERF_TEST_P(Image, Features2D_FAST, Values<string>("gpu/perf/aloe.png"))
|
||||
DEF_PARAM_TEST(Image_Threshold_NonMaxSupression, string, int, bool);
|
||||
|
||||
PERF_TEST_P(Image_Threshold_NonMaxSupression, Features2D_FAST,
|
||||
Combine(Values<string>("gpu/perf/aloe.png"),
|
||||
Values(20),
|
||||
Bool()))
|
||||
{
|
||||
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
|
||||
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
const int threshold = GET_PARAM(1);
|
||||
const bool nonMaxSuppersion = GET_PARAM(2);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::FAST_GPU d_fast(20);
|
||||
cv::gpu::FAST_GPU d_fast(threshold, nonMaxSuppersion, 0.5);
|
||||
|
||||
cv::gpu::GpuMat d_img(img);
|
||||
const cv::gpu::GpuMat d_img(img);
|
||||
cv::gpu::GpuMat d_keypoints;
|
||||
|
||||
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
|
||||
TEST_CYCLE() d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
|
||||
}
|
||||
std::vector<cv::KeyPoint> gpu_keypoints;
|
||||
d_fast.downloadKeypoints(d_keypoints, gpu_keypoints);
|
||||
|
||||
GPU_SANITY_CHECK_RESPONSE(FAST, d_keypoints);
|
||||
sortKeyPoints(gpu_keypoints);
|
||||
|
||||
SANITY_CHECK_KEYPOINTS(gpu_keypoints);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
std::vector<cv::KeyPoint> cpu_keypoints;
|
||||
|
||||
cv::FAST(img, keypoints, 20);
|
||||
TEST_CYCLE() cv::FAST(img, cpu_keypoints, threshold, nonMaxSuppersion);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
keypoints.clear();
|
||||
cv::FAST(img, keypoints, 20);
|
||||
}
|
||||
|
||||
SANITY_CHECK_KEYPOINTS(keypoints);
|
||||
SANITY_CHECK_KEYPOINTS(cpu_keypoints);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// ORB
|
||||
|
||||
PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.png"))
|
||||
DEF_PARAM_TEST(Image_NFeatures, string, int);
|
||||
|
||||
PERF_TEST_P(Image_NFeatures, Features2D_ORB,
|
||||
Combine(Values<string>("gpu/perf/aloe.png"),
|
||||
Values(4000)))
|
||||
{
|
||||
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
|
||||
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
const int nFeatures = GET_PARAM(1);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::ORB_GPU d_orb(4000);
|
||||
cv::gpu::ORB_GPU d_orb(nFeatures);
|
||||
|
||||
cv::gpu::GpuMat d_img(img);
|
||||
const cv::gpu::GpuMat d_img(img);
|
||||
cv::gpu::GpuMat d_keypoints, d_descriptors;
|
||||
|
||||
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
|
||||
TEST_CYCLE() d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
|
||||
}
|
||||
std::vector<cv::KeyPoint> gpu_keypoints;
|
||||
d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
|
||||
|
||||
GPU_SANITY_CHECK_KEYPOINTS(ORB, d_keypoints);
|
||||
GPU_SANITY_CHECK(d_descriptors);
|
||||
cv::Mat gpu_descriptors(d_descriptors);
|
||||
|
||||
gpu_keypoints.resize(10);
|
||||
gpu_descriptors = gpu_descriptors.rowRange(0, 10);
|
||||
|
||||
sortKeyPoints(gpu_keypoints, gpu_descriptors);
|
||||
|
||||
SANITY_CHECK_KEYPOINTS(gpu_keypoints);
|
||||
SANITY_CHECK(gpu_descriptors);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::ORB orb(4000);
|
||||
cv::ORB orb(nFeatures);
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
cv::Mat descriptors;
|
||||
std::vector<cv::KeyPoint> cpu_keypoints;
|
||||
cv::Mat cpu_descriptors;
|
||||
|
||||
orb(img, cv::noArray(), keypoints, descriptors);
|
||||
TEST_CYCLE() orb(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
keypoints.clear();
|
||||
orb(img, cv::noArray(), keypoints, descriptors);
|
||||
}
|
||||
|
||||
SANITY_CHECK_KEYPOINTS(keypoints);
|
||||
SANITY_CHECK(descriptors);
|
||||
SANITY_CHECK_KEYPOINTS(cpu_keypoints);
|
||||
SANITY_CHECK(cpu_descriptors);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,166 +199,165 @@ PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.png"))
|
||||
|
||||
DEF_PARAM_TEST(DescSize_Norm, int, NormType);
|
||||
|
||||
PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
|
||||
PERF_TEST_P(DescSize_Norm, Features2D_BFMatch,
|
||||
Combine(Values(64, 128, 256),
|
||||
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
int desc_size = GET_PARAM(0);
|
||||
int normType = GET_PARAM(1);
|
||||
const int desc_size = GET_PARAM(0);
|
||||
const int normType = GET_PARAM(1);
|
||||
|
||||
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
|
||||
const int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
|
||||
|
||||
cv::Mat query(3000, desc_size, type);
|
||||
fillRandom(query);
|
||||
declare.in(query, WARMUP_RNG);
|
||||
|
||||
cv::Mat train(3000, desc_size, type);
|
||||
fillRandom(train);
|
||||
declare.in(train, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::BFMatcher_GPU d_matcher(normType);
|
||||
|
||||
cv::gpu::GpuMat d_query(query);
|
||||
cv::gpu::GpuMat d_train(train);
|
||||
const cv::gpu::GpuMat d_query(query);
|
||||
const cv::gpu::GpuMat d_train(train);
|
||||
cv::gpu::GpuMat d_trainIdx, d_distance;
|
||||
|
||||
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
|
||||
TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
|
||||
}
|
||||
std::vector<cv::DMatch> gpu_matches;
|
||||
d_matcher.matchDownload(d_trainIdx, d_distance, gpu_matches);
|
||||
|
||||
GPU_SANITY_CHECK(d_trainIdx);
|
||||
GPU_SANITY_CHECK(d_distance);
|
||||
SANITY_CHECK_MATCHES(gpu_matches);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::BFMatcher matcher(normType);
|
||||
|
||||
std::vector<cv::DMatch> matches;
|
||||
std::vector<cv::DMatch> cpu_matches;
|
||||
|
||||
matcher.match(query, train, matches);
|
||||
TEST_CYCLE() matcher.match(query, train, cpu_matches);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
matcher.match(query, train, matches);
|
||||
}
|
||||
|
||||
SANITY_CHECK(matches);
|
||||
SANITY_CHECK_MATCHES(cpu_matches);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// BFKnnMatch
|
||||
|
||||
static void toOneRowMatches(const std::vector< std::vector<cv::DMatch> >& src, std::vector<cv::DMatch>& dst)
|
||||
{
|
||||
dst.clear();
|
||||
for (size_t i = 0; i < src.size(); ++i)
|
||||
for (size_t j = 0; j < src[i].size(); ++j)
|
||||
dst.push_back(src[i][j]);
|
||||
}
|
||||
|
||||
DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType);
|
||||
|
||||
PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine(
|
||||
Values(64, 128, 256),
|
||||
Values(2, 3),
|
||||
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
|
||||
PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch,
|
||||
Combine(Values(64, 128, 256),
|
||||
Values(2, 3),
|
||||
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
|
||||
{
|
||||
declare.time(30.0);
|
||||
|
||||
int desc_size = GET_PARAM(0);
|
||||
int k = GET_PARAM(1);
|
||||
int normType = GET_PARAM(2);
|
||||
const int desc_size = GET_PARAM(0);
|
||||
const int k = GET_PARAM(1);
|
||||
const int normType = GET_PARAM(2);
|
||||
|
||||
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
|
||||
const int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
|
||||
|
||||
cv::Mat query(3000, desc_size, type);
|
||||
fillRandom(query);
|
||||
declare.in(query, WARMUP_RNG);
|
||||
|
||||
cv::Mat train(3000, desc_size, type);
|
||||
fillRandom(train);
|
||||
declare.in(train, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::BFMatcher_GPU d_matcher(normType);
|
||||
|
||||
cv::gpu::GpuMat d_query(query);
|
||||
cv::gpu::GpuMat d_train(train);
|
||||
const cv::gpu::GpuMat d_query(query);
|
||||
const cv::gpu::GpuMat d_train(train);
|
||||
cv::gpu::GpuMat d_trainIdx, d_distance, d_allDist;
|
||||
|
||||
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
|
||||
TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
|
||||
}
|
||||
std::vector< std::vector<cv::DMatch> > matchesTbl;
|
||||
d_matcher.knnMatchDownload(d_trainIdx, d_distance, matchesTbl);
|
||||
|
||||
GPU_SANITY_CHECK(d_trainIdx);
|
||||
GPU_SANITY_CHECK(d_distance);
|
||||
std::vector<cv::DMatch> gpu_matches;
|
||||
toOneRowMatches(matchesTbl, gpu_matches);
|
||||
|
||||
SANITY_CHECK_MATCHES(gpu_matches);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::BFMatcher matcher(normType);
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
std::vector< std::vector<cv::DMatch> > matchesTbl;
|
||||
|
||||
matcher.knnMatch(query, train, matches, k);
|
||||
TEST_CYCLE() matcher.knnMatch(query, train, matchesTbl, k);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
matcher.knnMatch(query, train, matches, k);
|
||||
}
|
||||
std::vector<cv::DMatch> cpu_matches;
|
||||
toOneRowMatches(matchesTbl, cpu_matches);
|
||||
|
||||
SANITY_CHECK(matches);
|
||||
SANITY_CHECK_MATCHES(cpu_matches);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// BFRadiusMatch
|
||||
|
||||
PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
|
||||
PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch,
|
||||
Combine(Values(64, 128, 256),
|
||||
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
|
||||
{
|
||||
declare.time(30.0);
|
||||
|
||||
int desc_size = GET_PARAM(0);
|
||||
int normType = GET_PARAM(1);
|
||||
const int desc_size = GET_PARAM(0);
|
||||
const int normType = GET_PARAM(1);
|
||||
|
||||
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
|
||||
const int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
|
||||
const float maxDistance = 10000;
|
||||
|
||||
cv::Mat query(3000, desc_size, type);
|
||||
fillRandom(query, 0.0, 1.0);
|
||||
declare.in(query, WARMUP_RNG);
|
||||
|
||||
cv::Mat train(3000, desc_size, type);
|
||||
fillRandom(train, 0.0, 1.0);
|
||||
declare.in(train, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::BFMatcher_GPU d_matcher(normType);
|
||||
|
||||
cv::gpu::GpuMat d_query(query);
|
||||
cv::gpu::GpuMat d_train(train);
|
||||
const cv::gpu::GpuMat d_query(query);
|
||||
const cv::gpu::GpuMat d_train(train);
|
||||
cv::gpu::GpuMat d_trainIdx, d_nMatches, d_distance;
|
||||
|
||||
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
|
||||
TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
|
||||
}
|
||||
std::vector< std::vector<cv::DMatch> > matchesTbl;
|
||||
d_matcher.radiusMatchDownload(d_trainIdx, d_distance, d_nMatches, matchesTbl);
|
||||
|
||||
GPU_SANITY_CHECK(d_trainIdx);
|
||||
GPU_SANITY_CHECK(d_distance);
|
||||
std::vector<cv::DMatch> gpu_matches;
|
||||
toOneRowMatches(matchesTbl, gpu_matches);
|
||||
|
||||
SANITY_CHECK_MATCHES(gpu_matches);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::BFMatcher matcher(normType);
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
std::vector< std::vector<cv::DMatch> > matchesTbl;
|
||||
|
||||
matcher.radiusMatch(query, train, matches, 2.0);
|
||||
TEST_CYCLE() matcher.radiusMatch(query, train, matchesTbl, maxDistance);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
matcher.radiusMatch(query, train, matches, 2.0);
|
||||
}
|
||||
std::vector<cv::DMatch> cpu_matches;
|
||||
toOneRowMatches(matchesTbl, cpu_matches);
|
||||
|
||||
SANITY_CHECK(matches);
|
||||
SANITY_CHECK_MATCHES(cpu_matches);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@@ -3,48 +3,39 @@
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Blur
|
||||
|
||||
DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);
|
||||
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), Values(3, 5, 7)))
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8UC1, CV_8UC4),
|
||||
Values(3, 5, 7)))
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int type = GET_PARAM(1);
|
||||
int ksize = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
const int ksize = GET_PARAM(2);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
|
||||
TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize));
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::blur(src, dst, cv::Size(ksize, ksize));
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::blur(src, dst, cv::Size(ksize, ksize));
|
||||
}
|
||||
TEST_CYCLE() cv::blur(src, dst, cv::Size(ksize, ksize));
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -57,38 +48,28 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Valu
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int type = GET_PARAM(1);
|
||||
int ksize = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
const int ksize = GET_PARAM(2);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::GpuMat d_buf;
|
||||
|
||||
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
|
||||
TEST_CYCLE() cv::gpu::Sobel(d_src, dst, -1, 1, 1, d_buf, ksize);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::Sobel(src, dst, -1, 1, 1, ksize);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::Sobel(src, dst, -1, 1, 1, ksize);
|
||||
}
|
||||
TEST_CYCLE() cv::Sobel(src, dst, -1, 1, 1, ksize);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -101,37 +82,27 @@ PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int type = GET_PARAM(1);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::GpuMat d_buf;
|
||||
|
||||
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
|
||||
TEST_CYCLE() cv::gpu::Scharr(d_src, dst, -1, 1, 0, d_buf);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::Scharr(src, dst, -1, 1, 0);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::Scharr(src, dst, -1, 1, 0);
|
||||
}
|
||||
TEST_CYCLE() cv::Scharr(src, dst, -1, 1, 0);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -144,38 +115,28 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZE
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int type = GET_PARAM(1);
|
||||
int ksize = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
const int ksize = GET_PARAM(2);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::GpuMat d_buf;
|
||||
|
||||
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
|
||||
TEST_CYCLE() cv::gpu::GaussianBlur(d_src, dst, cv::Size(ksize, ksize), d_buf, 0.5);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
|
||||
}
|
||||
TEST_CYCLE() cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -188,37 +149,27 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int type = GET_PARAM(1);
|
||||
int ksize = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
const int ksize = GET_PARAM(2);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
|
||||
TEST_CYCLE() cv::gpu::Laplacian(d_src, dst, -1, ksize);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::Laplacian(src, dst, -1, ksize);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::Laplacian(src, dst, -1, ksize);
|
||||
}
|
||||
TEST_CYCLE() cv::Laplacian(src, dst, -1, ksize);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -231,39 +182,29 @@ PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int type = GET_PARAM(1);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
|
||||
const cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::GpuMat d_buf;
|
||||
|
||||
cv::gpu::erode(d_src, d_dst, ker, d_buf);
|
||||
TEST_CYCLE() cv::gpu::erode(d_src, dst, ker, d_buf);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::erode(d_src, d_dst, ker, d_buf);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::erode(src, dst, ker);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::erode(src, dst, ker);
|
||||
}
|
||||
TEST_CYCLE() cv::erode(src, dst, ker);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -276,39 +217,29 @@ PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int type = GET_PARAM(1);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
|
||||
const cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::GpuMat d_buf;
|
||||
|
||||
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
|
||||
TEST_CYCLE() cv::gpu::dilate(d_src, dst, ker, d_buf);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::dilate(src, dst, ker);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::dilate(src, dst, ker);
|
||||
}
|
||||
TEST_CYCLE() cv::dilate(src, dst, ker);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -326,41 +257,31 @@ PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Val
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int type = GET_PARAM(1);
|
||||
int morphOp = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
const int morphOp = GET_PARAM(2);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
|
||||
const cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::GpuMat d_buf1;
|
||||
cv::gpu::GpuMat d_buf2;
|
||||
|
||||
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
|
||||
TEST_CYCLE() cv::gpu::morphologyEx(d_src, dst, morphOp, ker, d_buf1, d_buf2);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::morphologyEx(src, dst, morphOp, ker);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::morphologyEx(src, dst, morphOp, ker);
|
||||
}
|
||||
TEST_CYCLE() cv::morphologyEx(src, dst, morphOp, ker);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -373,43 +294,31 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, V
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int type = GET_PARAM(1);
|
||||
int ksize = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
const int ksize = GET_PARAM(2);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
cv::Mat kernel(ksize, ksize, CV_32FC1);
|
||||
fillRandom(kernel, 0.0, 1.0);
|
||||
declare.in(kernel, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
|
||||
TEST_CYCLE() cv::gpu::filter2D(d_src, dst, -1, kernel);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
cv::filter2D(src, dst, -1, kernel);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::filter2D(src, dst, -1, kernel);
|
||||
}
|
||||
TEST_CYCLE() cv::filter2D(src, dst, -1, kernel);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -3,8 +3,6 @@
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
|
||||
DEF_PARAM_TEST_1(Image, string);
|
||||
|
||||
struct GreedyLabeling
|
||||
@@ -100,28 +98,45 @@ struct GreedyLabeling
|
||||
dot* stack;
|
||||
};
|
||||
|
||||
PERF_TEST_P(Image, Labeling_ConnectedComponents, Values<string>("gpu/labeling/aloe-disp.png"))
|
||||
PERF_TEST_P(Image, DISABLED_Labeling_ConnectivityMask,
|
||||
Values<string>("gpu/labeling/aloe-disp.png"))
|
||||
{
|
||||
declare.time(1.0);
|
||||
|
||||
cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
|
||||
const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(image.empty());
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_image(image);
|
||||
cv::gpu::GpuMat mask;
|
||||
mask.create(image.rows, image.cols, CV_8UC1);
|
||||
|
||||
TEST_CYCLE() cv::gpu::connectivityMask(d_image, mask, cv::Scalar::all(0), cv::Scalar::all(2));
|
||||
|
||||
GPU_SANITY_CHECK(mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
PERF_TEST_P(Image, DISABLED_Labeling_ConnectedComponents,
|
||||
Values<string>("gpu/labeling/aloe-disp.png"))
|
||||
{
|
||||
declare.time(1.0);
|
||||
|
||||
const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(image.empty());
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_mask;
|
||||
cv::gpu::connectivityMask(cv::gpu::GpuMat(image), d_mask, cv::Scalar::all(0), cv::Scalar::all(2));
|
||||
|
||||
cv::gpu::GpuMat components;
|
||||
components.create(image.rows, image.cols, CV_32SC1);
|
||||
|
||||
cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2));
|
||||
|
||||
ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components));
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::labelComponents(mask, components);
|
||||
}
|
||||
TEST_CYCLE() cv::gpu::labelComponents(d_mask, components);
|
||||
|
||||
GPU_SANITY_CHECK(components);
|
||||
}
|
||||
@@ -129,17 +144,9 @@ PERF_TEST_P(Image, Labeling_ConnectedComponents, Values<string>("gpu/labeling/al
|
||||
{
|
||||
GreedyLabeling host(image);
|
||||
|
||||
host(host._labels);
|
||||
TEST_CYCLE() host(host._labels);
|
||||
|
||||
declare.time(1.0);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
host(host._labels);
|
||||
}
|
||||
|
||||
CPU_SANITY_CHECK(host._labels);
|
||||
cv::Mat components = host._labels;
|
||||
CPU_SANITY_CHECK(components);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@@ -1,7 +1,5 @@
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
namespace{
|
||||
|
||||
static void printOsInfo()
|
||||
{
|
||||
#if defined _WIN32
|
||||
@@ -69,6 +67,4 @@ static void printCudaInfo()
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
CV_PERF_TEST_MAIN(gpu, printCudaInfo())
|
||||
CV_PERF_TEST_MAIN(gpu, printCudaInfo())
|
||||
|
@@ -3,137 +3,112 @@
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SetTo
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
|
||||
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8U, CV_16U, CV_32F, CV_64F),
|
||||
GPU_CHANNELS_1_3_4))
|
||||
{
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth = GET_PARAM(1);
|
||||
int channels = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
const int channels = GET_PARAM(2);
|
||||
|
||||
int type = CV_MAKE_TYPE(depth, channels);
|
||||
const int type = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
cv::Scalar val(1, 2, 3, 4);
|
||||
const cv::Scalar val(1, 2, 3, 4);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(size, type);
|
||||
cv::gpu::GpuMat dst(size, type);
|
||||
|
||||
d_src.setTo(val);
|
||||
TEST_CYCLE() dst.setTo(val);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_src.setTo(val);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_src);
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat src(size, type);
|
||||
cv::Mat dst(size, type);
|
||||
|
||||
src.setTo(val);
|
||||
TEST_CYCLE() dst.setTo(val);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
src.setTo(val);
|
||||
}
|
||||
|
||||
CPU_SANITY_CHECK(src);
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SetToMasked
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
|
||||
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8U, CV_16U, CV_32F, CV_64F),
|
||||
GPU_CHANNELS_1_3_4))
|
||||
{
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth = GET_PARAM(1);
|
||||
int channels = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
const int channels = GET_PARAM(2);
|
||||
|
||||
int type = CV_MAKE_TYPE(depth, channels);
|
||||
const int type = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
|
||||
cv::Mat mask(size, CV_8UC1);
|
||||
fillRandom(mask, 0, 2);
|
||||
declare.in(src, mask, WARMUP_RNG);
|
||||
|
||||
cv::Scalar val(1, 2, 3, 4);
|
||||
const cv::Scalar val(1, 2, 3, 4);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_mask(mask);
|
||||
cv::gpu::GpuMat dst(src);
|
||||
const cv::gpu::GpuMat d_mask(mask);
|
||||
|
||||
d_src.setTo(val, d_mask);
|
||||
TEST_CYCLE() dst.setTo(val, d_mask);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_src.setTo(val, d_mask);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_src);
|
||||
GPU_SANITY_CHECK(dst, 1e-10);
|
||||
}
|
||||
else
|
||||
{
|
||||
src.setTo(val, mask);
|
||||
cv::Mat dst = src;
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
src.setTo(val, mask);
|
||||
}
|
||||
TEST_CYCLE() dst.setTo(val, mask);
|
||||
|
||||
CPU_SANITY_CHECK(src);
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// CopyToMasked
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
|
||||
PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8U, CV_16U, CV_32F, CV_64F),
|
||||
GPU_CHANNELS_1_3_4))
|
||||
{
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth = GET_PARAM(1);
|
||||
int channels = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
const int channels = GET_PARAM(2);
|
||||
|
||||
int type = CV_MAKE_TYPE(depth, channels);
|
||||
const int type = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
|
||||
cv::Mat mask(size, CV_8UC1);
|
||||
fillRandom(mask, 0, 2);
|
||||
declare.in(src, mask, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_mask(mask);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
const cv::gpu::GpuMat d_mask(mask);
|
||||
cv::gpu::GpuMat dst(d_src.size(), d_src.type(), cv::Scalar::all(0));
|
||||
|
||||
d_src.copyTo(d_dst, d_mask);
|
||||
TEST_CYCLE() d_src.copyTo(dst, d_mask);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_src.copyTo(d_dst, d_mask);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst, 1e-10);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
cv::Mat dst(src.size(), src.type(), cv::Scalar::all(0));
|
||||
|
||||
src.copyTo(dst, mask);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
src.copyTo(dst, mask);
|
||||
}
|
||||
TEST_CYCLE() src.copyTo(dst, mask);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@@ -144,42 +119,36 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Valu
|
||||
|
||||
DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth);
|
||||
|
||||
PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F)))
|
||||
PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8U, CV_16U, CV_32F, CV_64F),
|
||||
Values(CV_8U, CV_16U, CV_32F, CV_64F)))
|
||||
{
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth1 = GET_PARAM(1);
|
||||
int depth2 = GET_PARAM(2);
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth1 = GET_PARAM(1);
|
||||
const int depth2 = GET_PARAM(2);
|
||||
|
||||
cv::Mat src(size, depth1);
|
||||
fillRandom(src);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
const double a = 0.5;
|
||||
const double b = 1.0;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
|
||||
TEST_CYCLE() d_src.convertTo(dst, depth2, a, b);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
GPU_SANITY_CHECK(dst, 1e-10);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
src.convertTo(dst, depth2, 0.5, 1.0);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
src.convertTo(dst, depth2, 0.5, 1.0);
|
||||
}
|
||||
TEST_CYCLE() src.convertTo(dst, depth2, a, b);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@@ -3,90 +3,47 @@
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// HOG
|
||||
|
||||
DEF_PARAM_TEST_1(Image, string);
|
||||
|
||||
PERF_TEST_P(Image, ObjDetect_HOG, Values<string>("gpu/hog/road.png"))
|
||||
PERF_TEST_P(Image, ObjDetect_HOG,
|
||||
Values<string>("gpu/hog/road.png",
|
||||
"gpu/caltech/image_00000009_0.png",
|
||||
"gpu/caltech/image_00000032_0.png",
|
||||
"gpu/caltech/image_00000165_0.png",
|
||||
"gpu/caltech/image_00000261_0.png",
|
||||
"gpu/caltech/image_00000469_0.png",
|
||||
"gpu/caltech/image_00000527_0.png",
|
||||
"gpu/caltech/image_00000574_0.png"))
|
||||
{
|
||||
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
|
||||
const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
std::vector<cv::Rect> found_locations;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_img(img);
|
||||
const cv::gpu::GpuMat d_img(img);
|
||||
std::vector<cv::Rect> gpu_found_locations;
|
||||
|
||||
cv::gpu::HOGDescriptor d_hog;
|
||||
d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
|
||||
|
||||
d_hog.detectMultiScale(d_img, found_locations);
|
||||
TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_hog.detectMultiScale(d_img, found_locations);
|
||||
}
|
||||
SANITY_CHECK(gpu_found_locations);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<cv::Rect> cpu_found_locations;
|
||||
|
||||
cv::HOGDescriptor hog;
|
||||
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
|
||||
|
||||
hog.detectMultiScale(img, found_locations);
|
||||
TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
hog.detectMultiScale(img, found_locations);
|
||||
}
|
||||
SANITY_CHECK(cpu_found_locations);
|
||||
}
|
||||
|
||||
SANITY_CHECK(found_locations);
|
||||
}
|
||||
|
||||
//===========test for CalTech data =============//
|
||||
DEF_PARAM_TEST_1(HOG, string);
|
||||
|
||||
PERF_TEST_P(HOG, CalTech, Values<string>("gpu/caltech/image_00000009_0.png", "gpu/caltech/image_00000032_0.png",
|
||||
"gpu/caltech/image_00000165_0.png", "gpu/caltech/image_00000261_0.png", "gpu/caltech/image_00000469_0.png",
|
||||
"gpu/caltech/image_00000527_0.png", "gpu/caltech/image_00000574_0.png"))
|
||||
{
|
||||
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
std::vector<cv::Rect> found_locations;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_img(img);
|
||||
|
||||
cv::gpu::HOGDescriptor d_hog;
|
||||
d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
|
||||
|
||||
d_hog.detectMultiScale(d_img, found_locations);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_hog.detectMultiScale(d_img, found_locations);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::HOGDescriptor hog;
|
||||
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
|
||||
|
||||
hog.detectMultiScale(img, found_locations);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
hog.detectMultiScale(img, found_locations);
|
||||
}
|
||||
}
|
||||
|
||||
SANITY_CHECK(found_locations);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
@@ -96,9 +53,9 @@ typedef pair<string, string> pair_string;
|
||||
DEF_PARAM_TEST_1(ImageAndCascade, pair_string);
|
||||
|
||||
PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
|
||||
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
|
||||
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
|
||||
{
|
||||
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
|
||||
const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
@@ -106,33 +63,28 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
|
||||
cv::gpu::CascadeClassifier_GPU d_cascade;
|
||||
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
|
||||
|
||||
cv::gpu::GpuMat d_img(img);
|
||||
cv::gpu::GpuMat d_objects_buffer;
|
||||
const cv::gpu::GpuMat d_img(img);
|
||||
cv::gpu::GpuMat objects_buffer;
|
||||
int detections_num = 0;
|
||||
|
||||
d_cascade.detectMultiScale(d_img, d_objects_buffer);
|
||||
TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_cascade.detectMultiScale(d_img, d_objects_buffer);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_objects_buffer);
|
||||
std::vector<cv::Rect> gpu_rects(detections_num);
|
||||
cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
|
||||
objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
|
||||
cv::groupRectangles(gpu_rects, 3, 0.2);
|
||||
SANITY_CHECK(gpu_rects);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::CascadeClassifier cascade;
|
||||
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
|
||||
|
||||
std::vector<cv::Rect> rects;
|
||||
std::vector<cv::Rect> cpu_rects;
|
||||
|
||||
cascade.detectMultiScale(img, rects);
|
||||
TEST_CYCLE() cascade.detectMultiScale(img, cpu_rects);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cascade.detectMultiScale(img, rects);
|
||||
}
|
||||
|
||||
CPU_SANITY_CHECK(rects);
|
||||
SANITY_CHECK(cpu_rects);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,9 +92,9 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
|
||||
// LBP cascade
|
||||
|
||||
PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
|
||||
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
|
||||
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
|
||||
{
|
||||
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
|
||||
const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
@@ -150,34 +102,27 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
|
||||
cv::gpu::CascadeClassifier_GPU d_cascade;
|
||||
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
|
||||
|
||||
cv::gpu::GpuMat d_img(img);
|
||||
cv::gpu::GpuMat d_gpu_rects;
|
||||
const cv::gpu::GpuMat d_img(img);
|
||||
cv::gpu::GpuMat objects_buffer;
|
||||
int detections_num = 0;
|
||||
|
||||
d_cascade.detectMultiScale(d_img, d_gpu_rects);
|
||||
TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_cascade.detectMultiScale(d_img, d_gpu_rects);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_gpu_rects);
|
||||
std::vector<cv::Rect> gpu_rects(detections_num);
|
||||
cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
|
||||
objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
|
||||
cv::groupRectangles(gpu_rects, 3, 0.2);
|
||||
SANITY_CHECK(gpu_rects);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::CascadeClassifier cascade;
|
||||
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
|
||||
|
||||
std::vector<cv::Rect> rects;
|
||||
std::vector<cv::Rect> cpu_rects;
|
||||
|
||||
cascade.detectMultiScale(img, rects);
|
||||
TEST_CYCLE() cascade.detectMultiScale(img, cpu_rects);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cascade.detectMultiScale(img, rects);
|
||||
}
|
||||
|
||||
CPU_SANITY_CHECK(rects);
|
||||
SANITY_CHECK(cpu_rects);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
File diff suppressed because it is too large
Load Diff
@@ -2,13 +2,6 @@
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
void fillRandom(Mat& m, double a, double b)
|
||||
{
|
||||
RNG rng(123456789);
|
||||
rng.fill(m, RNG::UNIFORM, Scalar::all(a), Scalar::all(b));
|
||||
}
|
||||
|
||||
Mat readImage(const string& fileName, int flags)
|
||||
{
|
||||
@@ -188,4 +181,4 @@ void PrintTo(const CvtColorInfo& info, ostream* os)
|
||||
};
|
||||
|
||||
*os << str[info.code];
|
||||
}
|
||||
}
|
||||
|
@@ -2,11 +2,9 @@
|
||||
#define __OPENCV_PERF_GPU_UTILITY_HPP__
|
||||
|
||||
#include "opencv2/core/core.hpp"
|
||||
#include "opencv2/core/gpumat.hpp"
|
||||
#include "opencv2/imgproc/imgproc.hpp"
|
||||
#include "opencv2/ts/ts_perf.hpp"
|
||||
|
||||
void fillRandom(cv::Mat& m, double a = 0.0, double b = 255.0);
|
||||
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
|
||||
|
||||
using perf::MatType;
|
||||
@@ -17,12 +15,13 @@ CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONS
|
||||
|
||||
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
|
||||
#define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())
|
||||
|
||||
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING, cv::NORM_MINMAX)
|
||||
|
||||
const int Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4;
|
||||
enum { Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4 };
|
||||
CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA)
|
||||
#define GPU_CHANNELS_1_3_4 testing::Values(Gray, BGR, BGRA)
|
||||
#define GPU_CHANNELS_1_3 testing::Values(Gray, BGR)
|
||||
#define GPU_CHANNELS_1_3_4 testing::Values(MatCn(Gray), MatCn(BGR), MatCn(BGRA))
|
||||
#define GPU_CHANNELS_1_3 testing::Values(MatCn(Gray), MatCn(BGR))
|
||||
|
||||
struct CvtColorInfo
|
||||
{
|
||||
@@ -30,7 +29,8 @@ struct CvtColorInfo
|
||||
int dcn;
|
||||
int code;
|
||||
|
||||
explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
|
||||
CvtColorInfo() {}
|
||||
explicit CvtColorInfo(int scn_, int dcn_, int code_) : scn(scn_), dcn(dcn_), code(code_) {}
|
||||
};
|
||||
void PrintTo(const CvtColorInfo& info, std::ostream* os);
|
||||
|
||||
@@ -46,39 +46,18 @@ DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn);
|
||||
|
||||
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p)
|
||||
|
||||
#define GPU_SANITY_CHECK(dmat, ...) \
|
||||
#define FAIL_NO_CPU() FAIL() << "No such CPU implementation analogy"
|
||||
|
||||
#define GPU_SANITY_CHECK(mat, ...) \
|
||||
do{ \
|
||||
cv::Mat d##dmat(dmat); \
|
||||
SANITY_CHECK(d##dmat, ## __VA_ARGS__); \
|
||||
cv::Mat gpu_##mat(mat); \
|
||||
SANITY_CHECK(gpu_##mat, ## __VA_ARGS__); \
|
||||
} while(0)
|
||||
|
||||
#define CPU_SANITY_CHECK(cmat, ...) \
|
||||
#define CPU_SANITY_CHECK(mat, ...) \
|
||||
do{ \
|
||||
SANITY_CHECK(cmat, ## __VA_ARGS__); \
|
||||
cv::Mat cpu_##mat(mat); \
|
||||
SANITY_CHECK(cpu_##mat, ## __VA_ARGS__); \
|
||||
} while(0)
|
||||
|
||||
#define GPU_SANITY_CHECK_KEYPOINTS(alg, dmat, ...) \
|
||||
do{ \
|
||||
cv::Mat d##dmat(dmat); \
|
||||
cv::Mat __pt_x = d##dmat.row(cv::gpu::alg##_GPU::X_ROW); \
|
||||
cv::Mat __pt_y = d##dmat.row(cv::gpu::alg##_GPU::Y_ROW); \
|
||||
cv::Mat __angle = d##dmat.row(cv::gpu::alg##_GPU::ANGLE_ROW); \
|
||||
cv::Mat __octave = d##dmat.row(cv::gpu::alg##_GPU::OCTAVE_ROW); \
|
||||
cv::Mat __size = d##dmat.row(cv::gpu::alg##_GPU::SIZE_ROW); \
|
||||
::perf::Regression::add(this, std::string(#dmat) + "-pt-x-row", __pt_x, ## __VA_ARGS__); \
|
||||
::perf::Regression::add(this, std::string(#dmat) + "-pt-y-row", __pt_y, ## __VA_ARGS__); \
|
||||
::perf::Regression::add(this, std::string(#dmat) + "-angle-row", __angle, ## __VA_ARGS__); \
|
||||
::perf::Regression::add(this, std::string(#dmat) + "octave-row", __octave, ## __VA_ARGS__); \
|
||||
::perf::Regression::add(this, std::string(#dmat) + "-pt-size-row", __size, ## __VA_ARGS__); \
|
||||
} while(0)
|
||||
|
||||
#define GPU_SANITY_CHECK_RESPONSE(alg, dmat, ...) \
|
||||
do{ \
|
||||
cv::Mat d##dmat(dmat); \
|
||||
cv::Mat __response = d##dmat.row(cv::gpu::alg##_GPU::RESPONSE_ROW); \
|
||||
::perf::Regression::add(this, std::string(#dmat) + "-response-row", __response, ## __VA_ARGS__); \
|
||||
} while(0)
|
||||
|
||||
#define FAIL_NO_CPU() FAIL() << "No such CPU implementation analogy"
|
||||
|
||||
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__
|
||||
|
@@ -648,7 +648,7 @@ namespace cv { namespace gpu { namespace device
|
||||
tWeight += gmm_weight(mode * frame.rows + y, x);
|
||||
if (tWeight > c_TB)
|
||||
break;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
|
||||
@@ -761,4 +761,4 @@ namespace cv { namespace gpu { namespace device
|
||||
}}}
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
#endif /* CUDA_DISABLER */
|
||||
|
@@ -194,10 +194,10 @@ namespace cv { namespace gpu { namespace device
|
||||
if ( y > 0 && connected(intensity, image(y - 1, x)))
|
||||
c |= UP;
|
||||
|
||||
if ( x - 1 < image.cols && connected(intensity, image(y, x + 1)))
|
||||
if ( x + 1 < image.cols && connected(intensity, image(y, x + 1)))
|
||||
c |= RIGHT;
|
||||
|
||||
if ( y - 1 < image.rows && connected(intensity, image(y + 1, x)))
|
||||
if ( y + 1 < image.rows && connected(intensity, image(y + 1, x)))
|
||||
c |= DOWN;
|
||||
|
||||
components(y, x) = c;
|
||||
|
@@ -2284,15 +2284,18 @@ namespace arithm
|
||||
|
||||
template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarAnd<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarOr<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarXor<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@@ -2280,11 +2280,11 @@ namespace
|
||||
{
|
||||
typedef void (*bit_scalar_func_t)(PtrStepSzb src1, unsigned int src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template <bit_scalar_func_t func> struct BitScalar
|
||||
template <typename T, bit_scalar_func_t func> struct BitScalar
|
||||
{
|
||||
static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
func(src, static_cast<unsigned int>(sc.val[0]), dst, stream);
|
||||
func(src, saturate_cast<T>(sc.val[0]), dst, stream);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -2292,14 +2292,12 @@ namespace
|
||||
{
|
||||
static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
Scalar_<unsigned int> isc = sc;
|
||||
|
||||
unsigned int packedVal = 0;
|
||||
|
||||
packedVal |= (isc.val[0] & 0xffff);
|
||||
packedVal |= (isc.val[1] & 0xffff) << 8;
|
||||
packedVal |= (isc.val[2] & 0xffff) << 16;
|
||||
packedVal |= (isc.val[3] & 0xffff) << 24;
|
||||
packedVal |= (saturate_cast<unsigned char>(sc.val[0]) & 0xffff);
|
||||
packedVal |= (saturate_cast<unsigned char>(sc.val[1]) & 0xffff) << 8;
|
||||
packedVal |= (saturate_cast<unsigned char>(sc.val[2]) & 0xffff) << 16;
|
||||
packedVal |= (saturate_cast<unsigned char>(sc.val[3]) & 0xffff) << 24;
|
||||
|
||||
func(src, packedVal, dst, stream);
|
||||
}
|
||||
@@ -2330,7 +2328,7 @@ namespace
|
||||
oSizeROI.width = src.cols;
|
||||
oSizeROI.height = src.rows;
|
||||
|
||||
const npp_t pConstants[] = {static_cast<npp_t>(sc.val[0]), static_cast<npp_t>(sc.val[1]), static_cast<npp_t>(sc.val[2]), static_cast<npp_t>(sc.val[3])};
|
||||
const npp_t pConstants[] = {saturate_cast<npp_t>(sc.val[0]), saturate_cast<npp_t>(sc.val[1]), saturate_cast<npp_t>(sc.val[2]), saturate_cast<npp_t>(sc.val[3])};
|
||||
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
|
||||
|
||||
@@ -2350,7 +2348,7 @@ namespace
|
||||
oSizeROI.width = src.cols;
|
||||
oSizeROI.height = src.rows;
|
||||
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), static_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@@ -2365,11 +2363,11 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
|
||||
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[5][4] =
|
||||
{
|
||||
{BitScalar< bitScalarAnd<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call},
|
||||
{BitScalar<unsigned char, bitScalarAnd<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call},
|
||||
{0,0,0,0},
|
||||
{BitScalar< bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
|
||||
{BitScalar<unsigned short, bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
{BitScalar< bitScalarAnd<unsigned int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
|
||||
{BitScalar<int, bitScalarAnd<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
|
||||
};
|
||||
|
||||
const int depth = src.depth();
|
||||
@@ -2390,11 +2388,11 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea
|
||||
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[5][4] =
|
||||
{
|
||||
{BitScalar< bitScalarOr<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call},
|
||||
{BitScalar<unsigned char, bitScalarOr<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call},
|
||||
{0,0,0,0},
|
||||
{BitScalar< bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
|
||||
{BitScalar<unsigned short, bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
{BitScalar< bitScalarOr<unsigned int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
|
||||
{BitScalar<int, bitScalarOr<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
|
||||
};
|
||||
|
||||
const int depth = src.depth();
|
||||
@@ -2415,11 +2413,11 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
|
||||
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[5][4] =
|
||||
{
|
||||
{BitScalar< bitScalarXor<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call},
|
||||
{BitScalar<unsigned char, bitScalarXor<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call},
|
||||
{0,0,0,0},
|
||||
{BitScalar< bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
|
||||
{BitScalar<unsigned short, bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
{BitScalar< bitScalarXor<unsigned int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
|
||||
{BitScalar<int, bitScalarXor<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
|
||||
};
|
||||
|
||||
const int depth = src.depth();
|
||||
|
@@ -104,12 +104,12 @@ void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scal
|
||||
|
||||
void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s)
|
||||
{
|
||||
if (!TargetArchs::builtWith(SHARED_ATOMICS) || !DeviceInfo().supports(SHARED_ATOMICS))
|
||||
CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!");
|
||||
CV_Assert(!mask.empty() && mask.type() == CV_8U);
|
||||
|
||||
if (mask.size() != components.size() || components.type() != CV_32SC1)
|
||||
components.create(mask.size(), CV_32SC1);
|
||||
if (!deviceSupports(SHARED_ATOMICS))
|
||||
CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!");
|
||||
|
||||
components.create(mask.size(), CV_32SC1);
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
device::ccl::labelComponents(mask, components, flags, stream);
|
||||
|
@@ -517,6 +517,7 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
dst.setTo(Scalar::all(0));
|
||||
|
||||
funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
@@ -380,6 +380,7 @@ void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr,
|
||||
dstcol[0] = static_cast<uchar>(sumcol[0] / comps.size[parent]);
|
||||
dstcol[1] = static_cast<uchar>(sumcol[1] / comps.size[parent]);
|
||||
dstcol[2] = static_cast<uchar>(sumcol[2] / comps.size[parent]);
|
||||
dstcol[3] = 255;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -206,6 +206,8 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
|
||||
uPyr_[0].setTo(Scalar::all(0));
|
||||
vPyr_[0].setTo(Scalar::all(0));
|
||||
uPyr_[1].setTo(Scalar::all(0));
|
||||
vPyr_[1].setTo(Scalar::all(0));
|
||||
|
||||
|
@@ -232,10 +232,8 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
|
||||
};
|
||||
|
||||
bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
|
||||
#ifdef linux
|
||||
// NPP bug on float data
|
||||
useNpp = useNpp && src.depth() != CV_32F;
|
||||
#endif
|
||||
// NPP bug on float data
|
||||
useNpp = useNpp && src.depth() != CV_32F;
|
||||
|
||||
if (useNpp)
|
||||
{
|
||||
@@ -372,10 +370,8 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
|
||||
};
|
||||
|
||||
bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
|
||||
#ifdef linux
|
||||
// NPP bug on float data
|
||||
useNpp = useNpp && src.depth() != CV_32F;
|
||||
#endif
|
||||
// NPP bug on float data
|
||||
useNpp = useNpp && src.depth() != CV_32F;
|
||||
|
||||
if (useNpp)
|
||||
{
|
||||
|
@@ -207,11 +207,17 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, MOG, testing::Combine(
|
||||
//////////////////////////////////////////////////////
|
||||
// MOG2
|
||||
|
||||
PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, UseRoi)
|
||||
namespace
|
||||
{
|
||||
IMPLEMENT_PARAM_CLASS(DetectShadow, bool)
|
||||
}
|
||||
|
||||
PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, DetectShadow, UseRoi)
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
std::string inputFile;
|
||||
bool useGray;
|
||||
bool detectShadow;
|
||||
bool useRoi;
|
||||
|
||||
virtual void SetUp()
|
||||
@@ -220,10 +226,9 @@ PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, UseRoi)
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
|
||||
|
||||
useGray = GET_PARAM(2);
|
||||
|
||||
useRoi = GET_PARAM(3);
|
||||
detectShadow = GET_PARAM(3);
|
||||
useRoi = GET_PARAM(4);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -237,9 +242,11 @@ GPU_TEST_P(MOG2, Update)
|
||||
ASSERT_FALSE(frame.empty());
|
||||
|
||||
cv::gpu::MOG2_GPU mog2;
|
||||
mog2.bShadowDetection = detectShadow;
|
||||
cv::gpu::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
|
||||
|
||||
cv::BackgroundSubtractorMOG2 mog2_gold;
|
||||
mog2_gold.set("detectShadows", detectShadow);
|
||||
cv::Mat foreground_gold;
|
||||
|
||||
for (int i = 0; i < 10; ++i)
|
||||
@@ -258,11 +265,14 @@ GPU_TEST_P(MOG2, Update)
|
||||
|
||||
mog2_gold(frame, foreground_gold);
|
||||
|
||||
double norm = cv::norm(foreground_gold, cv::Mat(foreground), cv::NORM_L1);
|
||||
|
||||
norm /= foreground_gold.size().area();
|
||||
|
||||
ASSERT_LE(norm, 0.09);
|
||||
if (detectShadow)
|
||||
{
|
||||
ASSERT_MAT_SIMILAR(foreground_gold, foreground, 1e-2);
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT_MAT_NEAR(foreground_gold, foreground, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -277,9 +287,11 @@ GPU_TEST_P(MOG2, getBackgroundImage)
|
||||
cv::Mat frame;
|
||||
|
||||
cv::gpu::MOG2_GPU mog2;
|
||||
mog2.bShadowDetection = detectShadow;
|
||||
cv::gpu::GpuMat foreground;
|
||||
|
||||
cv::BackgroundSubtractorMOG2 mog2_gold;
|
||||
mog2_gold.set("detectShadows", detectShadow);
|
||||
cv::Mat foreground_gold;
|
||||
|
||||
for (int i = 0; i < 10; ++i)
|
||||
@@ -305,6 +317,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, MOG2, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
testing::Values(std::string("768x576.avi")),
|
||||
testing::Values(UseGray(true), UseGray(false)),
|
||||
testing::Values(DetectShadow(true), DetectShadow(false)),
|
||||
WHOLE_SUBMAT));
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
|
@@ -1873,7 +1873,7 @@ PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channel
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
src = randomMat(size, CV_MAKE_TYPE(depth, channels));
|
||||
cv::Scalar_<int> ival = randomScalar(0.0, 255.0);
|
||||
cv::Scalar_<int> ival = randomScalar(0.0, std::numeric_limits<int>::max());
|
||||
val = ival;
|
||||
}
|
||||
};
|
||||
|
@@ -252,6 +252,8 @@ PARAM_TEST_CASE(WarpAffineNPP, cv::gpu::DeviceInfo, MatType, Inverse, Interpolat
|
||||
GPU_TEST_P(WarpAffineNPP, Accuracy)
|
||||
{
|
||||
cv::Mat src = readImageType("stereobp/aloe-L.png", type);
|
||||
ASSERT_FALSE(src.empty());
|
||||
|
||||
cv::Mat M = createTransfomMatrix(src.size(), CV_PI / 4);
|
||||
int flags = interpolation;
|
||||
if (inverse)
|
||||
|
@@ -255,6 +255,8 @@ PARAM_TEST_CASE(WarpPerspectiveNPP, cv::gpu::DeviceInfo, MatType, Inverse, Inter
|
||||
GPU_TEST_P(WarpPerspectiveNPP, Accuracy)
|
||||
{
|
||||
cv::Mat src = readImageType("stereobp/aloe-L.png", type);
|
||||
ASSERT_FALSE(src.empty());
|
||||
|
||||
cv::Mat M = createTransfomMatrix(src.size(), CV_PI / 4);
|
||||
int flags = interpolation;
|
||||
if (inverse)
|
||||
|
@@ -275,7 +275,7 @@ if(WIN32 AND WITH_FFMPEG)
|
||||
COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/Release/${ffmpeg_bare_name_ver}"
|
||||
COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/Debug/${ffmpeg_bare_name_ver}"
|
||||
COMMENT "Copying ${ffmpeg_path} to the output directory")
|
||||
elseif(MSVC)
|
||||
elseif(MSVC AND (CMAKE_GENERATOR MATCHES "Visual"))
|
||||
add_custom_command(TARGET ${the_module} POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/${CMAKE_BUILD_TYPE}/${ffmpeg_bare_name_ver}"
|
||||
COMMENT "Copying ${ffmpeg_path} to the output directory")
|
||||
|
@@ -217,6 +217,12 @@ endif(ANDROID AND ANDROID_EXECUTABLE)
|
||||
|
||||
set(step3_depends ${step2_depends} ${step3_input_files} ${copied_files})
|
||||
|
||||
if(ANDROID)
|
||||
set(LIB_NAME_SUFIX "")
|
||||
else()
|
||||
set(LIB_NAME_SUFIX "${OPENCV_VERSION_MAJOR}${OPENCV_VERSION_MINOR}${OPENCV_VERSION_PATCH}")
|
||||
endif()
|
||||
|
||||
# step 4: build jar
|
||||
if(ANDROID)
|
||||
set(JAR_FILE "${OpenCV_BINARY_DIR}/bin/classes.jar")
|
||||
@@ -241,7 +247,7 @@ if(ANDROID)
|
||||
)
|
||||
endif()
|
||||
else(ANDROID)
|
||||
set(JAR_NAME opencv-${OPENCV_VERSION}.jar)
|
||||
set(JAR_NAME opencv-${LIB_NAME_SUFIX}.jar)
|
||||
set(JAR_FILE "${OpenCV_BINARY_DIR}/bin/${JAR_NAME}")
|
||||
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build.xml.in" "${OpenCV_BINARY_DIR}/build.xml" IMMEDIATE @ONLY)
|
||||
list(APPEND step3_depends "${OpenCV_BINARY_DIR}/build.xml")
|
||||
@@ -294,8 +300,8 @@ endif()
|
||||
|
||||
# Additional target properties
|
||||
set_target_properties(${the_module} PROPERTIES
|
||||
OUTPUT_NAME "${the_module}${OPENCV_DLLVERSION}"
|
||||
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
|
||||
OUTPUT_NAME "${the_module}${LIB_NAME_SUFIX}"
|
||||
#DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
|
||||
ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
|
||||
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
|
||||
INSTALL_NAME_DIR ${OPENCV_LIB_INSTALL_PATH}
|
||||
|
@@ -557,6 +557,15 @@ func_arg_fix = {
|
||||
}, # '', i.e. no class
|
||||
} # func_arg_fix
|
||||
|
||||
|
||||
def getLibVersion(version_hpp_path):
|
||||
version_file = open(version_hpp_path, "rt").read()
|
||||
epoch = re.search("^W*#\W*define\W+CV_VERSION_EPOCH\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
|
||||
major = re.search("^W*#\W*define\W+CV_VERSION_MAJOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
|
||||
minor = re.search("^W*#\W*define\W+CV_VERSION_MINOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
|
||||
revision = re.search("^W*#\W*define\W+CV_VERSION_REVISION\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
|
||||
return (epoch, major, minor, revision)
|
||||
|
||||
class ConstInfo(object):
|
||||
def __init__(self, cname, name, val, addedManually=False):
|
||||
self.cname = cname
|
||||
@@ -719,13 +728,16 @@ $imports
|
||||
public class %(jc)s {
|
||||
""" % { 'm' : self.module, 'jc' : jname } )
|
||||
|
||||
# self.java_code[class_name]["jn_code"].write("""
|
||||
# //
|
||||
# // native stuff
|
||||
# //
|
||||
# static { System.loadLibrary("opencv_java"); }
|
||||
#""" )
|
||||
|
||||
if class_name == 'Core':
|
||||
(epoch, major, minor, revision) = getLibVersion(
|
||||
(os.path.dirname(__file__) or '.') + '/../../core/include/opencv2/core/version.hpp')
|
||||
version_str = '.'.join( (epoch, major, minor, revision) )
|
||||
version_suffix = ''.join( (epoch, major, minor) )
|
||||
self.classes[class_name].imports.add("java.lang.String")
|
||||
self.java_code[class_name]["j_code"].write("""
|
||||
public static final String VERSION = "%(v)s", NATIVE_LIBRARY_NAME = "opencv_java%(vs)s";
|
||||
public static final int VERSION_EPOCH = %(ep)s, VERSION_MAJOR = %(ma)s, VERSION_MINOR = %(mi)s, VERSION_REVISION = %(re)s;
|
||||
""" % { 'v' : version_str, 'vs' : version_suffix, 'ep' : epoch, 'ma' : major, 'mi' : minor, 're' : revision } )
|
||||
|
||||
|
||||
def add_class(self, decl):
|
||||
|
@@ -2122,12 +2122,16 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
|
||||
};
|
||||
|
||||
int dst_step1 = dst.cols * dst.elemSize();
|
||||
int src1_step = (int) src1.step;
|
||||
int src2_step = (int) src2.step;
|
||||
int dst_step = (int) dst.step;
|
||||
float alpha_f = alpha, beta_f = beta, gama_f = gama;
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset));
|
||||
|
||||
if(src1.clCxt -> impl -> double_support != 0)
|
||||
@@ -2138,14 +2142,13 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
|
||||
}
|
||||
else
|
||||
{
|
||||
float alpha_f = alpha, beta_f = beta, gama_f = gama;
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&alpha_f ));
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&beta_f ));
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&gama_f ));
|
||||
}
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
|
@@ -73,7 +73,7 @@ void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &
|
||||
size_t localSize[] = {256, 1, 1};
|
||||
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
|
||||
result.create(img1.size(), CV_MAKE_TYPE(depth,img1.channels()));
|
||||
if(globalSize[0] != 0)
|
||||
{
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data ));
|
||||
|
@@ -60,7 +60,7 @@ namespace cv
|
||||
}
|
||||
|
||||
template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
|
||||
void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &mask,
|
||||
void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
|
||||
const oclMat &trainIdx, const oclMat &distance, int distType)
|
||||
{
|
||||
cv::ocl::Context *ctx = query.clCxt;
|
||||
@@ -75,7 +75,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
|
||||
{
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
|
||||
args.push_back( std::make_pair( smemSize, (void *)NULL));
|
||||
@@ -101,7 +101,7 @@ void matchUnrolledCached(const oclMat /*query*/, const oclMat * /*trains*/, int
|
||||
}
|
||||
|
||||
template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
|
||||
void match(const oclMat &query, const oclMat &train, const oclMat &mask,
|
||||
void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
|
||||
const oclMat &trainIdx, const oclMat &distance, int distType)
|
||||
{
|
||||
cv::ocl::Context *ctx = query.clCxt;
|
||||
@@ -115,7 +115,7 @@ void match(const oclMat &query, const oclMat &train, const oclMat &mask,
|
||||
{
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
|
||||
args.push_back( std::make_pair( smemSize, (void *)NULL));
|
||||
@@ -141,7 +141,7 @@ void match(const oclMat /*query*/, const oclMat * /*trains*/, int /*n*/, const o
|
||||
|
||||
//radius_matchUnrolledCached
|
||||
template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
|
||||
void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
|
||||
void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
|
||||
const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
|
||||
{
|
||||
cv::ocl::Context *ctx = query.clCxt;
|
||||
@@ -157,7 +157,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&maxDistance ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
|
||||
@@ -181,7 +181,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
|
||||
|
||||
//radius_match
|
||||
template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
|
||||
void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
|
||||
void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
|
||||
const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
|
||||
{
|
||||
cv::ocl::Context *ctx = query.clCxt;
|
||||
@@ -196,7 +196,7 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&maxDistance ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
|
||||
@@ -470,7 +470,7 @@ void matchDispatcher(const oclMat &query, const oclMat &train, int n, float maxD
|
||||
|
||||
//knn match Dispatcher
|
||||
template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
|
||||
void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &mask,
|
||||
void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
|
||||
const oclMat &trainIdx, const oclMat &distance, int distType)
|
||||
{
|
||||
cv::ocl::Context *ctx = query.clCxt;
|
||||
@@ -485,7 +485,7 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
|
||||
{
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
|
||||
args.push_back( std::make_pair( smemSize, (void *)NULL));
|
||||
@@ -505,7 +505,7 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
|
||||
}
|
||||
|
||||
template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
|
||||
void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask,
|
||||
void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
|
||||
const oclMat &trainIdx, const oclMat &distance, int distType)
|
||||
{
|
||||
cv::ocl::Context *ctx = query.clCxt;
|
||||
@@ -519,7 +519,7 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask,
|
||||
{
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
|
||||
args.push_back( std::make_pair( smemSize, (void *)NULL));
|
||||
@@ -538,7 +538,7 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask,
|
||||
}
|
||||
|
||||
template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
|
||||
void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &allDist, int distType)
|
||||
void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &allDist, int distType)
|
||||
{
|
||||
cv::ocl::Context *ctx = query.clCxt;
|
||||
size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
@@ -552,7 +552,7 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
|
||||
{
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&allDist.data ));
|
||||
args.push_back( std::make_pair( smemSize, (void *)NULL));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_size ));
|
||||
@@ -571,7 +571,7 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
|
||||
}
|
||||
|
||||
template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
|
||||
void calcDistance(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &allDist, int distType)
|
||||
void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &allDist, int distType)
|
||||
{
|
||||
cv::ocl::Context *ctx = query.clCxt;
|
||||
size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
@@ -584,7 +584,7 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &mask,
|
||||
{
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&allDist.data ));
|
||||
args.push_back( std::make_pair( smemSize, (void *)NULL));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_size ));
|
||||
@@ -1005,6 +1005,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, cons
|
||||
|
||||
void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask)
|
||||
{
|
||||
assert(mask.empty()); // mask is not supported at the moment
|
||||
oclMat trainIdx, distance;
|
||||
matchSingle(query, train, trainIdx, distance, mask);
|
||||
matchDownload(trainIdx, distance, matches);
|
||||
@@ -1448,7 +1449,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, std::vec
|
||||
|
||||
// radiusMatchSingle
|
||||
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train,
|
||||
oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
|
||||
oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
|
||||
{
|
||||
if (query.empty() || train.empty())
|
||||
return;
|
||||
@@ -1694,4 +1695,4 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, std::
|
||||
oclMat trainIdx, imgIdx, distance, nMatches;
|
||||
radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks);
|
||||
radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult);
|
||||
}
|
||||
}
|
||||
|
@@ -288,13 +288,14 @@ namespace cv
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.cols));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.rows));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&cols));
|
||||
if(src.clCxt -> impl -> double_support != 0)
|
||||
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
|
||||
|
||||
if(src.clCxt -> impl -> double_support != 0)
|
||||
{
|
||||
args.push_back( std::make_pair(sizeof(cl_double4), (void *)&borderValue));
|
||||
}
|
||||
else
|
||||
{
|
||||
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
|
||||
args.push_back( std::make_pair(sizeof(cl_float4), (void *)&borderFloat));
|
||||
}
|
||||
}
|
||||
|
@@ -5,11 +5,13 @@ int bit1Count(float x)
|
||||
{
|
||||
int c = 0;
|
||||
int ix = (int)x;
|
||||
|
||||
for (int i = 0 ; i < 32 ; i++)
|
||||
{
|
||||
c += ix & 0x1;
|
||||
ix >>= 1;
|
||||
}
|
||||
|
||||
return (float)c;
|
||||
}
|
||||
/* 2dim launch, global size: dim0 is (query rows + block_size - 1) / block_size * block_size, dim1 is block_size
|
||||
@@ -18,7 +20,7 @@ local size: dim0 is block_size, dim1 is block_size.
|
||||
__kernel void BruteForceMatch_UnrollMatch(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
//__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
@@ -30,7 +32,7 @@ __kernel void BruteForceMatch_UnrollMatch(
|
||||
int train_cols,
|
||||
int step,
|
||||
int distType
|
||||
)
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
@@ -40,6 +42,7 @@ __kernel void BruteForceMatch_UnrollMatch(
|
||||
__local float *s_train = sharebuffer + block_size * max_desc_len;
|
||||
|
||||
int queryIdx = groupidx * block_size + lidy;
|
||||
|
||||
// load the query into local memory.
|
||||
for (int i = 0 ; i < max_desc_len / block_size; i ++)
|
||||
{
|
||||
@@ -52,9 +55,11 @@ __kernel void BruteForceMatch_UnrollMatch(
|
||||
|
||||
// loopUnrolledCached to find the best trainIdx and best distance.
|
||||
volatile int imgIdx = 0;
|
||||
|
||||
for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
|
||||
{
|
||||
float result = 0;
|
||||
|
||||
for (int i = 0 ; i < max_desc_len / block_size ; i++)
|
||||
{
|
||||
//load a block_size * block_size block into local train.
|
||||
@@ -67,28 +72,34 @@ __kernel void BruteForceMatch_UnrollMatch(
|
||||
/* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
switch (distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 0:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
case 1:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
|
||||
break;
|
||||
case 2:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^(uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -105,8 +116,8 @@ __kernel void BruteForceMatch_UnrollMatch(
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
__local float *s_distance = (__local float*)(sharebuffer);
|
||||
__local int* s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
|
||||
__local float *s_distance = (__local float *)(sharebuffer);
|
||||
__local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
|
||||
|
||||
//find BestMatch
|
||||
s_distance += lidy * block_size;
|
||||
@@ -136,7 +147,7 @@ __kernel void BruteForceMatch_UnrollMatch(
|
||||
__kernel void BruteForceMatch_Match(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
//__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
@@ -147,7 +158,7 @@ __kernel void BruteForceMatch_Match(
|
||||
int train_cols,
|
||||
int step,
|
||||
int distType
|
||||
)
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
@@ -166,6 +177,7 @@ __kernel void BruteForceMatch_Match(
|
||||
{
|
||||
//Dist dist;
|
||||
float result = 0;
|
||||
|
||||
for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; i++)
|
||||
{
|
||||
const int loadx = lidx + i * block_size;
|
||||
@@ -184,28 +196,34 @@ __kernel void BruteForceMatch_Match(
|
||||
/* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
switch (distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[(uint)j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 0:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
case 1:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
|
||||
break;
|
||||
case 2:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[(uint)j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -256,7 +274,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
float maxDistance,
|
||||
__global float *mask,
|
||||
//__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__global int *nMatches,
|
||||
@@ -271,7 +289,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
|
||||
int step,
|
||||
int ostep,
|
||||
int distType
|
||||
)
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
@@ -285,6 +303,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
|
||||
__local float *s_train = sharebuffer + block_size * block_size;
|
||||
|
||||
float result = 0;
|
||||
|
||||
for (int i = 0 ; i < max_desc_len / block_size ; ++i)
|
||||
{
|
||||
//load a block_size * block_size block into local train.
|
||||
@@ -299,27 +318,33 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
|
||||
/* there are three types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
switch (distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 0:
|
||||
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
case 1:
|
||||
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
|
||||
break;
|
||||
case 2:
|
||||
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -329,7 +354,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
|
||||
{
|
||||
unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
|
||||
|
||||
if(ind < bestTrainIdx_cols)
|
||||
if (ind < bestTrainIdx_cols)
|
||||
{
|
||||
//bestImgIdx = imgIdx;
|
||||
bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
|
||||
@@ -343,7 +368,7 @@ __kernel void BruteForceMatch_RadiusMatch(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
float maxDistance,
|
||||
__global float *mask,
|
||||
//__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__global int *nMatches,
|
||||
@@ -357,7 +382,7 @@ __kernel void BruteForceMatch_RadiusMatch(
|
||||
int step,
|
||||
int ostep,
|
||||
int distType
|
||||
)
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
@@ -371,6 +396,7 @@ __kernel void BruteForceMatch_RadiusMatch(
|
||||
__local float *s_train = sharebuffer + block_size * block_size;
|
||||
|
||||
float result = 0;
|
||||
|
||||
for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; ++i)
|
||||
{
|
||||
//load a block_size * block_size block into local train.
|
||||
@@ -385,27 +411,33 @@ __kernel void BruteForceMatch_RadiusMatch(
|
||||
/* there are three types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
switch (distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 0:
|
||||
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
case 1:
|
||||
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
|
||||
break;
|
||||
case 2:
|
||||
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -415,7 +447,7 @@ __kernel void BruteForceMatch_RadiusMatch(
|
||||
{
|
||||
unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
|
||||
|
||||
if(ind < bestTrainIdx_cols)
|
||||
if (ind < bestTrainIdx_cols)
|
||||
{
|
||||
//bestImgIdx = imgIdx;
|
||||
bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
|
||||
@@ -428,7 +460,7 @@ __kernel void BruteForceMatch_RadiusMatch(
|
||||
__kernel void BruteForceMatch_knnUnrollMatch(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
//__global float *mask,
|
||||
__global int2 *bestTrainIdx,
|
||||
__global float2 *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
@@ -440,7 +472,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
|
||||
int train_cols,
|
||||
int step,
|
||||
int distType
|
||||
)
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
@@ -464,9 +496,11 @@ __kernel void BruteForceMatch_knnUnrollMatch(
|
||||
|
||||
//loopUnrolledCached
|
||||
volatile int imgIdx = 0;
|
||||
|
||||
for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
|
||||
{
|
||||
float result = 0;
|
||||
|
||||
for (int i = 0 ; i < max_desc_len / block_size ; i++)
|
||||
{
|
||||
const int loadX = lidx + i * block_size;
|
||||
@@ -480,28 +514,34 @@ __kernel void BruteForceMatch_knnUnrollMatch(
|
||||
/* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
switch (distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 0:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
case 1:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
|
||||
break;
|
||||
case 2:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^(uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -549,6 +589,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
|
||||
for (int i = 0 ; i < block_size ; i++)
|
||||
{
|
||||
float val = s_distance[i];
|
||||
|
||||
if (val < bestDistance1)
|
||||
{
|
||||
bestDistance2 = bestDistance1;
|
||||
@@ -602,7 +643,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
|
||||
__kernel void BruteForceMatch_knnMatch(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
//__global float *mask,
|
||||
__global int2 *bestTrainIdx,
|
||||
__global float2 *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
@@ -613,7 +654,7 @@ __kernel void BruteForceMatch_knnMatch(
|
||||
int train_cols,
|
||||
int step,
|
||||
int distType
|
||||
)
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
@@ -632,7 +673,8 @@ __kernel void BruteForceMatch_knnMatch(
|
||||
for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
|
||||
{
|
||||
float result = 0.0f;
|
||||
for (int i = 0 ; i < (query_cols + block_size -1) / block_size ; i++)
|
||||
|
||||
for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; i++)
|
||||
{
|
||||
const int loadx = lidx + i * block_size;
|
||||
//load query and train into local memory
|
||||
@@ -650,28 +692,34 @@ __kernel void BruteForceMatch_knnMatch(
|
||||
/* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
switch (distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[(uint)j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 0:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
case 1:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
|
||||
break;
|
||||
case 2:
|
||||
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[(uint)j * block_size + lidx]);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -719,6 +767,7 @@ __kernel void BruteForceMatch_knnMatch(
|
||||
for (int i = 0 ; i < block_size ; i++)
|
||||
{
|
||||
float val = s_distance[i];
|
||||
|
||||
if (val < bestDistance1)
|
||||
{
|
||||
bestDistance2 = bestDistance1;
|
||||
@@ -772,7 +821,7 @@ __kernel void BruteForceMatch_knnMatch(
|
||||
kernel void BruteForceMatch_calcDistanceUnrolled(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
//__global float *mask,
|
||||
__global float *allDist,
|
||||
__local float *sharebuffer,
|
||||
int block_size,
|
||||
@@ -790,7 +839,7 @@ kernel void BruteForceMatch_calcDistanceUnrolled(
|
||||
kernel void BruteForceMatch_calcDistance(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
//__global float *mask,
|
||||
__global float *allDist,
|
||||
__local float *sharebuffer,
|
||||
int block_size,
|
||||
@@ -808,9 +857,9 @@ kernel void BruteForceMatch_findBestMatch(
|
||||
__global float *allDist,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
int k,
|
||||
int block_size
|
||||
)
|
||||
int k,
|
||||
int block_size
|
||||
)
|
||||
{
|
||||
/* Todo */
|
||||
}
|
@@ -78,7 +78,7 @@ uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int col
|
||||
|
||||
// dynamically change the precision used for floating type
|
||||
|
||||
#if defined (__ATI__) || defined (__NVIDIA__)
|
||||
#if defined DOUBLE_SUPPORT
|
||||
#define F double
|
||||
#else
|
||||
#define F float
|
||||
@@ -299,7 +299,7 @@ __kernel
|
||||
__global const float * det,
|
||||
__global const float * trace,
|
||||
__global int4 * maxPosBuffer,
|
||||
volatile __global unsigned int* maxCounter,
|
||||
volatile __global int* maxCounter,
|
||||
int counter_offset,
|
||||
int det_step, // the step of det in bytes
|
||||
int trace_step, // the step of trace in bytes
|
||||
@@ -408,7 +408,7 @@ __kernel
|
||||
|
||||
if(condmax)
|
||||
{
|
||||
unsigned int ind = atomic_inc(maxCounter);
|
||||
int ind = atomic_inc(maxCounter);
|
||||
|
||||
if (ind < c_max_candidates)
|
||||
{
|
||||
@@ -427,7 +427,7 @@ __kernel
|
||||
__global float * det,
|
||||
__global float * trace,
|
||||
__global int4 * maxPosBuffer,
|
||||
volatile __global unsigned int* maxCounter,
|
||||
volatile __global int* maxCounter,
|
||||
int counter_offset,
|
||||
int det_step, // the step of det in bytes
|
||||
int trace_step, // the step of trace in bytes
|
||||
@@ -525,7 +525,7 @@ __kernel
|
||||
|
||||
if(condmax)
|
||||
{
|
||||
unsigned int ind = atomic_inc(maxCounter);
|
||||
int ind = atomic_inc(maxCounter);
|
||||
|
||||
if (ind < c_max_candidates)
|
||||
{
|
||||
@@ -585,7 +585,7 @@ __kernel
|
||||
__global const float * det,
|
||||
__global const int4 * maxPosBuffer,
|
||||
__global float * keypoints,
|
||||
volatile __global unsigned int * featureCounter,
|
||||
volatile __global int * featureCounter,
|
||||
int det_step,
|
||||
int keypoints_step,
|
||||
int c_img_rows,
|
||||
@@ -684,7 +684,7 @@ __kernel
|
||||
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
|
||||
{
|
||||
// Get a new feature index.
|
||||
unsigned int ind = atomic_inc(featureCounter);
|
||||
int ind = atomic_inc(featureCounter);
|
||||
|
||||
if (ind < c_max_features)
|
||||
{
|
||||
@@ -737,19 +737,19 @@ __constant float c_aptW[ORI_SAMPLES] = {0.001455130288377404f, 0.001707611023448
|
||||
__constant float c_NX[2][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
|
||||
__constant float c_NY[2][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
|
||||
|
||||
void reduce_32_sum(volatile __local float * data, float partial_reduction, int tid)
|
||||
void reduce_32_sum(volatile __local float * data, volatile float* partial_reduction, int tid)
|
||||
{
|
||||
#define op(A, B) (A)+(B)
|
||||
data[tid] = partial_reduction;
|
||||
#define op(A, B) (*A)+(B)
|
||||
data[tid] = *partial_reduction;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
|
||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]);
|
||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]);
|
||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]);
|
||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]);
|
||||
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 16]);
|
||||
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 8 ]);
|
||||
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 4 ]);
|
||||
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 2 ]);
|
||||
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 1 ]);
|
||||
}
|
||||
#undef op
|
||||
}
|
||||
@@ -831,7 +831,7 @@ __kernel
|
||||
{
|
||||
const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC;
|
||||
|
||||
float sumx = 0.0f, sumy = 0.0f;
|
||||
volatile float sumx = 0.0f, sumy = 0.0f;
|
||||
int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
@@ -856,8 +856,8 @@ __kernel
|
||||
sumx += s_X[get_local_id(0) + 96];
|
||||
sumy += s_Y[get_local_id(0) + 96];
|
||||
}
|
||||
reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0));
|
||||
reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0));
|
||||
reduce_32_sum(s_sumx + get_local_id(1) * 32, &sumx, get_local_id(0));
|
||||
reduce_32_sum(s_sumy + get_local_id(1) * 32, &sumy, get_local_id(0));
|
||||
|
||||
const float temp_mod = sumx * sumx + sumy * sumy;
|
||||
if (temp_mod > best_mod)
|
||||
@@ -892,14 +892,32 @@ __kernel
|
||||
kp_dir += 2.0f * CV_PI_F;
|
||||
kp_dir *= 180.0f / CV_PI_F;
|
||||
|
||||
kp_dir = 360.0f - kp_dir;
|
||||
if (fabs(kp_dir - 360.f) < FLT_EPSILON)
|
||||
kp_dir = 0.f;
|
||||
//kp_dir = 360.0f - kp_dir;
|
||||
//if (fabs(kp_dir - 360.f) < FLT_EPSILON)
|
||||
// kp_dir = 0.f;
|
||||
|
||||
featureDir[get_group_id(0)] = kp_dir;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Marks keypoints as upright by overwriting their orientation entry with a
// fixed angle of 90 degrees.
//
//   keypoints      - keypoint matrix; row ANGLE_ROW holds the per-feature angle
//   keypoints_step - row stride of `keypoints` in bytes (converted to a stride
//                    in floats below)
//   nFeatures      - number of keypoints (columns) to update
__kernel
void icvSetUpright(
    __global float * keypoints,
    int keypoints_step,
    int nFeatures
)
{
    // Convert the byte stride into a stride counted in float elements.
    keypoints_step /= sizeof(*keypoints);
    __global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;

    // Valid feature indices are 0 .. nFeatures - 1. Work-items whose id falls
    // outside that range (e.g. when the global range is larger than nFeatures)
    // must not write, otherwise the element just past the last keypoint is
    // clobbered.
    const int idx = (int)get_global_id(0);

    if (idx < nFeatures)
    {
        featureDir[idx] = 90.0f;
    }
}
|
||||
|
||||
|
||||
#undef ORI_SEARCH_INC
|
||||
#undef ORI_WIN
|
||||
#undef ORI_SAMPLES
|
||||
@@ -993,10 +1011,7 @@ void calc_dx_dy(
|
||||
const float centerX = featureX[get_group_id(0)];
|
||||
const float centerY = featureY[get_group_id(0)];
|
||||
const float size = featureSize[get_group_id(0)];
|
||||
float descriptor_dir = 360.0f - featureDir[get_group_id(0)];
|
||||
if (fabs(descriptor_dir - 360.f) < FLT_EPSILON)
|
||||
descriptor_dir = 0.f;
|
||||
descriptor_dir *= (float)(CV_PI_F / 180.0f);
|
||||
float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f);
|
||||
|
||||
/* The sampling intervals and wavelet sized for selecting an orientation
|
||||
and building the keypoint descriptor are defined relative to 's' */
|
||||
@@ -1125,11 +1140,15 @@ __kernel
|
||||
{
|
||||
sdxabs[tid] = fabs(sdx[tid]); // |dx| array
|
||||
sdyabs[tid] = fabs(sdy[tid]); // |dy| array
|
||||
//barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 25)
|
||||
{
|
||||
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
|
||||
//barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 25)
|
||||
{
|
||||
volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
|
||||
|
||||
// write dx, dy, |dx|, |dy|
|
||||
|
@@ -140,6 +140,10 @@ float reduce_smem(volatile __local float* smem, int size)
|
||||
if (tid < 32)
|
||||
{
|
||||
if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16)
|
||||
{
|
||||
if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
|
||||
if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
|
||||
if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
|
||||
@@ -224,6 +228,11 @@ __kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr
|
||||
{
|
||||
volatile __local float* smem = products;
|
||||
smem[tid] = product = product + smem[tid + 32];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16)
|
||||
{
|
||||
volatile __local float* smem = products;
|
||||
smem[tid] = product = product + smem[tid + 16];
|
||||
smem[tid] = product = product + smem[tid + 8];
|
||||
smem[tid] = product = product + smem[tid + 4];
|
||||
|
@@ -56,6 +56,21 @@ namespace cv
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *nonfree_surf;
|
||||
|
||||
// Extra option string handed to openCLExecuteKernel when support_image2d()
// returns false (see openCLExecuteKernelSURF).
// NOTE(review): this is a non-const pointer declared at namespace scope without
// `static`, so it appears to have external linkage — confirm that is intended.
const char* noImage2dOption = "-D DISABLE_IMAGE2D";
|
||||
|
||||
// Single entry point for launching SURF kernels: forwards all arguments to
// openCLExecuteKernel and, when support_image2d() reports that image2d is not
// available, additionally passes noImage2dOption.
static void openCLExecuteKernelSURF(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
    size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{
    const bool hasImage2d = support_image2d();

    if (!hasImage2d)
    {
        // No image2d support: launch with the DISABLE_IMAGE2D option.
        openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, noImage2dOption);
        return;
    }

    openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth);
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,10 +94,6 @@ static inline int calcSize(int octave, int layer)
|
||||
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
const char* noImage2dOption = "-D DISABLE_IMAGE2D";
|
||||
}
|
||||
|
||||
class SURF_OCL_Invoker
|
||||
{
|
||||
@@ -99,15 +110,16 @@ public:
|
||||
void icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
|
||||
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
|
||||
|
||||
void icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter,
|
||||
void icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
|
||||
oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures);
|
||||
|
||||
void icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures);
|
||||
|
||||
void icvSetUpright_gpu(const oclMat &keypoints, int nFeatures);
|
||||
|
||||
void compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures);
|
||||
// end of kernel callers declarations
|
||||
|
||||
|
||||
SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
|
||||
surf_(surf),
|
||||
img_cols(img.cols), img_rows(img.rows),
|
||||
@@ -181,8 +193,8 @@ public:
|
||||
icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
|
||||
octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
|
||||
|
||||
unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
|
||||
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
|
||||
int maxCounter = ((Mat)counters).at<int>(1 + octave);
|
||||
maxCounter = std::min(maxCounter, static_cast<int>(maxCandidates));
|
||||
|
||||
if (maxCounter > 0)
|
||||
{
|
||||
@@ -190,15 +202,29 @@ public:
|
||||
keypoints, counters, octave, layer_rows, maxFeatures);
|
||||
}
|
||||
}
|
||||
unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
|
||||
featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));
|
||||
int featureCounter = Mat(counters).at<int>(0);
|
||||
featureCounter = std::min(featureCounter, static_cast<int>(maxFeatures));
|
||||
|
||||
keypoints.cols = featureCounter;
|
||||
|
||||
if (surf_.upright)
|
||||
keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
|
||||
{
|
||||
//keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
|
||||
setUpright(keypoints);
|
||||
}
|
||||
else
|
||||
{
|
||||
findOrientation(keypoints);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the upright-orientation kernel over all keypoints; does nothing when
// the keypoint matrix has no columns.
void setUpright(oclMat &keypoints)
{
    const int featureCount = keypoints.cols;

    // Nothing to launch for an empty keypoint set.
    if (featureCount <= 0)
        return;

    icvSetUpright_gpu(keypoints, featureCount);
}
|
||||
|
||||
void findOrientation(oclMat &keypoints)
|
||||
@@ -483,14 +509,7 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
|
||||
divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
|
||||
1
|
||||
};
|
||||
if(support_image2d())
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
|
||||
}
|
||||
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
|
||||
@@ -536,17 +555,10 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat
|
||||
1
|
||||
};
|
||||
|
||||
if(support_image2d())
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
|
||||
}
|
||||
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter,
|
||||
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
|
||||
oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures)
|
||||
{
|
||||
Context *clCxt = det.clCxt;
|
||||
@@ -568,14 +580,7 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa
|
||||
size_t localThreads[3] = {3, 3, 3};
|
||||
size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1};
|
||||
|
||||
if(support_image2d())
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
|
||||
}
|
||||
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures)
|
||||
@@ -602,16 +607,27 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
|
||||
size_t localThreads[3] = {32, 4, 1};
|
||||
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
|
||||
|
||||
if(support_image2d())
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
|
||||
{
|
||||
Context *clCxt = counters.clCxt;
|
||||
std::string kernelName = "icvSetUpright";
|
||||
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&nFeatures));
|
||||
|
||||
size_t localThreads[3] = {256, 1, 1};
|
||||
size_t globalThreads[3] = {nFeatures, 1, 1};
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
|
||||
void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures)
|
||||
{
|
||||
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
|
||||
@@ -647,14 +663,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
|
||||
if(support_image2d())
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
|
||||
}
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
kernelName = "normalize_descriptors64";
|
||||
|
||||
@@ -667,14 +677,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
|
||||
args.clear();
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
|
||||
if(support_image2d())
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
|
||||
}
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -702,14 +706,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
|
||||
if(support_image2d())
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
|
||||
}
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
kernelName = "normalize_descriptors128";
|
||||
|
||||
@@ -722,14 +720,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
|
||||
args.clear();
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
|
||||
if(support_image2d())
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1, noImage2dOption);
|
||||
}
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user