From 44eaeee8655b57555b13f671554946bdf07e35c1 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <no@email>
Date: Tue, 24 Apr 2012 07:56:33 +0000
Subject: [PATCH] refactored gpu BruteForceMatcher (made it similar to
 BFMatcher)

---
 modules/gpu/include/opencv2/gpu/gpu.hpp |  32 +--
 modules/gpu/perf/perf_features2d.cpp    |   6 +-
 modules/gpu/src/brute_force_matcher.cpp | 355 +++++++++++++-----------
 modules/gpu/test/test_features2d.cpp    |  36 ++-
 modules/stitching/src/matchers.cpp      |   2 +-
 samples/gpu/performance/tests.cpp       |   2 +-
 samples/gpu/surf_keypoint_matcher.cpp   |   4 +-
 7 files changed, 214 insertions(+), 223 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index bf20512fb..5a95c5e9d 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -1220,12 +1220,10 @@ protected:
 
 ////////////////////////////////// BruteForceMatcher //////////////////////////////////
 
-class CV_EXPORTS BruteForceMatcher_GPU_base
+class CV_EXPORTS BFMatcher_GPU
 {
 public:
-    enum DistType {L1Dist = 0, L2Dist, HammingDist};
-
-    explicit BruteForceMatcher_GPU_base(DistType distType = L2Dist);
+    explicit BFMatcher_GPU(int norm = cv::NORM_L2);
 
     // Add descriptors to train descriptor collection
     void add(const std::vector<GpuMat>& descCollection);
@@ -1367,36 +1365,12 @@ public:
     void radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
         const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
 
-    DistType distType;
+    int norm;
 
 private:
     std::vector<GpuMat> trainDescCollection;
 };
 
-template <class Distance>
-class CV_EXPORTS BruteForceMatcher_GPU;
-
-template <typename T>
-class CV_EXPORTS BruteForceMatcher_GPU< L1<T> > : public BruteForceMatcher_GPU_base
-{
-public:
-    explicit BruteForceMatcher_GPU() : BruteForceMatcher_GPU_base(L1Dist) {}
-    explicit BruteForceMatcher_GPU(L1<T> /*d*/) : BruteForceMatcher_GPU_base(L1Dist) {}
-};
-template <typename T>
-class CV_EXPORTS BruteForceMatcher_GPU< L2<T> > : public BruteForceMatcher_GPU_base
-{
-public:
-    explicit BruteForceMatcher_GPU() : BruteForceMatcher_GPU_base(L2Dist) {}
-    explicit BruteForceMatcher_GPU(L2<T> /*d*/) : BruteForceMatcher_GPU_base(L2Dist) {}
-};
-template <> class CV_EXPORTS BruteForceMatcher_GPU< Hamming > : public BruteForceMatcher_GPU_base
-{
-public:
-    explicit BruteForceMatcher_GPU() : BruteForceMatcher_GPU_base(HammingDist) {}
-    explicit BruteForceMatcher_GPU(Hamming /*d*/) : BruteForceMatcher_GPU_base(HammingDist) {}
-};
-
 ////////////////////////////////// CascadeClassifier_GPU //////////////////////////////////////////
 // The cascade classifier class for object detection.
 class CV_EXPORTS CascadeClassifier_GPU
diff --git a/modules/gpu/perf/perf_features2d.cpp b/modules/gpu/perf/perf_features2d.cpp
index 62b5819b8..18e149787 100644
--- a/modules/gpu/perf/perf_features2d.cpp
+++ b/modules/gpu/perf/perf_features2d.cpp
@@ -21,7 +21,7 @@ GPU_PERF_TEST(BruteForceMatcher_match, cv::gpu::DeviceInfo, int)
     cv::gpu::GpuMat train(train_host);
     cv::gpu::GpuMat trainIdx, distance;
 
-    cv::gpu::BruteForceMatcher_GPU< cv::L2<float> > matcher;
+    cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
 
     declare.time(3.0);
 
@@ -55,7 +55,7 @@ GPU_PERF_TEST(BruteForceMatcher_knnMatch, cv::gpu::DeviceInfo, int, int)
     cv::gpu::GpuMat train(train_host);
     cv::gpu::GpuMat trainIdx, distance, allDist;
 
-    cv::gpu::BruteForceMatcher_GPU< cv::L2<float> > matcher;
+    cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
 
     declare.time(3.0);
 
@@ -90,7 +90,7 @@ GPU_PERF_TEST(BruteForceMatcher_radiusMatch, cv::gpu::DeviceInfo, int)
     cv::gpu::GpuMat train(train_host);
     cv::gpu::GpuMat trainIdx, nMatches, distance;
 
-    cv::gpu::BruteForceMatcher_GPU< cv::L2<float> > matcher;
+    cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
 
     declare.time(3.0);
 
diff --git a/modules/gpu/src/brute_force_matcher.cpp b/modules/gpu/src/brute_force_matcher.cpp
index 8270dfe79..ce3e0289f 100644
--- a/modules/gpu/src/brute_force_matcher.cpp
+++ b/modules/gpu/src/brute_force_matcher.cpp
@@ -48,37 +48,37 @@ using namespace std;
 
 #if !defined (HAVE_CUDA)
 
-cv::gpu::BruteForceMatcher_GPU_base::BruteForceMatcher_GPU_base(DistType) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::add(const vector<GpuMat>&) { throw_nogpu(); }
-const vector<GpuMat>& cv::gpu::BruteForceMatcher_GPU_base::getTrainDescriptors() const { throw_nogpu(); return trainDescCollection; }
-void cv::gpu::BruteForceMatcher_GPU_base::clear() { throw_nogpu(); }
-bool cv::gpu::BruteForceMatcher_GPU_base::empty() const { throw_nogpu(); return true; }
-bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const { throw_nogpu(); return true; }
-void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat&, const GpuMat&, vector<DMatch>&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat&, const Mat&, vector<DMatch>&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat&, const GpuMat&, vector<DMatch>&, const GpuMat&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat&, GpuMat&, const vector<GpuMat>&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, vector<DMatch>&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat&, const Mat&, const Mat&, vector<DMatch>&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat&, vector<DMatch>&, const vector<GpuMat>&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, int, const GpuMat&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Download(const GpuMat&, const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Convert(const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, vector< vector<DMatch> >&, int, const vector<GpuMat>&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, float, const GpuMat&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const vector<GpuMat>&, Stream&) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vector<DMatch> >&, float, const vector<GpuMat>&, bool) { throw_nogpu(); }
+cv::gpu::BFMatcher_GPU::BFMatcher_GPU(int) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::add(const vector<GpuMat>&) { throw_nogpu(); }
+const vector<GpuMat>& cv::gpu::BFMatcher_GPU::getTrainDescriptors() const { throw_nogpu(); return trainDescCollection; }
+void cv::gpu::BFMatcher_GPU::clear() { throw_nogpu(); }
+bool cv::gpu::BFMatcher_GPU::empty() const { throw_nogpu(); return true; }
+bool cv::gpu::BFMatcher_GPU::isMaskSupported() const { throw_nogpu(); return true; }
+void cv::gpu::BFMatcher_GPU::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::matchDownload(const GpuMat&, const GpuMat&, vector<DMatch>&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::matchConvert(const Mat&, const Mat&, vector<DMatch>&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::match(const GpuMat&, const GpuMat&, vector<DMatch>&, const GpuMat&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::makeGpuCollection(GpuMat&, GpuMat&, const vector<GpuMat>&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, vector<DMatch>&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::matchConvert(const Mat&, const Mat&, const Mat&, vector<DMatch>&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::match(const GpuMat&, vector<DMatch>&, const vector<GpuMat>&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::knnMatchDownload(const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::knnMatchConvert(const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::knnMatch(const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, int, const GpuMat&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::knnMatch2Download(const GpuMat&, const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::knnMatch2Convert(const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::knnMatch(const GpuMat&, vector< vector<DMatch> >&, int, const vector<GpuMat>&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::radiusMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::radiusMatch(const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, float, const GpuMat&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const vector<GpuMat>&, Stream&) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
+void cv::gpu::BFMatcher_GPU::radiusMatch(const GpuMat&, vector< vector<DMatch> >&, float, const vector<GpuMat>&, bool) { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -159,31 +159,31 @@ namespace cv { namespace gpu { namespace device
 ////////////////////////////////////////////////////////////////////
 // Train collection
 
-cv::gpu::BruteForceMatcher_GPU_base::BruteForceMatcher_GPU_base(DistType distType_) : distType(distType_)
+cv::gpu::BFMatcher_GPU::BFMatcher_GPU(int norm_) : norm(norm_)
 {
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::add(const vector<GpuMat>& descCollection)
+void cv::gpu::BFMatcher_GPU::add(const vector<GpuMat>& descCollection)
 {
     trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
 }
 
-const vector<GpuMat>& cv::gpu::BruteForceMatcher_GPU_base::getTrainDescriptors() const
+const vector<GpuMat>& cv::gpu::BFMatcher_GPU::getTrainDescriptors() const
 {
     return trainDescCollection;
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::clear()
+void cv::gpu::BFMatcher_GPU::clear()
 {
     trainDescCollection.clear();
 }
 
-bool cv::gpu::BruteForceMatcher_GPU_base::empty() const
+bool cv::gpu::BFMatcher_GPU::empty() const
 {
     return trainDescCollection.empty();
 }
 
-bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const
+bool cv::gpu::BFMatcher_GPU::isMaskSupported() const
 {
     return true;
 }
@@ -191,47 +191,51 @@ bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const
 ////////////////////////////////////////////////////////////////////
 // Match
 
-void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const GpuMat& train,
+void cv::gpu::BFMatcher_GPU::matchSingle(const GpuMat& query, const GpuMat& train,
     GpuMat& trainIdx, GpuMat& distance,
     const GpuMat& mask, Stream& stream)
 {
     if (query.empty() || train.empty())
         return;
 
-    using namespace ::cv::gpu::device::bf_match;
+    using namespace cv::gpu::device::bf_match;
 
     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
                              const DevMem2Di& trainIdx, const DevMem2Df& distance,
                              int cc, cudaStream_t stream);
 
-    static const caller_t callers[3][6] =
+    static const caller_t callersL1[] =
     {
-        {
-            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-            matchL1_gpu<int>, matchL1_gpu<float>
-        },
-        {
-            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
-        },
-        {
-            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-        }
+        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
+        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
+        matchL1_gpu<int>, matchL1_gpu<float>
+    };
+    static const caller_t callersL2[] =
+    {
+        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
+        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
+        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
+    };
+
+    static const caller_t callersHamming[] =
+    {
+        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
+        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
+        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
     };
 
     CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
     CV_Assert(train.cols == query.cols && train.type() == query.type());
+    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
+
+    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
 
     const int nQuery = query.rows;
 
     ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
     ensureSizeIsEnough(1, nQuery, CV_32F, distance);
 
-    caller_t func = callers[distType][query.depth()];
+    caller_t func = callers[query.depth()];
     CV_Assert(func != 0);
 
     DeviceInfo info;
@@ -240,7 +244,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const
     func(query, train, mask, trainIdx, distance, cc, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& distance, vector<DMatch>& matches)
+void cv::gpu::BFMatcher_GPU::matchDownload(const GpuMat& trainIdx, const GpuMat& distance, vector<DMatch>& matches)
 {
     if (trainIdx.empty() || distance.empty())
         return;
@@ -251,7 +255,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
     matchConvert(trainIdxCPU, distanceCPU, matches);
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat& trainIdx, const Mat& distance, vector<DMatch>& matches)
+void cv::gpu::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& distance, vector<DMatch>& matches)
 {
     if (trainIdx.empty() || distance.empty())
         return;
@@ -281,7 +285,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat& trainIdx, cons
     }
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat& query, const GpuMat& train,
+void cv::gpu::BFMatcher_GPU::match(const GpuMat& query, const GpuMat& train,
     vector<DMatch>& matches, const GpuMat& mask)
 {
     GpuMat trainIdx, distance;
@@ -289,7 +293,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat& query, const GpuMa
     matchDownload(trainIdx, distance, matches);
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection,
+void cv::gpu::BFMatcher_GPU::makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection,
     const vector<GpuMat>& masks)
 {
     if (empty())
@@ -333,39 +337,42 @@ void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat& trainCollect
     }
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, const GpuMat& trainCollection,
+void cv::gpu::BFMatcher_GPU::matchCollection(const GpuMat& query, const GpuMat& trainCollection,
     GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
     const GpuMat& masks, Stream& stream)
 {
     if (query.empty() || trainCollection.empty())
         return;
 
-    using namespace ::cv::gpu::device::bf_match;
+    using namespace cv::gpu::device::bf_match;
 
     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                              const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                              int cc, cudaStream_t stream);
 
-    static const caller_t callers[3][6] =
+    static const caller_t callersL1[] =
     {
-        {
-            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-            matchL1_gpu<int>, matchL1_gpu<float>
-        },
-        {
-            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
-        },
-        {
-            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-        }
+        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
+        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
+        matchL1_gpu<int>, matchL1_gpu<float>
+    };
+    static const caller_t callersL2[] =
+    {
+        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
+        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
+        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
+    };
+    static const caller_t callersHamming[] =
+    {
+        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
+        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
+        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
     };
 
     CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
+    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
+
+    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
 
     const int nQuery = query.rows;
 
@@ -373,7 +380,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
     ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx);
     ensureSizeIsEnough(1, nQuery, CV_32F, distance);
 
-    caller_t func = callers[distType][query.depth()];
+    caller_t func = callers[query.depth()];
     CV_Assert(func != 0);
 
     DeviceInfo info;
@@ -382,7 +389,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
     func(query, trainCollection, masks, trainIdx, imgIdx, distance, cc, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, vector<DMatch>& matches)
+void cv::gpu::BFMatcher_GPU::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, vector<DMatch>& matches)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
         return;
@@ -394,7 +401,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
     matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches)
+void cv::gpu::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
         return;
@@ -428,7 +435,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat& trainIdx, cons
     }
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat& query, vector<DMatch>& matches, const vector<GpuMat>& masks)
+void cv::gpu::BFMatcher_GPU::match(const GpuMat& query, vector<DMatch>& matches, const vector<GpuMat>& masks)
 {
     GpuMat trainCollection;
     GpuMat maskCollection;
@@ -444,40 +451,43 @@ void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat& query, vector<DMat
 ////////////////////////////////////////////////////////////////////
 // KnnMatch
 
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, const GpuMat& train,
+void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat& train,
     GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
     const GpuMat& mask, Stream& stream)
 {
     if (query.empty() || train.empty())
         return;
 
-    using namespace ::cv::gpu::device::bf_knnmatch;
+    using namespace cv::gpu::device::bf_knnmatch;
 
     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
                              const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
                              int cc, cudaStream_t stream);
 
-    static const caller_t callers[3][6] =
+    static const caller_t callersL1[] =
     {
-        {
-            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-            matchL1_gpu<int>, matchL1_gpu<float>
-        },
-        {
-            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
-        },
-        {
-            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-        }
+        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
+        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
+        matchL1_gpu<int>, matchL1_gpu<float>
+    };
+    static const caller_t callersL2[] =
+    {
+        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
+        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
+        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
+    };
+    static const caller_t callersHamming[] =
+    {
+        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
+        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
+        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
     };
 
     CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
     CV_Assert(train.type() == query.type() && train.cols == query.cols);
+    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
+
+    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
 
     const int nQuery = query.rows;
     const int nTrain = train.rows;
@@ -499,7 +509,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
     else
         trainIdx.setTo(Scalar::all(-1));
 
-    caller_t func = callers[distType][query.depth()];
+    caller_t func = callers[query.depth()];
     CV_Assert(func != 0);
 
     DeviceInfo info;
@@ -508,7 +518,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
     func(query, train, k, mask, trainIdx, distance, allDist, cc, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
+void cv::gpu::BFMatcher_GPU::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
     vector< vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || distance.empty())
@@ -520,7 +530,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainId
     knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, const Mat& distance,
+void cv::gpu::BFMatcher_GPU::knnMatchConvert(const Mat& trainIdx, const Mat& distance,
     vector< vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || distance.empty())
@@ -565,7 +575,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c
     }
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& query, const GpuMat& train,
+void cv::gpu::BFMatcher_GPU::knnMatch(const GpuMat& query, const GpuMat& train,
     vector< vector<DMatch> >& matches, int k, const GpuMat& mask, bool compactResult)
 {
     GpuMat trainIdx, distance, allDist;
@@ -573,39 +583,42 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& query, const Gp
     knnMatchDownload(trainIdx, distance, matches, compactResult);
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
+void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
     GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
     const GpuMat& maskCollection, Stream& stream)
 {
     if (query.empty() || trainCollection.empty())
         return;
 
-    using namespace ::cv::gpu::device::bf_knnmatch;
+    using namespace cv::gpu::device::bf_knnmatch;
 
     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                              const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
                              int cc, cudaStream_t stream);
 
-    static const caller_t callers[3][6] =
+    static const caller_t callersL1[] =
     {
-        {
-            match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
-            match2L1_gpu<unsigned short>, match2L1_gpu<short>,
-            match2L1_gpu<int>, match2L1_gpu<float>
-        },
-        {
-            0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
-            0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
-            0/*match2L2_gpu<int>*/, match2L2_gpu<float>
-        },
-        {
-            match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
-            match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
-            match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
-        }
+        match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
+        match2L1_gpu<unsigned short>, match2L1_gpu<short>,
+        match2L1_gpu<int>, match2L1_gpu<float>
+    };
+    static const caller_t callersL2[] =
+    {
+        0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
+        0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
+        0/*match2L2_gpu<int>*/, match2L2_gpu<float>
+    };
+    static const caller_t callersHamming[] =
+    {
+        match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
+        match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
+        match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
     };
 
     CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
+    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
+
+    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
 
     const int nQuery = query.rows;
 
@@ -618,7 +631,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
     else
         trainIdx.setTo(Scalar::all(-1));
 
-    caller_t func = callers[distType][query.depth()];
+    caller_t func = callers[query.depth()];
     CV_Assert(func != 0);
 
     DeviceInfo info;
@@ -627,7 +640,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
     func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
+void cv::gpu::BFMatcher_GPU::knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
     vector< vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
@@ -640,7 +653,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Download(const GpuMat& trainI
     knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
+void cv::gpu::BFMatcher_GPU::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
     vector< vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
@@ -696,7 +709,7 @@ namespace
     };
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& query, vector< vector<DMatch> >& matches, int k,
+void cv::gpu::BFMatcher_GPU::knnMatch(const GpuMat& query, vector< vector<DMatch> >& matches, int k,
     const vector<GpuMat>& masks, bool compactResult)
 {
     if (k == 2)
@@ -754,7 +767,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& query, vector<
 ////////////////////////////////////////////////////////////////////
 // RadiusMatch
 
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query, const GpuMat& train,
+void cv::gpu::BFMatcher_GPU::radiusMatchSingle(const GpuMat& query, const GpuMat& train,
     GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
     const GpuMat& mask, Stream& stream)
 {
@@ -767,23 +780,23 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
                              const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                              int cc, cudaStream_t stream);
 
-    static const caller_t callers[3][6] =
+    static const caller_t callersL1[] =
     {
-        {
-            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-            matchL1_gpu<int>, matchL1_gpu<float>
-        },
-        {
-            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
-        },
-        {
-            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-        }
+        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
+        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
+        matchL1_gpu<int>, matchL1_gpu<float>
+    };
+    static const caller_t callersL2[] =
+    {
+        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
+        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
+        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
+    };
+    static const caller_t callersHamming[] =
+    {
+        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
+        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
+        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
     };
 
     DeviceInfo info;
@@ -798,6 +811,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
     CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
     CV_Assert(train.type() == query.type() && train.cols == query.cols);
     CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size()));
+    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
+
+    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
 
     ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
     if (trainIdx.empty())
@@ -811,13 +827,13 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
     else
         nMatches.setTo(Scalar::all(0));
 
-    caller_t func = callers[distType][query.depth()];
+    caller_t func = callers[query.depth()];
     CV_Assert(func != 0);
 
     func(query, train, maxDistance, mask, trainIdx, distance, nMatches, cc, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
+void cv::gpu::BFMatcher_GPU::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
     vector< vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || distance.empty() || nMatches.empty())
@@ -830,7 +846,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai
     radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
+void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
     vector< vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || distance.empty() || nMatches.empty())
@@ -879,7 +895,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx
     }
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& query, const GpuMat& train,
+void cv::gpu::BFMatcher_GPU::radiusMatch(const GpuMat& query, const GpuMat& train,
     vector< vector<DMatch> >& matches, float maxDistance, const GpuMat& mask, bool compactResult)
 {
     GpuMat trainIdx, distance, nMatches;
@@ -887,7 +903,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& query, const
     radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult);
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches,
+void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches,
     float maxDistance, const vector<GpuMat>& masks, Stream& stream)
 {
     if (query.empty() || empty())
@@ -899,23 +915,23 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
                              const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                              int cc, cudaStream_t stream);
 
-    static const caller_t callers[3][6] =
+    static const caller_t callersL1[] =
     {
-        {
-            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-            matchL1_gpu<int>, matchL1_gpu<float>
-        },
-        {
-            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
-        },
-        {
-            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-        }
+        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
+        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
+        matchL1_gpu<int>, matchL1_gpu<float>
+    };
+    static const caller_t callersL2[] =
+    {
+        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
+        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
+        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
+    };
+    static const caller_t callersHamming[] =
+    {
+        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
+        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
+        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
     };
 
     DeviceInfo info;
@@ -928,6 +944,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
 
     CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
     CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size()));
+    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
+
+    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
 
     ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
     if (trainIdx.empty())
@@ -942,7 +961,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
     else
         nMatches.setTo(Scalar::all(0));
 
-    caller_t func = callers[distType][query.depth()];
+    caller_t func = callers[query.depth()];
     CV_Assert(func != 0);
 
     vector<DevMem2Db> trains_(trainDescCollection.begin(), trainDescCollection.end());
@@ -952,7 +971,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
         trainIdx, imgIdx, distance, nMatches, cc, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
+void cv::gpu::BFMatcher_GPU::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
     vector< vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
@@ -966,7 +985,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai
     radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
+void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
     vector< vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
@@ -1018,7 +1037,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx
     }
 }
 
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& query, vector< vector<DMatch> >& matches,
+void cv::gpu::BFMatcher_GPU::radiusMatch(const GpuMat& query, vector< vector<DMatch> >& matches,
     float maxDistance, const vector<GpuMat>& masks, bool compactResult)
 {
     GpuMat trainIdx, imgIdx, distance, nMatches;
diff --git a/modules/gpu/test/test_features2d.cpp b/modules/gpu/test/test_features2d.cpp
index fc5cb30e8..85996f713 100644
--- a/modules/gpu/test/test_features2d.cpp
+++ b/modules/gpu/test/test_features2d.cpp
@@ -503,13 +503,12 @@ INSTANTIATE_TEST_CASE_P(GPU_Features2D, ORB,  testing::Combine(
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // BruteForceMatcher
 
-CV_ENUM(DistType, cv::gpu::BruteForceMatcher_GPU_base::L1Dist, cv::gpu::BruteForceMatcher_GPU_base::L2Dist, cv::gpu::BruteForceMatcher_GPU_base::HammingDist)
 IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
 
-PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, DistType, DescriptorSize)
+PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, NormCode, DescriptorSize)
 {
     cv::gpu::DeviceInfo devInfo;
-    cv::gpu::BruteForceMatcher_GPU_base::DistType distType;
+    int normCode;
     int dim;
 
     int queryDescCount;
@@ -520,7 +519,7 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, DistType, DescriptorSize
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        distType = (cv::gpu::BruteForceMatcher_GPU_base::DistType)(int)GET_PARAM(1);
+        normCode = GET_PARAM(1);
         dim = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
@@ -566,7 +565,7 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, DistType, DescriptorSize
 
 TEST_P(BruteForceMatcher, Match)
 {
-    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+    cv::gpu::BFMatcher_GPU matcher(normCode);
 
     std::vector<cv::DMatch> matches;
     matcher.match(loadMat(query), loadMat(train), matches);
@@ -584,10 +583,9 @@ TEST_P(BruteForceMatcher, Match)
     ASSERT_EQ(0, badCount);
 }
 
-
 TEST_P(BruteForceMatcher, MatchAdd)
 {
-    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+    cv::gpu::BFMatcher_GPU matcher(normCode);
 
     cv::gpu::GpuMat d_train(train);
 
@@ -638,9 +636,9 @@ TEST_P(BruteForceMatcher, MatchAdd)
 
 TEST_P(BruteForceMatcher, KnnMatch2)
 {
-    const int knn = 2;
+    cv::gpu::BFMatcher_GPU matcher(normCode);
 
-    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+    const int knn = 2;
 
     std::vector< std::vector<cv::DMatch> > matches;
     matcher.knnMatch(loadMat(query), loadMat(train), matches, knn);
@@ -670,7 +668,7 @@ TEST_P(BruteForceMatcher, KnnMatch2)
 
 TEST_P(BruteForceMatcher, KnnMatch3)
 {
-    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+    cv::gpu::BFMatcher_GPU matcher(normCode);
 
     const int knn = 3;
 
@@ -702,9 +700,9 @@ TEST_P(BruteForceMatcher, KnnMatch3)
 
 TEST_P(BruteForceMatcher, KnnMatchAdd2)
 {
-    const int knn = 2;
+    cv::gpu::BFMatcher_GPU matcher(normCode);
 
-    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+    const int knn = 2;
 
     cv::gpu::GpuMat d_train(train);
 
@@ -761,9 +759,9 @@ TEST_P(BruteForceMatcher, KnnMatchAdd2)
 
 TEST_P(BruteForceMatcher, KnnMatchAdd3)
 {
-    const int knn = 3;
+    cv::gpu::BFMatcher_GPU matcher(normCode);
 
-    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+    const int knn = 3;
 
     cv::gpu::GpuMat d_train(train);
 
@@ -819,9 +817,9 @@ TEST_P(BruteForceMatcher, KnnMatchAdd3)
 
 TEST_P(BruteForceMatcher, RadiusMatch)
 {
-    const float radius = 1.f / countFactor;
+    cv::gpu::BFMatcher_GPU matcher(normCode);
 
-    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+    const float radius = 1.f / countFactor;
 
     if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
     {
@@ -861,11 +859,11 @@ TEST_P(BruteForceMatcher, RadiusMatch)
 
 TEST_P(BruteForceMatcher, RadiusMatchAdd)
 {
+    cv::gpu::BFMatcher_GPU matcher(normCode);
+
     const int n = 3;
     const float radius = 1.f / countFactor * n;
 
-    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
-
     cv::gpu::GpuMat d_train(train);
 
     // make add() twice to test such case
@@ -936,7 +934,7 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd)
 
 INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
     ALL_DEVICES,
-    testing::Values(DistType(cv::gpu::BruteForceMatcher_GPU_base::L1Dist), DistType(cv::gpu::BruteForceMatcher_GPU_base::L2Dist)),
+    testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2)),
     testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304))));
 
 } // namespace
diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp
index 9cfd343f8..854db008c 100644
--- a/modules/stitching/src/matchers.cpp
+++ b/modules/stitching/src/matchers.cpp
@@ -219,7 +219,7 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
     descriptors1_.upload(features1.descriptors);
     descriptors2_.upload(features2.descriptors);
 
-    BruteForceMatcher_GPU< L2<float> > matcher;
+    BFMatcher_GPU matcher(NORM_L2);
     MatchesSet matches;
 
     // Find 1->2 matches
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp
index e025d7dd1..01020a9c0 100644
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -363,7 +363,7 @@ TEST(BruteForceMatcher)
 
     // Init GPU matcher
 
-    gpu::BruteForceMatcher_GPU< L2<float> > d_matcher;
+    gpu::BFMatcher_GPU d_matcher(NORM_L2);
 
     gpu::GpuMat d_query(query);
     gpu::GpuMat d_train(train);
diff --git a/samples/gpu/surf_keypoint_matcher.cpp b/samples/gpu/surf_keypoint_matcher.cpp
index dedef2c07..b56fa5672 100644
--- a/samples/gpu/surf_keypoint_matcher.cpp
+++ b/samples/gpu/surf_keypoint_matcher.cpp
@@ -57,7 +57,7 @@ int main(int argc, char* argv[])
     cout << "FOUND " << keypoints2GPU.cols << " keypoints on second image" << endl;
 
     // matching descriptors
-    BruteForceMatcher_GPU< L2<float> > matcher;
+    BFMatcher_GPU matcher(NORM_L2);
     GpuMat trainIdx, distance;
     matcher.matchSingle(descriptors1GPU, descriptors2GPU, trainIdx, distance);
     
@@ -69,7 +69,7 @@ int main(int argc, char* argv[])
     surf.downloadKeypoints(keypoints2GPU, keypoints2);
     surf.downloadDescriptors(descriptors1GPU, descriptors1);
     surf.downloadDescriptors(descriptors2GPU, descriptors2);
-    BruteForceMatcher_GPU< L2<float> >::matchDownload(trainIdx, distance, matches);
+    BFMatcher_GPU::matchDownload(trainIdx, distance, matches);
 
     // drawing the results
     Mat img_matches;