From dd0fa63ca87d1a32ea8f48f893042f38d36778c8 Mon Sep 17 00:00:00 2001
From: yao <bitwangyaoyao@gmail.com>
Date: Fri, 25 Oct 2013 16:01:41 +0800
Subject: [PATCH 1/2] fix the bug of ocl::bruteForceMatcher (apply DIST_RES after the block reduction loop)

---
 modules/ocl/perf/perf_brute_force_matcher.cpp | 14 +++++++-------
 modules/ocl/src/opencl/brute_force_match.cl   |  7 ++++++-
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/modules/ocl/perf/perf_brute_force_matcher.cpp b/modules/ocl/perf/perf_brute_force_matcher.cpp
index 33c42c72d..09b99f5e2 100644
--- a/modules/ocl/perf/perf_brute_force_matcher.cpp
+++ b/modules/ocl/perf/perf_brute_force_matcher.cpp
@@ -53,8 +53,8 @@ using namespace perf;
 
 typedef TestBaseWithParam<Size> BruteForceMatcherFixture;
 
-PERF_TEST_P(BruteForceMatcherFixture, DISABLED_match,
-            OCL_BFMATCHER_TYPICAL_MAT_SIZES) // TODO too big difference between implementations
+PERF_TEST_P(BruteForceMatcherFixture, match,
+            OCL_BFMATCHER_TYPICAL_MAT_SIZES)
 {
     const Size srcSize = GetParam();
 
@@ -82,14 +82,14 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_match,
 
         oclMatcher.matchDownload(oclTrainIdx, oclDistance, matches);
 
-        SANITY_CHECK_MATCHES(matches);
+        SANITY_CHECK_MATCHES(matches, 1e-5);
     }
     else
         OCL_PERF_ELSE
 }
 
-PERF_TEST_P(BruteForceMatcherFixture, DISABLED_knnMatch,
-            OCL_BFMATCHER_TYPICAL_MAT_SIZES) // TODO too big difference between implementations
+PERF_TEST_P(BruteForceMatcherFixture, knnMatch,
+            OCL_BFMATCHER_TYPICAL_MAT_SIZES)
 {
     const Size srcSize = GetParam();
 
@@ -123,8 +123,8 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_knnMatch,
         oclMatcher.knnMatchDownload(oclTrainIdx, oclDistance, matches);
 
         std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
-        SANITY_CHECK_MATCHES(matches0);
-        SANITY_CHECK_MATCHES(matches1);
+        SANITY_CHECK_MATCHES(matches0, 1e-5);
+        SANITY_CHECK_MATCHES(matches1, 1e-5);
     }
     else
         OCL_PERF_ELSE
diff --git a/modules/ocl/src/opencl/brute_force_match.cl b/modules/ocl/src/opencl/brute_force_match.cl
index a05c98ee0..ad668e6e3 100644
--- a/modules/ocl/src/opencl/brute_force_match.cl
+++ b/modules/ocl/src/opencl/brute_force_match.cl
@@ -17,6 +17,7 @@
 // @Authors
 //    Nathan, liujun@multicorewareinc.com
 //    Peng Xiao, pengxiao@outlook.com
+//    Baichuan Su, baichuan@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -128,7 +129,7 @@ result_type reduce_multi_block(
             s_query[lidy * MAX_DESC_LEN + block_index * BLOCK_SIZE + j],
             s_train[j * BLOCK_SIZE + lidx]);
     }
-    return DIST_RES(result);
+    return result;
 }
 
 /* 2dim launch, global size: dim0 is (query rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, dim1 is BLOCK_SIZE
@@ -187,6 +188,8 @@ __kernel void BruteForceMatch_UnrollMatch(
             barrier(CLK_LOCAL_MEM_FENCE);
         }
 
+        result = DIST_RES(result);
+
         int trainIdx = t * BLOCK_SIZE + lidx;
 
         if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/)
@@ -493,6 +496,8 @@ __kernel void BruteForceMatch_knnUnrollMatch(
             barrier(CLK_LOCAL_MEM_FENCE);
         }
 
+        result = DIST_RES(result);
+
         const int trainIdx = t * BLOCK_SIZE + lidx;
 
         if (queryIdx < query_rows && trainIdx < train_rows)

From 632452cdd855144afbf5c638ad69a02b8b2c45db Mon Sep 17 00:00:00 2001
From: yao <bitwangyaoyao@gmail.com>
Date: Mon, 28 Oct 2013 16:32:46 +0800
Subject: [PATCH 2/2] fix the mismatch when running on CPU devices

---
 modules/ocl/src/opencl/brute_force_match.cl | 26 +++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/modules/ocl/src/opencl/brute_force_match.cl b/modules/ocl/src/opencl/brute_force_match.cl
index ad668e6e3..cb0aba255 100644
--- a/modules/ocl/src/opencl/brute_force_match.cl
+++ b/modules/ocl/src/opencl/brute_force_match.cl
@@ -113,6 +113,24 @@ result_type reduce_block(
     return DIST_RES(result);
 }
 
+result_type reduce_block_match(  // sums DIST over one BLOCK_SIZE-wide tile; unlike reduce_block, returns the sum without DIST_RES
+    __local value_type *s_query,  // local tile, read at [lidy * BLOCK_SIZE + j]
+    __local value_type *s_train,  // local tile, read at [j * BLOCK_SIZE + lidx]
+    int lidx,  // offset of the s_train element within each BLOCK_SIZE stride
+    int lidy   // selects which BLOCK_SIZE-long run of s_query is read
+    )
+{
+    result_type result = 0;
+    #pragma unroll
+    for (int j = 0 ; j < BLOCK_SIZE ; j++)
+    {
+        result += DIST(
+            s_query[lidy * BLOCK_SIZE + j],
+            s_train[j * BLOCK_SIZE + lidx]);
+    }
+    return (result);  // no DIST_RES here: the Match/knnMatch kernels add these partial sums, then apply DIST_RES after their loop
+}
+
 result_type reduce_multi_block(
     __local value_type *s_query,
     __local value_type *s_train,
@@ -275,11 +293,13 @@ __kernel void BruteForceMatch_Match(
 
             barrier(CLK_LOCAL_MEM_FENCE);
 
-            result += reduce_block(s_query, s_train, lidx, lidy);
+            result += reduce_block_match(s_query, s_train, lidx, lidy);
 
             barrier(CLK_LOCAL_MEM_FENCE);
         }
 
+        result = DIST_RES(result);
+
         const int trainIdx = t * BLOCK_SIZE + lidx;
 
         if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/)
@@ -636,11 +656,13 @@ __kernel void BruteForceMatch_knnMatch(
 
             barrier(CLK_LOCAL_MEM_FENCE);
 
-            result += reduce_block(s_query, s_train, lidx, lidy);
+            result += reduce_block_match(s_query, s_train, lidx, lidy);
 
             barrier(CLK_LOCAL_MEM_FENCE);
         }
 
+        result = DIST_RES(result);
+
         const int trainIdx = t * BLOCK_SIZE + lidx;
 
         if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/)