Merge remote-tracking branch 'upstream/2.4' into LshOrthogonalSubvectors

2014-05-21 01:31:52 +02:00
parent e63d7de87c 976da2f3d3
commit 00367cfb00
485 changed files with 25360 additions and 5019 deletions
--- a/modules/flann/include/opencv2/flann/autotuned_index.h
+++ b/modules/flann/include/opencv2/flann/autotuned_index.h
@@ -99,18 +99,22 @@ public:
     */
    virtual void buildIndex()
    {
+        std::ostringstream stream;
        bestParams_ = estimateBuildParams();
+        print_params(bestParams_, stream);
        Logger::info("----------------------------------------------------\n");
        Logger::info("Autotuned parameters:\n");
-        print_params(bestParams_);
+        Logger::info("%s", stream.str().c_str());
        Logger::info("----------------------------------------------------\n");

        bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
        bestIndex_->buildIndex();
        speedup_ = estimateSearchParams(bestSearchParams_);
+        stream.str(std::string());
+        print_params(bestSearchParams_, stream);
        Logger::info("----------------------------------------------------\n");
        Logger::info("Search parameters:\n");
-        print_params(bestSearchParams_);
+        Logger::info("%s", stream.str().c_str());
        Logger::info("----------------------------------------------------\n");
    }

--- a/modules/flann/include/opencv2/flann/dist.h
+++ b/modules/flann/include/opencv2/flann/dist.h
@@ -812,6 +812,66 @@ struct ZeroIterator

 };

+
+/*
+ * Some distance functors already return squared values (e.g. L2) while
+ * others do not (e.g. Hamming). Algorithms such as KMeans++ need to work on
+ * squared distances, so the following templates square the value when needed.
+ */
+template <typename Distance, typename ElementType> // ElementType is not used in this body; the specializations match on it
+struct squareDistance // Generic case: functor that squares a value of Distance::ResultType.
+{
+    typedef typename Distance::ResultType ResultType; // result type declared by the Distance functor
+    ResultType operator()( ResultType dist ) { return dist*dist; } // returns dist multiplied by itself
+};
+
+
+template <typename ElementType>
+struct squareDistance<L2_Simple<ElementType>, ElementType> // Specialization selected when Distance is L2_Simple.
+{
+    typedef typename L2_Simple<ElementType>::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist; } // pass-through: the value is treated as already squared
+};
+
+template <typename ElementType>
+struct squareDistance<L2<ElementType>, ElementType> // Specialization selected when Distance is L2.
+{
+    typedef typename L2<ElementType>::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist; } // pass-through: the value is treated as already squared
+};
+
+
+template <typename ElementType>
+struct squareDistance<MinkowskiDistance<ElementType>, ElementType> // Specialization selected when Distance is MinkowskiDistance.
+{
+    typedef typename MinkowskiDistance<ElementType>::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist; } // pass-through: returned unchanged (NOTE(review): confirm this holds for every Minkowski order)
+};
+
+template <typename ElementType>
+struct squareDistance<HellingerDistance<ElementType>, ElementType> // Specialization selected when Distance is HellingerDistance.
+{
+    typedef typename HellingerDistance<ElementType>::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist; } // pass-through: the value is returned unchanged, not squared again
+};
+
+template <typename ElementType>
+struct squareDistance<ChiSquareDistance<ElementType>, ElementType> // Specialization selected when Distance is ChiSquareDistance.
+{
+    typedef typename ChiSquareDistance<ElementType>::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist; } // pass-through: the value is returned unchanged, not squared again
+};
+
+
+template <typename Distance> // Distance appears only in non-deduced positions, so callers must name it explicitly
+typename Distance::ResultType ensureSquareDistance( typename Distance::ResultType dist )
+{
+    typedef typename Distance::ElementType ElementType;
+    // squareDistance decides, per Distance type, whether dist is squared or returned unchanged.
+    squareDistance<Distance, ElementType> dummy;
+    return dummy( dist );
+}
+
 }

 #endif //OPENCV_FLANN_DIST_H_
--- a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
+++ b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
@@ -210,8 +210,11 @@ private:
        assert(index >=0 && index < n);
        centers[0] = dsindices[index];

+        // Using squared distances further increases the probability of picking new centers far from the
+        // previous centers (and this complies with the "k-means++: The Advantages of Careful Seeding" article)
        for (int i = 0; i < n; i++) {
            closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
+            closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
            currentPot += closestDistSq[i];
        }

@@ -237,7 +240,10 @@ private:

                // Compute the new potential
                double newPot = 0;
-                for (int i = 0; i < n; i++) newPot += std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols), closestDistSq[i] );
+                for (int i = 0; i < n; i++) {
+                    DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
+                    newPot += std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
+                }

                // Store the best result
                if ((bestNewPot < 0)||(newPot < bestNewPot)) {
@@ -249,7 +255,10 @@ private:
            // Add the appropriate center
            centers[centerCount] = dsindices[bestNewIndex];
            currentPot = bestNewPot;
-            for (int i = 0; i < n; i++) closestDistSq[i] = std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols), closestDistSq[i] );
+            for (int i = 0; i < n; i++) {
+                DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols);
+                closestDistSq[i] = std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
+            }
        }

        centers_length = centerCount;
@@ -414,12 +423,6 @@ public:

    void loadIndex(FILE* stream)
    {
-        load_value(stream, branching_);
-        load_value(stream, trees_);
-        load_value(stream, centers_init_);
-        load_value(stream, leaf_size_);
-        load_value(stream, memoryCounter);
-
        free_elements();

        if (root!=NULL) {
@@ -430,6 +433,12 @@ public:
            delete[] indices;
        }

+        load_value(stream, branching_);
+        load_value(stream, trees_);
+        load_value(stream, centers_init_);
+        load_value(stream, leaf_size_);
+        load_value(stream, memoryCounter);
+
        indices = new int*[trees_];
        root = new NodePtr[trees_];
        for (int i=0; i<trees_; ++i) {
--- a/modules/flann/include/opencv2/flann/kmeans_index.h
+++ b/modules/flann/include/opencv2/flann/kmeans_index.h
@@ -211,6 +211,7 @@ public:

        for (int i = 0; i < n; i++) {
            closestDistSq[i] = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
+            closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
            currentPot += closestDistSq[i];
        }

@@ -236,7 +237,10 @@ public:

                // Compute the new potential
                double newPot = 0;
-                for (int i = 0; i < n; i++) newPot += std::min( distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols), closestDistSq[i] );
+                for (int i = 0; i < n; i++) {
+                    DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
+                    newPot += std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
+                }

                // Store the best result
                if ((bestNewPot < 0)||(newPot < bestNewPot)) {
@@ -248,7 +252,10 @@ public:
            // Add the appropriate center
            centers[centerCount] = indices[bestNewIndex];
            currentPot = bestNewPot;
-            for (int i = 0; i < n; i++) closestDistSq[i] = std::min( distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols), closestDistSq[i] );
+            for (int i = 0; i < n; i++) {
+                DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols);
+                closestDistSq[i] = std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
+            }
        }

        centers_length = centerCount;
@@ -759,10 +766,13 @@ private:

                    for (int k=0; k<indices_length; ++k) {
                        if (belongs_to[k]==j) {
-                            belongs_to[k] = i;
-                            count[j]--;
-                            count[i]++;
-                            break;
+                            // for cluster j, we move the furthest element from the center to the empty cluster i
+                            if ( distance_(dataset_[indices[k]], dcenters[j], veclen_) == radiuses[j] ) {
+                                belongs_to[k] = i;
+                                count[j]--;
+                                count[i]++;
+                                break;
+                            }
                        }
                    }
                    converged = false;
--- a/modules/flann/include/opencv2/flann/params.h
+++ b/modules/flann/include/opencv2/flann/params.h
@@ -79,16 +79,19 @@ T get_param(const IndexParams& params, std::string name)
    }
 }

-inline void print_params(const IndexParams& params)
+inline void print_params(const IndexParams& params, std::ostream& stream)
 {
    IndexParams::const_iterator it;

    for(it=params.begin(); it!=params.end(); ++it) {
-        std::cout << it->first << " : " << it->second << std::endl;
+        stream << it->first << " : " << it->second << std::endl;
    }
 }

-
+inline void print_params(const IndexParams& params)
+{
+    print_params(params, std::cout);
+}

 }