diff --git a/modules/flann/include/opencv2/flann/defines.h b/modules/flann/include/opencv2/flann/defines.h
index 13833b3c0..f0264f74e 100644
--- a/modules/flann/include/opencv2/flann/defines.h
+++ b/modules/flann/include/opencv2/flann/defines.h
@@ -107,6 +107,7 @@ enum flann_centers_init_t
     FLANN_CENTERS_RANDOM = 0,
     FLANN_CENTERS_GONZALES = 1,
     FLANN_CENTERS_KMEANSPP = 2,
+    FLANN_CENTERS_GROUPWISE = 3,
 
     // deprecated constants, should use the FLANN_CENTERS_* ones instead
     CENTERS_RANDOM = 0,
diff --git a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
index 710382053..88af4e706 100644
--- a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
+++ b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
@@ -257,6 +257,91 @@ private:
     }
 
 
+    /**
+     * Chooses the initial centers in a way inspired by Gonzales (by Pierre-Emmanuel Viel):
+     * select the first point of the list as a candidate, then parse the points list. If another
+     * point is further than current candidate from the other centers, test if it is a good center
+     * of a local aggregation. If it is, replace current candidate by this point. And so on...
+     *
+     * Used with KMeansIndex that computes centers coordinates by averaging positions of clusters points,
+     * this doesn't make a real difference with previous methods. But used with HierarchicalClusteringIndex
+     * class that picks centers among existing points instead of computing the barycenters, there is a real
+     * improvement.
+     *
+     * Params:
+     *     k = number of centers
+     *     dsindices = indices in the dataset of the points to choose the centers from
+     *     indices_length = number of entries in dsindices
+     *     centers = output array receiving the dataset indices of the chosen centers
+     *     centers_length = output, number of centers written; less than k if the distinct points run out
+     */
+    void GroupWiseCenterChooser(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
+    {
+        const float kSpeedUpFactor = 1.3f;
+
+        int n = indices_length;
+
+        DistanceType* closestDistSq = new DistanceType[n];
+
+        // Choose one random center and set the closestDistSq values
+        int index = rand_int(n);
+        assert(index >=0 && index < n);
+        centers[0] = dsindices[index];
+
+        for (int i = 0; i < n; i++) {
+            closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
+        }
+
+
+        // Choose each center
+        int centerCount;
+        for (centerCount = 1; centerCount < k; centerCount++) {
+
+            // Scan the points for the candidate giving the lowest potential
+            double bestNewPot = -1;
+            int bestNewIndex = 0;
+            DistanceType furthest = 0;
+            for (index = 0; index < n; index++) {
+
+                // We will test only the potential of the points further than current candidate
+                if( closestDistSq[index] > kSpeedUpFactor * (float)furthest ) {
+
+                    // Compute the new potential
+                    double newPot = 0;
+                    for (int i = 0; i < n; i++) {
+                        newPot += std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols)
+                                            , closestDistSq[i] );
+                    }
+
+                    // Store the best result
+                    if ((bestNewPot < 0)||(newPot <= bestNewPot)) {
+                        bestNewPot = newPot;
+                        bestNewIndex = index;
+                        furthest = closestDistSq[index];
+                    }
+                }
+            }
+
+            // No point passed the distance test, i.e. every point coincides with a center
+            // chosen so far: stop here rather than add dsindices[0] as a duplicate center.
+            if (bestNewPot < 0) {
+                break;
+            }
+
+            // Add the appropriate center
+            centers[centerCount] = dsindices[bestNewIndex];
+            for (int i = 0; i < n; i++) {
+                closestDistSq[i] = std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols)
+                                             , closestDistSq[i] );
+            }
+        }
+
+        centers_length = centerCount;
+
+        delete[] closestDistSq;
+    }
+
+
 public:
 
 
@@ -290,6 +375,9 @@ public:
         else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
             chooseCenters = &HierarchicalClusteringIndex::chooseCentersKMeanspp;
         }
+        else if (centers_init_==FLANN_CENTERS_GROUPWISE) {
+            chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser;
+        }
         else {
             throw FLANNException("Unknown algorithm for choosing initial centers.");
         }