/*********************************************************************** * Software License Agreement (BSD License) * * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. * * THE BSD LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************************/ #ifndef AUTOTUNEDINDEX_H_ #define AUTOTUNEDINDEX_H_ #include "constants.h" #include "nn_index.h" #include "ground_truth.h" #include "index_testing.h" namespace flann { class AutotunedIndex : public NNIndex { NNIndex* bestIndex; IndexParams* bestParams; SearchParams bestSearchParams; Matrix* sampledDataset; Matrix* testDataset; Matrix* gt_matches; float speedup; /** * The dataset used by this index */ const Matrix dataset; /** * Index parameters */ const AutotunedIndexParams& params; /** * Number of features in the dataset. */ int size_; /** * Length of each feature. */ int veclen_; public: AutotunedIndex(const Matrix& inputData, const AutotunedIndexParams& params_ = AutotunedIndexParams() ) : dataset(inputData), params(params_) { size_ = dataset.rows; veclen_ = dataset.cols; bestIndex = NULL; } virtual ~AutotunedIndex() { delete bestIndex; delete bestParams; }; /** Method responsible with building the index. */ virtual void buildIndex() { bestParams = estimateBuildParams(); bestIndex = bestParams->createIndex(dataset); bestIndex->buildIndex(); speedup = estimateSearchParams(bestSearchParams); } /** Saves the index to a stream */ virtual void saveIndex(FILE* stream) { bestIndex->saveIndex(stream); } /** Loads the index from a stream */ virtual void loadIndex(FILE* stream) { bestIndex->loadIndex(stream); } /** Method that searches for nearest-neighbors */ virtual void findNeighbors(ResultSet& result, const float* vec, const SearchParams& /*searchParams*/) { bestIndex->findNeighbors(result, vec, bestSearchParams); } /** Number of features in this index. */ virtual int size() const { return bestIndex->size(); } /** The length of each vector in this index. */ virtual int veclen() const { return bestIndex->veclen(); } /** The amount of memory (in bytes) this index uses. */ virtual int usedMemory() const { return bestIndex->usedMemory(); } /** * Algorithm name */ virtual flann_algorithm_t getType() const { return bestIndex->getType(); } /** Estimates the search parameters required in order to get a certain precision. If testset is not given it uses cross-validation. */ // virtual Params estimateSearchParams(float precision, Dataset* testset = NULL) // { // Params params; // // return params; // } private: struct CostData { float searchTimeCost; float buildTimeCost; float timeCost; float memoryCost; float totalCost; }; typedef pair KDTreeCostData; typedef pair KMeansCostData; void evaluate_kmeans(CostData& cost, const KMeansIndexParams& kmeans_params) { StartStopTimer t; int checks; const int nn = 1; logger.info("KMeansTree using params: max_iterations=%d, branching=%d\n", kmeans_params.iterations, kmeans_params.branching); KMeansIndex kmeans(*sampledDataset, kmeans_params); // measure index build time t.start(); kmeans.buildIndex(); t.stop(); float buildTime = (float)t.value; // measure search time float searchTime = test_index_precision(kmeans, *sampledDataset, *testDataset, *gt_matches, params.target_precision, checks, nn);; float datasetMemory = (float)(sampledDataset->rows*sampledDataset->cols*sizeof(float)); cost.memoryCost = (kmeans.usedMemory()+datasetMemory)/datasetMemory; cost.searchTimeCost = searchTime; cost.buildTimeCost = buildTime; cost.timeCost = (buildTime*params.build_weight+searchTime); logger.info("KMeansTree buildTime=%g, searchTime=%g, timeCost=%g, buildTimeFactor=%g\n",buildTime, searchTime, cost.timeCost, params.build_weight); } void evaluate_kdtree(CostData& cost, const KDTreeIndexParams& kdtree_params) { StartStopTimer t; int checks; const int nn = 1; logger.info("KDTree using params: trees=%d\n",kdtree_params.trees); KDTreeIndex kdtree(*sampledDataset, kdtree_params); t.start(); kdtree.buildIndex(); t.stop(); float buildTime = (float)t.value; //measure search time float searchTime = test_index_precision(kdtree, *sampledDataset, *testDataset, *gt_matches, params.target_precision, checks, nn); float datasetMemory = (float)(sampledDataset->rows*sampledDataset->cols*sizeof(float)); cost.memoryCost = (kdtree.usedMemory()+datasetMemory)/datasetMemory; cost.searchTimeCost = searchTime; cost.buildTimeCost = buildTime; cost.timeCost = (buildTime*params.build_weight+searchTime); logger.info("KDTree buildTime=%g, searchTime=%g, timeCost=%g\n",buildTime, searchTime, cost.timeCost); } // struct KMeansSimpleDownhillFunctor { // // Autotune& autotuner; // KMeansSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}; // // float operator()(int* params) { // // float maxFloat = numeric_limits::max(); // // if (params[0]<2) return maxFloat; // if (params[1]<0) return maxFloat; // // CostData c; // c.params["algorithm"] = KMEANS; // c.params["centers-init"] = CENTERS_RANDOM; // c.params["branching"] = params[0]; // c.params["max-iterations"] = params[1]; // // autotuner.evaluate_kmeans(c); // // return c.timeCost; // // } // }; // // struct KDTreeSimpleDownhillFunctor { // // Autotune& autotuner; // KDTreeSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}; // // float operator()(int* params) { // float maxFloat = numeric_limits::max(); // // if (params[0]<1) return maxFloat; // // CostData c; // c.params["algorithm"] = KDTREE; // c.params["trees"] = params[0]; // // autotuner.evaluate_kdtree(c); // // return c.timeCost; // // } // }; KMeansCostData optimizeKMeans() { logger.info("KMEANS, Step 1: Exploring parameter space\n"); // explore kmeans parameters space using combinations of the parameters below int maxIterations[] = { 1, 5, 10, 15 }; int branchingFactors[] = { 16, 32, 64, 128, 256 }; int kmeansParamSpaceSize = ARRAY_LEN(maxIterations)*ARRAY_LEN(branchingFactors); vector kmeansCosts(kmeansParamSpaceSize); // CostData* kmeansCosts = new CostData[kmeansParamSpaceSize]; // evaluate kmeans for all parameter combinations int cnt = 0; for (size_t i=0; i0 && kmeansCosts[k].first.timeCost < kmeansCosts[k-1].first.timeCost) { swap(kmeansCosts[k],kmeansCosts[k-1]); --k; } ++cnt; } } // logger.info("KMEANS, Step 2: simplex-downhill optimization\n"); // // const int n = 2; // // choose initial simplex points as the best parameters so far // int kmeansNMPoints[n*(n+1)]; // float kmeansVals[n+1]; // for (int i=0;i0 && kmeansCosts[k].first.totalCost < kmeansCosts[k-1].first.totalCost) { swap(kmeansCosts[k],kmeansCosts[k-1]); k--; } } // display the costs obtained for (int i=0;i kdtreeCosts(kdtreeParamSpaceSize); // evaluate kdtree for all parameter combinations int cnt = 0; for (size_t i=0; i0 && kdtreeCosts[k].first.timeCost < kdtreeCosts[k-1].first.timeCost) { swap(kdtreeCosts[k],kdtreeCosts[k-1]); --k; } ++cnt; } // logger.info("KD-TREE, Step 2: simplex-downhill optimization\n"); // // const int n = 1; // // choose initial simplex points as the best parameters so far // int kdtreeNMPoints[n*(n+1)]; // float kdtreeVals[n+1]; // for (int i=0;i0 && kdtreeCosts[k].first.totalCost < kdtreeCosts[k-1].first.totalCost) { swap(kdtreeCosts[k],kdtreeCosts[k-1]); k--; } } // display costs obtained for (size_t i=0;isample(testSampleSize,true); // We compute the ground truth using linear search logger.info("Computing ground truth... \n"); gt_matches = new Matrix(testDataset->rows, 1); StartStopTimer t; t.start(); compute_ground_truth(*sampledDataset, *testDataset, *gt_matches, 0); t.stop(); float bestCost = (float)t.value; IndexParams* bestParams = new LinearIndexParams(); // Start parameter autotune process logger.info("Autotuning parameters...\n"); KMeansCostData kmeansCost = optimizeKMeans(); if (kmeansCost.first.totalCost0) { Matrix* testDataset = dataset.sample(samples); logger.info("Computing ground truth\n"); // we need to compute the ground truth first Matrix gt_matches(testDataset->rows,1); StartStopTimer t; t.start(); compute_ground_truth(dataset, *testDataset, gt_matches,1); t.stop(); float linear = (float)t.value; int checks; logger.info("Estimating number of checks\n"); float searchTime; float cb_index; if (bestIndex->getType() == KMEANS) { logger.info("KMeans algorithm, estimating cluster border factor\n"); KMeansIndex* kmeans = (KMeansIndex*)bestIndex; float bestSearchTime = -1; float best_cb_index = -1; int best_checks = -1; for (cb_index = 0;cb_index<1.1f; cb_index+=0.2f) { kmeans->set_cb_index(cb_index); searchTime = test_index_precision(*kmeans, dataset, *testDataset, gt_matches, params.target_precision, checks, nn, 1); if (searchTimeset_cb_index(best_cb_index); logger.info("Optimum cb_index: %g\n",cb_index); ((KMeansIndexParams*)bestParams)->cb_index = cb_index; } else { searchTime = test_index_precision(*bestIndex, dataset, *testDataset, gt_matches, params.target_precision, checks, nn, 1); } logger.info("Required number of checks: %d \n",checks);; searchParams.checks = checks; delete testDataset; speedup = linear/searchTime; } return speedup; } }; } #endif /* AUTOTUNEDINDEX_H_ */