123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591 |
- /***********************************************************************
- * Software License Agreement (BSD License)
- *
- * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
- * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
- *
- * THE BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *************************************************************************/
- #ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
- #define OPENCV_FLANN_AUTOTUNED_INDEX_H_
- #include <sstream>
- #include "general.h"
- #include "nn_index.h"
- #include "ground_truth.h"
- #include "index_testing.h"
- #include "sampling.h"
- #include "kdtree_index.h"
- #include "kdtree_single_index.h"
- #include "kmeans_index.h"
- #include "composite_index.h"
- #include "linear_index.h"
- #include "logger.h"
- namespace cvflann
- {
- template<typename Distance>
- NNIndex<Distance>* create_index_by_type(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance);
- struct AutotunedIndexParams : public IndexParams
- {
- AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
- {
- (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
- // precision desired (used for autotuning, -1 otherwise)
- (*this)["target_precision"] = target_precision;
- // build tree time weighting factor
- (*this)["build_weight"] = build_weight;
- // index memory weighting factor
- (*this)["memory_weight"] = memory_weight;
- // what fraction of the dataset to use for autotuning
- (*this)["sample_fraction"] = sample_fraction;
- }
- };
- template <typename Distance>
- class AutotunedIndex : public NNIndex<Distance>
- {
- public:
- typedef typename Distance::ElementType ElementType;
- typedef typename Distance::ResultType DistanceType;
- AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
- dataset_(inputData), distance_(d)
- {
- target_precision_ = get_param(params, "target_precision",0.8f);
- build_weight_ = get_param(params,"build_weight", 0.01f);
- memory_weight_ = get_param(params, "memory_weight", 0.0f);
- sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
- bestIndex_ = NULL;
- speedup_ = 0;
- }
- AutotunedIndex(const AutotunedIndex&);
- AutotunedIndex& operator=(const AutotunedIndex&);
- virtual ~AutotunedIndex()
- {
- if (bestIndex_ != NULL) {
- delete bestIndex_;
- bestIndex_ = NULL;
- }
- }
- /**
- * Method responsible with building the index.
- */
- virtual void buildIndex() CV_OVERRIDE
- {
- std::ostringstream stream;
- bestParams_ = estimateBuildParams();
- print_params(bestParams_, stream);
- Logger::info("----------------------------------------------------\n");
- Logger::info("Autotuned parameters:\n");
- Logger::info("%s", stream.str().c_str());
- Logger::info("----------------------------------------------------\n");
- bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
- bestIndex_->buildIndex();
- speedup_ = estimateSearchParams(bestSearchParams_);
- stream.str(std::string());
- print_params(bestSearchParams_, stream);
- Logger::info("----------------------------------------------------\n");
- Logger::info("Search parameters:\n");
- Logger::info("%s", stream.str().c_str());
- Logger::info("----------------------------------------------------\n");
- }
- /**
- * Saves the index to a stream
- */
- virtual void saveIndex(FILE* stream) CV_OVERRIDE
- {
- save_value(stream, (int)bestIndex_->getType());
- bestIndex_->saveIndex(stream);
- save_value(stream, get_param<int>(bestSearchParams_, "checks"));
- }
- /**
- * Loads the index from a stream
- */
- virtual void loadIndex(FILE* stream) CV_OVERRIDE
- {
- int index_type;
- load_value(stream, index_type);
- IndexParams params;
- params["algorithm"] = (flann_algorithm_t)index_type;
- bestIndex_ = create_index_by_type<Distance>(dataset_, params, distance_);
- bestIndex_->loadIndex(stream);
- int checks;
- load_value(stream, checks);
- bestSearchParams_["checks"] = checks;
- }
- /**
- * Method that searches for nearest-neighbors
- */
- virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
- {
- int checks = get_param<int>(searchParams,"checks",FLANN_CHECKS_AUTOTUNED);
- if (checks == FLANN_CHECKS_AUTOTUNED) {
- bestIndex_->findNeighbors(result, vec, bestSearchParams_);
- }
- else {
- bestIndex_->findNeighbors(result, vec, searchParams);
- }
- }
- IndexParams getParameters() const CV_OVERRIDE
- {
- return bestIndex_->getParameters();
- }
- SearchParams getSearchParameters() const
- {
- return bestSearchParams_;
- }
- float getSpeedup() const
- {
- return speedup_;
- }
- /**
- * Number of features in this index.
- */
- virtual size_t size() const CV_OVERRIDE
- {
- return bestIndex_->size();
- }
- /**
- * The length of each vector in this index.
- */
- virtual size_t veclen() const CV_OVERRIDE
- {
- return bestIndex_->veclen();
- }
- /**
- * The amount of memory (in bytes) this index uses.
- */
- virtual int usedMemory() const CV_OVERRIDE
- {
- return bestIndex_->usedMemory();
- }
- /**
- * Algorithm name
- */
- virtual flann_algorithm_t getType() const CV_OVERRIDE
- {
- return FLANN_INDEX_AUTOTUNED;
- }
- private:
- struct CostData
- {
- float searchTimeCost;
- float buildTimeCost;
- float memoryCost;
- float totalCost;
- IndexParams params;
- };
- void evaluate_kmeans(CostData& cost)
- {
- StartStopTimer t;
- int checks;
- const int nn = 1;
- Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
- get_param<int>(cost.params,"iterations"),
- get_param<int>(cost.params,"branching"));
- KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);
- // measure index build time
- t.start();
- kmeans.buildIndex();
- t.stop();
- float buildTime = (float)t.value;
- // measure search time
- float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
- float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
- cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
- cost.searchTimeCost = searchTime;
- cost.buildTimeCost = buildTime;
- Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
- }
- void evaluate_kdtree(CostData& cost)
- {
- StartStopTimer t;
- int checks;
- const int nn = 1;
- Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
- KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
- t.start();
- kdtree.buildIndex();
- t.stop();
- float buildTime = (float)t.value;
- //measure search time
- float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
- float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
- cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
- cost.searchTimeCost = searchTime;
- cost.buildTimeCost = buildTime;
- Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
- }
- // struct KMeansSimpleDownhillFunctor {
- //
- // Autotune& autotuner;
- // KMeansSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}
- //
- // float operator()(int* params) {
- //
- // float maxFloat = numeric_limits<float>::max();
- //
- // if (params[0]<2) return maxFloat;
- // if (params[1]<0) return maxFloat;
- //
- // CostData c;
- // c.params["algorithm"] = KMEANS;
- // c.params["centers-init"] = CENTERS_RANDOM;
- // c.params["branching"] = params[0];
- // c.params["max-iterations"] = params[1];
- //
- // autotuner.evaluate_kmeans(c);
- //
- // return c.timeCost;
- //
- // }
- // };
- //
- // struct KDTreeSimpleDownhillFunctor {
- //
- // Autotune& autotuner;
- // KDTreeSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}
- //
- // float operator()(int* params) {
- // float maxFloat = numeric_limits<float>::max();
- //
- // if (params[0]<1) return maxFloat;
- //
- // CostData c;
- // c.params["algorithm"] = KDTREE;
- // c.params["trees"] = params[0];
- //
- // autotuner.evaluate_kdtree(c);
- //
- // return c.timeCost;
- //
- // }
- // };
- void optimizeKMeans(std::vector<CostData>& costs)
- {
- Logger::info("KMEANS, Step 1: Exploring parameter space\n");
- // explore kmeans parameters space using combinations of the parameters below
- int maxIterations[] = { 1, 5, 10, 15 };
- int branchingFactors[] = { 16, 32, 64, 128, 256 };
- int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
- costs.reserve(costs.size() + kmeansParamSpaceSize);
- // evaluate kmeans for all parameter combinations
- for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
- for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
- CostData cost;
- cost.params["algorithm"] = FLANN_INDEX_KMEANS;
- cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
- cost.params["iterations"] = maxIterations[i];
- cost.params["branching"] = branchingFactors[j];
- evaluate_kmeans(cost);
- costs.push_back(cost);
- }
- }
- // Logger::info("KMEANS, Step 2: simplex-downhill optimization\n");
- //
- // const int n = 2;
- // // choose initial simplex points as the best parameters so far
- // int kmeansNMPoints[n*(n+1)];
- // float kmeansVals[n+1];
- // for (int i=0;i<n+1;++i) {
- // kmeansNMPoints[i*n] = (int)kmeansCosts[i].params["branching"];
- // kmeansNMPoints[i*n+1] = (int)kmeansCosts[i].params["max-iterations"];
- // kmeansVals[i] = kmeansCosts[i].timeCost;
- // }
- // KMeansSimpleDownhillFunctor kmeans_cost_func(*this);
- // // run optimization
- // optimizeSimplexDownhill(kmeansNMPoints,n,kmeans_cost_func,kmeansVals);
- // // store results
- // for (int i=0;i<n+1;++i) {
- // kmeansCosts[i].params["branching"] = kmeansNMPoints[i*2];
- // kmeansCosts[i].params["max-iterations"] = kmeansNMPoints[i*2+1];
- // kmeansCosts[i].timeCost = kmeansVals[i];
- // }
- }
- void optimizeKDTree(std::vector<CostData>& costs)
- {
- Logger::info("KD-TREE, Step 1: Exploring parameter space\n");
- // explore kd-tree parameters space using the parameters below
- int testTrees[] = { 1, 4, 8, 16, 32 };
- // evaluate kdtree for all parameter combinations
- for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
- CostData cost;
- cost.params["algorithm"] = FLANN_INDEX_KDTREE;
- cost.params["trees"] = testTrees[i];
- evaluate_kdtree(cost);
- costs.push_back(cost);
- }
- // Logger::info("KD-TREE, Step 2: simplex-downhill optimization\n");
- //
- // const int n = 1;
- // // choose initial simplex points as the best parameters so far
- // int kdtreeNMPoints[n*(n+1)];
- // float kdtreeVals[n+1];
- // for (int i=0;i<n+1;++i) {
- // kdtreeNMPoints[i] = (int)kdtreeCosts[i].params["trees"];
- // kdtreeVals[i] = kdtreeCosts[i].timeCost;
- // }
- // KDTreeSimpleDownhillFunctor kdtree_cost_func(*this);
- // // run optimization
- // optimizeSimplexDownhill(kdtreeNMPoints,n,kdtree_cost_func,kdtreeVals);
- // // store results
- // for (int i=0;i<n+1;++i) {
- // kdtreeCosts[i].params["trees"] = kdtreeNMPoints[i];
- // kdtreeCosts[i].timeCost = kdtreeVals[i];
- // }
- }
- /**
- * Chooses the best nearest-neighbor algorithm and estimates the optimal
- * parameters to use when building the index (for a given precision).
- * Returns a dictionary with the optimal parameters.
- */
- IndexParams estimateBuildParams()
- {
- std::vector<CostData> costs;
- int sampleSize = int(sample_fraction_ * dataset_.rows);
- int testSampleSize = std::min(sampleSize / 10, 1000);
- Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
- // For a very small dataset, it makes no sense to build any fancy index, just
- // use linear search
- if (testSampleSize < 10) {
- Logger::info("Choosing linear, dataset too small\n");
- return LinearIndexParams();
- }
- // We use a fraction of the original dataset to speedup the autotune algorithm
- sampledDataset_ = random_sample(dataset_, sampleSize);
- // We use a cross-validation approach, first we sample a testset from the dataset
- testDataset_ = random_sample(sampledDataset_, testSampleSize, true);
- // We compute the ground truth using linear search
- Logger::info("Computing ground truth... \n");
- gt_matches_ = Matrix<int>(new int[testDataset_.rows], testDataset_.rows, 1);
- StartStopTimer t;
- t.start();
- compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
- t.stop();
- CostData linear_cost;
- linear_cost.searchTimeCost = (float)t.value;
- linear_cost.buildTimeCost = 0;
- linear_cost.memoryCost = 0;
- linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;
- costs.push_back(linear_cost);
- // Start parameter autotune process
- Logger::info("Autotuning parameters...\n");
- optimizeKMeans(costs);
- optimizeKDTree(costs);
- float bestTimeCost = costs[0].searchTimeCost;
- for (size_t i = 0; i < costs.size(); ++i) {
- float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
- if (timeCost < bestTimeCost) {
- bestTimeCost = timeCost;
- }
- }
- float bestCost = costs[0].searchTimeCost / bestTimeCost;
- IndexParams bestParams = costs[0].params;
- if (bestTimeCost > 0) {
- for (size_t i = 0; i < costs.size(); ++i) {
- float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
- memory_weight_ * costs[i].memoryCost;
- if (crtCost < bestCost) {
- bestCost = crtCost;
- bestParams = costs[i].params;
- }
- }
- }
- delete[] gt_matches_.data;
- delete[] testDataset_.data;
- delete[] sampledDataset_.data;
- return bestParams;
- }
- /**
- * Estimates the search time parameters needed to get the desired precision.
- * Precondition: the index is built
- * Postcondition: the searchParams will have the optimum params set, also the speedup obtained over linear search.
- */
- float estimateSearchParams(SearchParams& searchParams)
- {
- const int nn = 1;
- const size_t SAMPLE_COUNT = 1000;
- assert(bestIndex_ != NULL); // must have a valid index
- float speedup = 0;
- int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
- if (samples > 0) {
- Matrix<ElementType> testDataset = random_sample(dataset_, samples);
- Logger::info("Computing ground truth\n");
- // we need to compute the ground truth first
- Matrix<int> gt_matches(new int[testDataset.rows], testDataset.rows, 1);
- StartStopTimer t;
- t.start();
- compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
- t.stop();
- float linear = (float)t.value;
- int checks;
- Logger::info("Estimating number of checks\n");
- float searchTime;
- float cb_index;
- if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
- Logger::info("KMeans algorithm, estimating cluster border factor\n");
- KMeansIndex<Distance>* kmeans = (KMeansIndex<Distance>*)bestIndex_;
- float bestSearchTime = -1;
- float best_cb_index = -1;
- int best_checks = -1;
- for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
- kmeans->set_cb_index(cb_index);
- searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
- if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
- bestSearchTime = searchTime;
- best_cb_index = cb_index;
- best_checks = checks;
- }
- }
- searchTime = bestSearchTime;
- cb_index = best_cb_index;
- checks = best_checks;
- kmeans->set_cb_index(best_cb_index);
- Logger::info("Optimum cb_index: %g\n", cb_index);
- bestParams_["cb_index"] = cb_index;
- }
- else {
- searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
- }
- Logger::info("Required number of checks: %d \n", checks);
- searchParams["checks"] = checks;
- speedup = linear / searchTime;
- delete[] gt_matches.data;
- delete[] testDataset.data;
- }
- return speedup;
- }
- private:
- NNIndex<Distance>* bestIndex_;
- IndexParams bestParams_;
- SearchParams bestSearchParams_;
- Matrix<ElementType> sampledDataset_;
- Matrix<ElementType> testDataset_;
- Matrix<int> gt_matches_;
- float speedup_;
- /**
- * The dataset used by this index
- */
- const Matrix<ElementType> dataset_;
- /**
- * Index parameters
- */
- float target_precision_;
- float build_weight_;
- float memory_weight_;
- float sample_fraction_;
- Distance distance_;
- };
- }
- #endif /* OPENCV_FLANN_AUTOTUNED_INDEX_H_ */
|