From a87a1f159c7394f58e21959c0832ab6c72627902 Mon Sep 17 00:00:00 2001 From: Marius Muja Date: Wed, 28 Oct 2009 00:17:36 -0700 Subject: [PATCH] Misc --- doc/manual.tex | 6 ++-- src/cpp/CMakeLists.txt | 2 +- src/cpp/algorithms/kdtree_index.h | 4 ++- src/cpp/constants.h | 1 + src/cpp/flann.cpp | 33 +++++++++++++-------- src/cpp/flann.h | 12 ++++---- src/cpp/flann.hpp | 28 +++++++++++++++-- src/cpp/nn/index_testing.cpp | 1 + src/cpp/tests/CMakeLists.txt | 5 +++- src/cpp/util/saving.cpp | 1 + src/python/pyflann/bindings/flann_ctypes.py | 8 ++--- src/python/pyflann/command/run_test.py | 12 ++++++-- src/python/pyflann/io/npy_dataset.py | 3 +- 13 files changed, 81 insertions(+), 35 deletions(-) diff --git a/doc/manual.tex b/doc/manual.tex index a72858f..0cec975 100644 --- a/doc/manual.tex +++ b/doc/manual.tex @@ -279,12 +279,12 @@ must contain the following fields: percentage of the approximate nearest-neighbor searches that return the exact nearest-neighbor. Using a higher value for this parameter gives more -accurate results, but the searching takes longer. The optimum value +accurate results, but the search takes longer. The optimum value usually depends on the application. \item[\texttt{build\_weight}] - specifies the importance of the -index build time reported to the nearest-neighbor search time. In some +index build time relative to the nearest-neighbor search time. In some applications it's acceptable for the index build step to take a long time if the subsequent searches in the index can be performed very fast. In other applications it's required that the index be build as fast as @@ -342,7 +342,7 @@ convergence. This parameter is required only when the algorithm used is centers when performing a kmeans clustering step. 
The possible values are 'random' (picks the initial cluster centers randomly), 'gonzales' (picks the initial centers using the Gonzales algorithm) and 'kmeanspp' (picks the initial -centers using thealgorithm suggested in \cite{arthur_kmeanspp_2007}). If this +centers using the algorithm suggested in \cite{arthur_kmeanspp_2007}). If this parameters is omitted, the default value is 'random'. \item[\texttt{cb\_index}] - this parameter (cluster boundary index) influences the diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index ce27a27..b8f06f0 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -87,7 +87,7 @@ INSTALL ( # ) INSTALL ( - FILES flann.h constants.h + FILES flann.h flann.hpp constants.h util/matrix.h util/common.h util/random.h DESTINATION include ) diff --git a/src/cpp/algorithms/kdtree_index.h b/src/cpp/algorithms/kdtree_index.h index 0de4831..41c49c1 100644 --- a/src/cpp/algorithms/kdtree_index.h +++ b/src/cpp/algorithms/kdtree_index.h @@ -34,6 +34,7 @@ #include #include #include +#include #include "heap.h" #include "common.h" @@ -175,8 +176,8 @@ public: { size_ = dataset.rows; veclen_ = dataset.cols; - numTrees = params.trees; + trees = new Tree[numTrees]; // get the parameters @@ -306,6 +307,7 @@ public: */ void findNeighbors(ResultSet& result, const float* vec, const SearchParams& searchParams) { + int maxChecks = searchParams.checks; if (maxChecks<0) { diff --git a/src/cpp/constants.h b/src/cpp/constants.h index 3fea9ac..8fe32bf 100644 --- a/src/cpp/constants.h +++ b/src/cpp/constants.h @@ -40,6 +40,7 @@ enum flann_algorithm_t { KDTREE = 1, KMEANS = 2, COMPOSITE = 3, + KDTREE_MT = 4, SAVED = 254, AUTOTUNED = 255, }; diff --git a/src/cpp/flann.cpp b/src/cpp/flann.cpp index 1fe46a1..7026312 100644 --- a/src/cpp/flann.cpp +++ b/src/cpp/flann.cpp @@ -39,16 +39,15 @@ #include "object_factory.h" // index types #include "kdtree_index.h" +#include "kdtree_mt_index.h" #include "kmeans_index.h" #include "composite_index.h" 
#include "linear_index.h" #include "autotuned_index.h" -#include using namespace std; - #include "flann.h" #ifdef WIN32 @@ -83,6 +82,11 @@ NNIndex* KDTreeIndexParams::createIndex(const Matrix& dataset) const return new KDTreeIndex(dataset, *this); } +NNIndex* KDTreeMTIndexParams::createIndex(const Matrix& dataset) const +{ + return new KDTreeMTIndex(dataset, *this); +} + NNIndex* KMeansIndexParams::createIndex(const Matrix& dataset) const { return new KMeansIndex(dataset, *this); @@ -127,6 +131,7 @@ public: { ParamsFactory::instance().register_(LINEAR); ParamsFactory::instance().register_(KDTREE); + ParamsFactory::instance().register_(KDTREE_MT); ParamsFactory::instance().register_(KMEANS); ParamsFactory::instance().register_(COMPOSITE); ParamsFactory::instance().register_(AUTOTUNED); @@ -226,7 +231,7 @@ int hierarchicalClustering(const Matrix& features, Matrix& centers using namespace flann; -typedef NNIndex* NNIndexPtr; +typedef Index* IndexPtr; typedef Matrix* MatrixPtr; @@ -259,6 +264,7 @@ EXPORTED void flann_set_distance_type(flann_distance_t distance_type, int order) EXPORTED flann_index_t flann_build_index(float* dataset, int rows, int cols, float* speedup, FLANNParameters* flann_params) { try { + init_flann_parameters(flann_params); if (flann_params == NULL) { throw FLANNException("The flann_params argument must be non-null"); @@ -295,7 +301,7 @@ EXPORTED int flann_save_index(flann_index_t index_ptr, char* filename) } -EXPORTED FLANN_INDEX flann_load_index(char* filename, float* dataset, int rows, int cols) +EXPORTED flann_index_t flann_load_index(char* filename, float* dataset, int rows, int cols) { try { Index* index = new Index(Matrix(rows,cols,dataset), SavedIndexParams(filename)); @@ -357,7 +363,7 @@ EXPORTED int flann_find_nearest_neighbors_index(flann_index_t index_ptr, float* } -EXPORTED int flann_radius_search(FLANN_INDEX index_ptr, +EXPORTED int flann_radius_search(flann_index_t index_ptr, float* query, int* indices, float* dists, @@ -390,7 +396,7 
@@ EXPORTED int flann_radius_search(FLANN_INDEX index_ptr, } -EXPORTED int flann_free_index(FLANN_INDEX index_ptr, FLANNParameters* flann_params) +EXPORTED int flann_free_index(flann_index_t index_ptr, FLANNParameters* flann_params) { try { init_flann_parameters(flann_params); @@ -437,7 +443,7 @@ EXPORTED void compute_ground_truth_float(float* dataset, int dshape[], float* te } -EXPORTED float test_with_precision(FLANN_INDEX index_ptr, float* dataset, int dshape[], float* testset, int tshape[], int* matches, int mshape[], +EXPORTED float test_with_precision(flann_index_t index_ptr, float* dataset, int dshape[], float* testset, int tshape[], int* matches, int mshape[], int nn, float precision, int* checks, int skip = 0) { assert(dshape[1]==tshape[1]); @@ -447,8 +453,10 @@ EXPORTED float test_with_precision(FLANN_INDEX index_ptr, float* dataset, int ds if (index_ptr==NULL) { throw FLANNException("Invalid index"); } - NNIndexPtr index = (NNIndexPtr)index_ptr; - return test_index_precision(*index, Matrix(dshape[0], dshape[1],dataset), Matrix(tshape[0], tshape[1], testset), + + IndexPtr index = (IndexPtr)index_ptr; + NNIndex* nn_index = index->index(); + return test_index_precision(*nn_index, Matrix(dshape[0], dshape[1],dataset), Matrix(tshape[0], tshape[1], testset), Matrix(mshape[0],mshape[1],matches), precision, *checks, nn, skip); } catch (runtime_error& e) { logger.error("Caught exception: %s\n",e.what()); @@ -456,7 +464,7 @@ EXPORTED float test_with_precision(FLANN_INDEX index_ptr, float* dataset, int ds } } -EXPORTED float test_with_checks(FLANN_INDEX index_ptr, float* dataset, int dshape[], float* testset, int tshape[], int* matches, int mshape[], +EXPORTED float test_with_checks(flann_index_t index_ptr, float* dataset, int dshape[], float* testset, int tshape[], int* matches, int mshape[], int nn, int checks, float* precision, int skip = 0) { assert(dshape[1]==tshape[1]); @@ -466,8 +474,9 @@ EXPORTED float test_with_checks(FLANN_INDEX index_ptr, float* dataset, 
int dshap if (index_ptr==NULL) { throw FLANNException("Invalid index"); } - NNIndexPtr index = (NNIndexPtr)index_ptr; - return test_index_checks(*index, Matrix(dshape[0], dshape[1],dataset), Matrix(tshape[0], tshape[1], testset), + IndexPtr index = (IndexPtr)index_ptr; + NNIndex* nn_index = index->index(); + return test_index_checks(*nn_index, Matrix(dshape[0], dshape[1],dataset), Matrix(tshape[0], tshape[1], testset), Matrix(mshape[0],mshape[1],matches), checks, *precision, nn, skip); } catch (runtime_error& e) { logger.error("Caught exception: %s\n",e.what()); diff --git a/src/cpp/flann.h b/src/cpp/flann.h index 068753f..786aa1c 100644 --- a/src/cpp/flann.h +++ b/src/cpp/flann.h @@ -111,7 +111,7 @@ Params: Returns: the newly created index or a number <0 for error */ -LIBSPEC FLANN_INDEX flann_build_index(float* dataset, +LIBSPEC flann_index_t flann_build_index(float* dataset, int rows, int cols, float* speedup, @@ -128,7 +128,7 @@ LIBSPEC FLANN_INDEX flann_build_index(float* dataset, * @param filename The filename the index should be saved to * @return Returns 0 on success, negative value on error. */ -LIBSPEC int flann_save_index(FLANN_INDEX index_id, +LIBSPEC int flann_save_index(flann_index_t index_id, char* filename); @@ -141,7 +141,7 @@ LIBSPEC int flann_save_index(FLANN_INDEX index_id, * @param cols Dataset columns * @return */ -LIBSPEC FLANN_INDEX flann_load_index(char* filename, +LIBSPEC flann_index_t flann_load_index(char* filename, float* dataset, int rows, int cols); @@ -192,7 +192,7 @@ Params: Returns: zero or a number <0 for error */ -LIBSPEC int flann_find_nearest_neighbors_index(FLANN_INDEX index_id, +LIBSPEC int flann_find_nearest_neighbors_index(flann_index_t index_id, float* testset, int trows, int* indices, @@ -217,7 +217,7 @@ LIBSPEC int flann_find_nearest_neighbors_index(FLANN_INDEX index_id, * a higher search speedup at the cost of potentially not returning all the * neighbours in the specified radius. 
*/ -LIBSPEC int flann_radius_search(FLANN_INDEX index_ptr, /* the index */ +LIBSPEC int flann_radius_search(flann_index_t index_ptr, /* the index */ float* query, /* query point */ int* indices, /* array for storing the indices found (will be modified) */ float* dists, /* similar, but for storing distances */ @@ -236,7 +236,7 @@ Params: Returns: zero or a number <0 for error */ -LIBSPEC int flann_free_index(FLANN_INDEX index_id, +LIBSPEC int flann_free_index(flann_index_t index_id, struct FLANNParameters* flann_params); /** diff --git a/src/cpp/flann.hpp b/src/cpp/flann.hpp index 43c07bf..a8095e0 100644 --- a/src/cpp/flann.hpp +++ b/src/cpp/flann.hpp @@ -59,8 +59,8 @@ public: static IndexParams* createFromParameters(const FLANNParameters& p); - void fromParameters(const FLANNParameters& p) {}; - void toParameters(FLANNParameters& p) { }; + virtual void fromParameters(const FLANNParameters& p) {}; + virtual void toParameters(FLANNParameters& p) { }; }; struct LinearIndexParams : public IndexParams { @@ -91,6 +91,28 @@ struct KDTreeIndexParams : public IndexParams { }; + +struct KDTreeMTIndexParams : public IndexParams { + KDTreeMTIndexParams(int trees_ = 4) : trees(trees_) {}; + + int trees; // number of randomized trees to use (for kdtree) + + NNIndex* createIndex(const Matrix& dataset) const; + + void fromParameters(const FLANNParameters& p) + { + trees = p.trees; + } + + void toParameters(FLANNParameters& p) + { + p.algorithm = KDTREE_MT; + p.trees = trees; + }; + +}; + + struct KMeansIndexParams : public IndexParams { KMeansIndexParams(int branching_ = 32, int iterations_ = 11, flann_centers_init_t centers_init_ = CENTERS_RANDOM, float cb_index_ = 0.2 ) : @@ -236,6 +258,8 @@ public: int veclen() const; int size() const; + + NNIndex* index() { return nnIndex; } }; diff --git a/src/cpp/nn/index_testing.cpp b/src/cpp/nn/index_testing.cpp index bba69c3..1d66d12 100644 --- a/src/cpp/nn/index_testing.cpp +++ b/src/cpp/nn/index_testing.cpp @@ -37,6 +37,7 @@ #include 
#include +#include namespace flann diff --git a/src/cpp/tests/CMakeLists.txt b/src/cpp/tests/CMakeLists.txt index cbd9b76..30e8285 100644 --- a/src/cpp/tests/CMakeLists.txt +++ b/src/cpp/tests/CMakeLists.txt @@ -10,7 +10,10 @@ ADD_EXECUTABLE(flann_test flann_test.cc) TARGET_LINK_LIBRARIES(flann_test flann) + + + INSTALL ( - TARGETS flann_test + TARGETS flann_test RUNTIME DESTINATION bin ) diff --git a/src/cpp/util/saving.cpp b/src/cpp/util/saving.cpp index a5e3a7e..ad09702 100644 --- a/src/cpp/util/saving.cpp +++ b/src/cpp/util/saving.cpp @@ -32,6 +32,7 @@ #include "saving.h" #include "nn_index.h" #include +#include namespace flann { diff --git a/src/python/pyflann/bindings/flann_ctypes.py b/src/python/pyflann/bindings/flann_ctypes.py index 226e6c3..fde5c22 100644 --- a/src/python/pyflann/bindings/flann_ctypes.py +++ b/src/python/pyflann/bindings/flann_ctypes.py @@ -115,7 +115,7 @@ class FLANNParameters(CustomStructure): 'random_seed' : -1 } _translation_ = { - "algorithm" : {"linear" : 0, "kdtree" : 1, "kmeans" : 2, "composite" : 3, "autotuned" : 255, "default" : 1}, + "algorithm" : {"linear" : 0, "kdtree" : 1, "kmeans" : 2, "composite" : 3, "kdtree_mt" : 1, "autotuned" : 255, "default" : 1}, "centers_init" : {"random" : 0, "gonzales" : 1, "kmeanspp" : 2, "default" : 0}, "log_level" : {"none" : 0, "fatal" : 1, "error" : 2, "warning" : 3, "info" : 4, "default" : 2} } @@ -233,7 +233,7 @@ flann.compute_ground_truth_float.argtypes = [ flann.test_with_precision.restype = c_float flann.test_with_precision.argtypes = [ - c_void_p, + FLANN_INDEX, ndpointer(float32, ndim = 2, flags='aligned, c_contiguous'), # dataset c_int*2, # dshape ndpointer(float32, ndim = 2, flags='aligned, c_contiguous'), # testset @@ -249,7 +249,7 @@ flann.test_with_precision.argtypes = [ flann.test_with_checks.restype = c_float flann.test_with_checks.argtypes = [ - c_void_p, + FLANN_INDEX, ndpointer(float32, ndim = 2, flags='aligned, c_contiguous'), # dataset c_int*2, # dshape ndpointer(float32, 
ndim = 2, flags='aligned, c_contiguous'), # testset @@ -391,7 +391,7 @@ class FLANN: dists = empty( (nqpts, num_neighbors), dtype=float32) self.__flann_parameters.update(kwargs) - + flann.flann_find_nearest_neighbors(pts, npts, dim, qpts, nqpts, result, dists, num_neighbors, pointer(self.__flann_parameters)) diff --git a/src/python/pyflann/command/run_test.py b/src/python/pyflann/command/run_test.py index e3f2277..0573dc3 100644 --- a/src/python/pyflann/command/run_test.py +++ b/src/python/pyflann/command/run_test.py @@ -79,17 +79,23 @@ class RunTestCommand(BaseCommand): print 'Reading input dataset from', self.options.input_file dataset = read(self.options.input_file) + import time + print 'Creating index' + start_time = time.clock() flann = FLANN(log_level=self.options.log_level) flann.build_index(dataset, algorithm = self.options.algorithm, trees=self.options.trees, branching=self.options.branching, - iterations=self.options.max_iterations, centers_init=self.options.centers_init) + iterations=self.options.max_iterations, centers_init=self.options.centers_init) + print 'Index creation took', time.clock()-start_time print 'Reading test dataset from', self.options.test_file testset = read(self.options.test_file) - print 'Reading ground truth from matches from', self.options.test_file + print 'Reading ground truth from matches from', self.options.match_file matches = read(self.options.match_file, dtype = int) - + if len(matches.shape)==1: + matches.shape = (matches.shape[0],1) + if self.options.precision>0: checks, time = test_with_precision(flann, dataset, testset, matches, self.options.precision, self.options.nn) else: diff --git a/src/python/pyflann/io/npy_dataset.py b/src/python/pyflann/io/npy_dataset.py index 0f4d17a..2d9a631 100644 --- a/src/python/pyflann/io/npy_dataset.py +++ b/src/python/pyflann/io/npy_dataset.py @@ -46,7 +46,6 @@ def read(filename, dtype = numpy.float32): tmp = numpy.save except: raise FLANNException("Format not supported. 
You need at least numpy version 1.1") - - data = numpy.load(filename) + data = numpy.load(filename) return data \ No newline at end of file -- GitLab