Commit 13183b38 authored by Marius Muja

Moved TBB code from flann::Index to flann::NNIndex

Parent 26fd3a55
@@ -33,10 +33,21 @@
 #include <string>

+#ifdef TBB
+#include <tbb/parallel_for.h>
+#include <tbb/blocked_range.h>
+#include <tbb/atomic.h>
+#include <tbb/task_scheduler_init.h>
+#endif
+
 #include "flann/general.h"
 #include "flann/util/matrix.h"
 #include "flann/util/result_set.h"
 #include "flann/util/params.h"

+#ifdef TBB
+#include "flann/tbb/bodies.hpp"
+#endif
+
 namespace flann
 {
@@ -76,26 +87,55 @@ public:
         assert(dists.cols >= knn);

         bool sorted = get_param(params,"sorted",true);
         bool use_heap = get_param(params,"use_heap",false);
+#ifdef TBB
+        int cores = get_param(params,"cores",1);
+        assert(cores >= 1 || cores == -1);
+#endif

         int count = 0;
-        if (use_heap) {
-            KNNResultSet2<DistanceType> resultSet(knn);
-            for (size_t i = 0; i < queries.rows; i++) {
-                resultSet.clear();
-                findNeighbors(resultSet, queries[i], params);
-                resultSet.copy(indices[i], dists[i], knn, sorted);
-                count += resultSet.size();
-            }
-        }
-        else {
-            KNNSimpleResultSet<DistanceType> resultSet(knn);
-            for (size_t i = 0; i < queries.rows; i++) {
-                resultSet.clear();
-                findNeighbors(resultSet, queries[i], params);
-                resultSet.copy(indices[i], dists[i], knn, sorted);
-                count += resultSet.size();
-            }
-        }
+#ifdef TBB
+        // Check if we need to do multicore search or stick with singlecore FLANN (less overhead)
+        if(cores == 1)
+        {
+#endif
+            if (use_heap) {
+                KNNResultSet2<DistanceType> resultSet(knn);
+                for (size_t i = 0; i < queries.rows; i++) {
+                    resultSet.clear();
+                    findNeighbors(resultSet, queries[i], params);
+                    resultSet.copy(indices[i], dists[i], knn, sorted);
+                    count += resultSet.size();
+                }
+            }
+            else {
+                KNNSimpleResultSet<DistanceType> resultSet(knn);
+                for (size_t i = 0; i < queries.rows; i++) {
+                    resultSet.clear();
+                    findNeighbors(resultSet, queries[i], params);
+                    resultSet.copy(indices[i], dists[i], knn, sorted);
+                    count += resultSet.size();
+                }
+            }
+#ifdef TBB
+        }
+        else
+        {
+            // Initialise the task scheduler for the use of Intel TBB parallel constructs
+            tbb::task_scheduler_init task_sched(cores);
+            // Make an atomic integer count, such that we can keep track of amount of neighbors found
+            atomic_count_ = 0;
+            // Use auto partitioner to choose the optimal grainsize for dividing the query points
+            flann::parallel_knnSearch<Distance> parallel_knn(queries, indices, dists, knn, params, this, atomic_count_);
+            tbb::parallel_for(tbb::blocked_range<size_t>(0,queries.rows),
+                              parallel_knn,
+                              tbb::auto_partitioner());
+            count = atomic_count_;
+        }
+#endif
         return count;
     }
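The new `cores` search parameter is what routes a query batch into the TBB branch above. Below is a caller-side sketch, not part of this commit: the dataset values and index parameters are made up, and the four-argument `SearchParams` constructor is the one exercised by the test at the bottom of this diff.

```cpp
#include <flann/flann.hpp>
#include <vector>

int main()
{
    // Toy dataset: four 2-D points, stored row-major in a flann::Matrix.
    float raw[] = { 1.f, 1.f,  2.f, 2.f,  10.f, 10.f,  11.f, 11.f };
    flann::Matrix<float> dataset(raw, 4, 2);

    flann::Index<flann::L2<float> > index(dataset, flann::KDTreeIndexParams(4));
    index.buildIndex();

    float q[] = { 1.5f, 1.5f };
    flann::Matrix<float> query(q, 1, 2);

    std::vector<int> nn_index(2);
    std::vector<float> nn_dist(2);
    flann::Matrix<int> indices(&nn_index[0], 1, 2);
    flann::Matrix<float> dists(&nn_dist[0], 1, 2);

    // checks, eps, sorted, cores: cores == 1 keeps the single-core loop,
    // cores > 1 takes the tbb::parallel_for branch (when built with TBB
    // defined), and cores == -1 lets TBB choose, as the assert on "cores"
    // above allows.
    flann::SearchParams params(128, 0.0f, true, 2);
    index.knnSearch(query, indices, dists, 2, params);
    return 0;
}
```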
@@ -119,34 +159,63 @@ public:
         assert(queries.cols == veclen());

         bool sorted = get_param(params,"sorted",true);
         bool use_heap = get_param(params,"use_heap",false);
-        if (indices.size() < queries.rows ) indices.resize(queries.rows);
+#ifdef TBB
+        int cores = get_param(params,"cores",1);
+        assert(cores >= 1 || cores == -1);
+#endif
+
+        if (indices.size() < queries.rows ) indices.resize(queries.rows);
         if (dists.size() < queries.rows ) dists.resize(queries.rows);

         int count = 0;
-        if (use_heap) {
-            KNNResultSet2<DistanceType> resultSet(knn);
-            for (size_t i = 0; i < queries.rows; i++) {
-                resultSet.clear();
-                findNeighbors(resultSet, queries[i], params);
-                size_t n = std::min(resultSet.size(), knn);
-                indices[i].resize(n);
-                dists[i].resize(n);
-                resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
-                count += n;
-            }
-        }
-        else {
-            KNNSimpleResultSet<DistanceType> resultSet(knn);
-            for (size_t i = 0; i < queries.rows; i++) {
-                resultSet.clear();
-                findNeighbors(resultSet, queries[i], params);
-                size_t n = std::min(resultSet.size(), knn);
-                indices[i].resize(n);
-                dists[i].resize(n);
-                resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
-                count += n;
-            }
-        }
+#ifdef TBB
+        // Check if we need to do multicore search or stick with singlecore FLANN (less overhead)
+        if(cores == 1)
+        {
+#endif
+            if (use_heap) {
+                KNNResultSet2<DistanceType> resultSet(knn);
+                for (size_t i = 0; i < queries.rows; i++) {
+                    resultSet.clear();
+                    findNeighbors(resultSet, queries[i], params);
+                    size_t n = std::min(resultSet.size(), knn);
+                    indices[i].resize(n);
+                    dists[i].resize(n);
+                    resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
+                    count += n;
+                }
+            }
+            else {
+                KNNSimpleResultSet<DistanceType> resultSet(knn);
+                for (size_t i = 0; i < queries.rows; i++) {
+                    resultSet.clear();
+                    findNeighbors(resultSet, queries[i], params);
+                    size_t n = std::min(resultSet.size(), knn);
+                    indices[i].resize(n);
+                    dists[i].resize(n);
+                    resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
+                    count += n;
+                }
+            }
+#ifdef TBB
+        }
+        else
+        {
+            // Initialise the task scheduler for the use of Intel TBB parallel constructs
+            tbb::task_scheduler_init task_sched(cores);
+            // Make an atomic integer count, such that we can keep track of amount of neighbors found
+            atomic_count_ = 0;
+            // Use auto partitioner to choose the optimal grainsize for dividing the query points
+            flann::parallel_knnSearch2<Distance> parallel_knn(queries, indices, dists, knn, params, this, atomic_count_);
+            tbb::parallel_for(tbb::blocked_range<size_t>(0,queries.rows),
+                              parallel_knn,
+                              tbb::auto_partitioner());
+            count = atomic_count_;
+        }
+#endif
         return count;
     }
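The vector-of-vectors overload above resizes each per-query output itself, so callers need no preallocation. A continuation sketch, assuming the `index`, `query`, and `params` objects from the previous example:

```cpp
std::vector<std::vector<int> > indices2;
std::vector<std::vector<float> > dists2;

// Each inner vector ends up with min(knn, neighbors actually found) entries;
// the return value is the total count accumulated across all queries.
int total = index.knnSearch(query, indices2, dists2, 3, params);
```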
@@ -164,57 +233,85 @@ public:
                      float radius, const SearchParams& params)
     {
         assert(queries.cols == veclen());
-        int max_neighbors = get_param(params, "max_neighbors", -1);
-        int count = 0;
-        if (max_neighbors==0) {
-            CountRadiusResultSet<DistanceType> resultSet(radius);
-            for (size_t i = 0; i < queries.rows; i++) {
-                resultSet.clear();
-                findNeighbors(resultSet, queries[i], params);
-                count += resultSet.size();
-            }
-        }
-        else {
-            size_t num_neighbors = std::min(indices.cols, dists.cols);
-            bool sorted = get_param(params, "sorted", true);
-            bool has_max_neighbors = has_param(params,"max_neighbors");
-            // explicitly indicated to use unbounded radius result set
-            // or we know there'll be enough room for resulting indices and dists
-            if (max_neighbors<0 && (has_max_neighbors || num_neighbors>=size())) {
-                RadiusResultSet<DistanceType> resultSet(radius);
-                for (size_t i = 0; i < queries.rows; i++) {
-                    resultSet.clear();
-                    findNeighbors(resultSet, queries[i], params);
-                    size_t n = resultSet.size();
-                    count += n;
-                    if (n>num_neighbors) n = num_neighbors;
-                    resultSet.copy(indices[i], dists[i], n, sorted);
-                    // mark the next element in the output buffers as unused
-                    if (n<indices.cols) indices[i][n] = -1;
-                    if (n<dists.cols) dists[i][n] = std::numeric_limits<DistanceType>::infinity();
-                }
-            }
-            else {
-                if (max_neighbors<0) max_neighbors = num_neighbors;
-                else max_neighbors = std::min(max_neighbors,(int)num_neighbors);
-                // number of neighbors limited to max_neighbors
-                KNNRadiusResultSet<DistanceType> resultSet(radius, max_neighbors);
-                for (size_t i = 0; i < queries.rows; i++) {
-                    resultSet.clear();
-                    findNeighbors(resultSet, queries[i], params);
-                    size_t n = resultSet.size();
-                    count += n;
-                    if ((int)n>max_neighbors) n = max_neighbors;
-                    resultSet.copy(indices[i], dists[i], n, sorted);
-                    // mark the next element in the output buffers as unused
-                    if (n<indices.cols) indices[i][n] = -1;
-                    if (n<dists.cols) dists[i][n] = std::numeric_limits<DistanceType>::infinity();
-                }
-            }
-        }
+#ifdef TBB
+        int cores = get_param(params,"cores",1);
+        assert(cores >= 1 || cores == -1);
+#endif
+
+        int count = 0;
+#ifdef TBB
+        // Check if we need to do multicore search or stick with singlecore FLANN (less overhead)
+        if(cores == 1)
+        {
+#endif
+            int max_neighbors = get_param(params, "max_neighbors", -1);
+            if (max_neighbors==0) {
+                CountRadiusResultSet<DistanceType> resultSet(radius);
+                for (size_t i = 0; i < queries.rows; i++) {
+                    resultSet.clear();
+                    findNeighbors(resultSet, queries[i], params);
+                    count += resultSet.size();
+                }
+            }
+            else {
+                size_t num_neighbors = std::min(indices.cols, dists.cols);
+                bool sorted = get_param(params, "sorted", true);
+                bool has_max_neighbors = has_param(params,"max_neighbors");
+                // explicitly indicated to use unbounded radius result set
+                // or we know there'll be enough room for resulting indices and dists
+                if (max_neighbors<0 && (has_max_neighbors || num_neighbors>=size())) {
+                    RadiusResultSet<DistanceType> resultSet(radius);
+                    for (size_t i = 0; i < queries.rows; i++) {
+                        resultSet.clear();
+                        findNeighbors(resultSet, queries[i], params);
+                        size_t n = resultSet.size();
+                        count += n;
+                        if (n>num_neighbors) n = num_neighbors;
+                        resultSet.copy(indices[i], dists[i], n, sorted);
+                        // mark the next element in the output buffers as unused
+                        if (n<indices.cols) indices[i][n] = -1;
+                        if (n<dists.cols) dists[i][n] = std::numeric_limits<DistanceType>::infinity();
+                    }
+                }
+                else {
+                    if (max_neighbors<0) max_neighbors = num_neighbors;
+                    else max_neighbors = std::min(max_neighbors,(int)num_neighbors);
+                    // number of neighbors limited to max_neighbors
+                    KNNRadiusResultSet<DistanceType> resultSet(radius, max_neighbors);
+                    for (size_t i = 0; i < queries.rows; i++) {
+                        resultSet.clear();
+                        findNeighbors(resultSet, queries[i], params);
+                        size_t n = resultSet.size();
+                        count += n;
+                        if ((int)n>max_neighbors) n = max_neighbors;
+                        resultSet.copy(indices[i], dists[i], n, sorted);
+                        // mark the next element in the output buffers as unused
+                        if (n<indices.cols) indices[i][n] = -1;
+                        if (n<dists.cols) dists[i][n] = std::numeric_limits<DistanceType>::infinity();
+                    }
+                }
+            }
+#ifdef TBB
+        }
+        else
+        {
+            // Initialise the task scheduler for the use of Intel TBB parallel constructs
+            tbb::task_scheduler_init task_sched(cores);
+            // Make an atomic integer count, such that we can keep track of amount of neighbors found
+            atomic_count_ = 0;
+            // Use auto partitioner to choose the optimal grainsize for dividing the query points
+            flann::parallel_radiusSearch<Distance> parallel_radius(queries, indices, dists, radius, params, this, atomic_count_);
+            tbb::parallel_for(tbb::blocked_range<size_t>(0,queries.rows),
+                              parallel_radius,
+                              tbb::auto_partitioner());
+            count = atomic_count_;
+        }
+#endif
         return count;
     }
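With the Matrix-based radiusSearch above, a query can return fewer neighbors than the output buffers have columns; the loop then writes a sentinel (index -1, infinite distance) into the first unused slot. A consumption sketch under the same assumptions as the earlier example (`<cstdio>` assumed included; FLANN's L2 distance compares squared distances):

```cpp
float radius = 4.0f;
int found = index.radiusSearch(query, indices, dists, radius, params);

for (size_t j = 0; j < indices.cols; ++j) {
    if (indices[0][j] == -1) break;   // sentinel: no more neighbors for this query
    printf("neighbor %d at squared distance %f\n", indices[0][j], dists[0][j]);
}
```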
@@ -222,51 +319,79 @@ public:
                      std::vector<std::vector<DistanceType> >& dists, float radius, const SearchParams& params)
     {
         assert(queries.cols == veclen());
+#ifdef TBB
+        int cores = get_param(params,"cores",1);
+        assert(cores >= 1 || cores == -1);
+#endif

         int count = 0;
-        int max_neighbors = get_param(params, "max_neighbors", -1);
-        // just count neighbors
-        if (max_neighbors==0) {
-            CountRadiusResultSet<DistanceType> resultSet(radius);
-            for (size_t i = 0; i < queries.rows; i++) {
-                resultSet.clear();
-                findNeighbors(resultSet, queries[i], params);
-                count += resultSet.size();
-            }
-        }
-        else {
-            bool sorted = get_param(params, "sorted", true);
-            if (indices.size() < queries.rows ) indices.resize(queries.rows);
-            if (dists.size() < queries.rows ) dists.resize(queries.rows);
-            if (max_neighbors<0) {
-                // search for all neighbors
-                RadiusResultSet<DistanceType> resultSet(radius);
-                for (size_t i = 0; i < queries.rows; i++) {
-                    resultSet.clear();
-                    findNeighbors(resultSet, queries[i], params);
-                    size_t n = resultSet.size();
-                    count += n;
-                    indices[i].resize(n);
-                    dists[i].resize(n);
-                    resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
-                }
-            }
-            else {
-                // number of neighbors limited to max_neighbors
-                KNNRadiusResultSet<DistanceType> resultSet(radius, max_neighbors);
-                for (size_t i = 0; i < queries.rows; i++) {
-                    resultSet.clear();
-                    findNeighbors(resultSet, queries[i], params);
-                    size_t n = resultSet.size();
-                    count += n;
-                    if ((int)n>max_neighbors) n = max_neighbors;
-                    indices[i].resize(n);
-                    dists[i].resize(n);
-                    resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
-                }
-            }
-        }
+#ifdef TBB
+        // Check if we need to do multicore search or stick with singlecore FLANN (less overhead)
+        if(cores == 1)
+        {
+#endif
+            int max_neighbors = get_param(params, "max_neighbors", -1);
+            // just count neighbors
+            if (max_neighbors==0) {
+                CountRadiusResultSet<DistanceType> resultSet(radius);
+                for (size_t i = 0; i < queries.rows; i++) {
+                    resultSet.clear();
+                    findNeighbors(resultSet, queries[i], params);
+                    count += resultSet.size();
+                }
+            }
+            else {
+                bool sorted = get_param(params, "sorted", true);
+                if (indices.size() < queries.rows ) indices.resize(queries.rows);
+                if (dists.size() < queries.rows ) dists.resize(queries.rows);
+                if (max_neighbors<0) {
+                    // search for all neighbors
+                    RadiusResultSet<DistanceType> resultSet(radius);
+                    for (size_t i = 0; i < queries.rows; i++) {
+                        resultSet.clear();
+                        findNeighbors(resultSet, queries[i], params);
+                        size_t n = resultSet.size();
+                        count += n;
+                        indices[i].resize(n);
+                        dists[i].resize(n);
+                        resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
+                    }
+                }
+                else {
+                    // number of neighbors limited to max_neighbors
+                    KNNRadiusResultSet<DistanceType> resultSet(radius, max_neighbors);
+                    for (size_t i = 0; i < queries.rows; i++) {
+                        resultSet.clear();
+                        findNeighbors(resultSet, queries[i], params);
+                        size_t n = resultSet.size();
+                        count += n;
+                        if ((int)n>max_neighbors) n = max_neighbors;
+                        indices[i].resize(n);
+                        dists[i].resize(n);
+                        resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
+                    }
+                }
+            }
+#ifdef TBB
+        }
+        else
+        {
+            // Initialise the task scheduler for the use of Intel TBB parallel constructs
+            tbb::task_scheduler_init task_sched(cores);
+            // Reset atomic count before passing it on to the threads, such that we can keep track of amount of neighbors found
+            atomic_count_ = 0;
+            // Use auto partitioner to choose the optimal grainsize for dividing the query points
+            flann::parallel_radiusSearch2<Distance> parallel_radius(queries, indices, dists, radius, params, this, atomic_count_);
+            tbb::parallel_for(tbb::blocked_range<size_t>(0,queries.rows),
+                              parallel_radius,
+                              tbb::auto_partitioner());
+            count = atomic_count_;
+        }
+#endif
         return count;
     }
@@ -313,6 +438,13 @@ public:
      * \brief Method that searches for nearest-neighbours
      */
     virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) = 0;
+
+private:
+#ifdef TBB
+    /** Atomic count variable, passed to the different threads for keeping track of the amount of neighbors found. */
+    tbb::atomic<int> atomic_count_;
+#endif
+
 };

 }
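The `atomic_count_` member exists because `tbb::atomic<T>` declares no constructors: it cannot be initialised in a member-initialiser list, so the search methods reset it by assignment (`atomic_count_ = 0`) before each parallel run and read it back afterwards. Below is a minimal standalone sketch of the same counting pattern, written against the classic TBB API this commit targets (`task_scheduler_init` and `tbb::atomic` are deprecated in modern oneTBB); all names in it are invented for the example.

```cpp
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#include <tbb/atomic.h>
#include <tbb/task_scheduler_init.h>
#include <cstdio>

struct CountEvens
{
    CountEvens(const int* values, tbb::atomic<int>& count)
        : values_(values), count_(count) {}

    // parallel_for invokes the body as const; writing through the reference
    // member is still allowed, which is why the bodies below can drop "mutable".
    void operator()(const tbb::blocked_range<size_t>& r) const
    {
        int local = 0;                        // accumulate locally first
        for (size_t i = r.begin(); i != r.end(); ++i)
            if (values_[i] % 2 == 0) ++local;
        count_ += local;                      // one atomic update per sub-range
    }

    const int* values_;
    tbb::atomic<int>& count_;
};

int main()
{
    tbb::task_scheduler_init sched(2);        // same role as task_sched(cores)
    int values[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

    tbb::atomic<int> count;
    count = 0;   // no constructors on tbb::atomic, so reset by assignment

    tbb::parallel_for(tbb::blocked_range<size_t>(0, 8),
                      CountEvens(values, count),
                      tbb::auto_partitioner());
    std::printf("%d even numbers\n", (int)count);
    return 0;
}
```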
......
@@ -36,14 +36,6 @@
 #include <cassert>
 #include <cstdio>

-#ifdef TBB
-#include <tbb/parallel_for.h>
-#include <tbb/blocked_range.h>
-#include <tbb/atomic.h>
-#include <tbb/task_scheduler_init.h>
-#endif
-
 #include "flann/general.h"
 #include "flann/util/matrix.h"
 #include "flann/util/params.h"
@@ -51,10 +43,6 @@
 #include "flann/algorithms/all_indices.h"

-#ifdef TBB
-#include "flann/tbb/bodies.hpp"
-#endif
-
 namespace flann
 {
@@ -116,13 +104,8 @@ public:
     typedef typename Distance::ElementType ElementType;
     typedef typename Distance::ResultType DistanceType;

-#ifdef TBB
-    Index(const Matrix<ElementType>& features, const IndexParams& params, Distance distance = Distance() )
-        : index_params_(params), atomic_count_()
-#else
     Index(const Matrix<ElementType>& features, const IndexParams& params, Distance distance = Distance() )
         : index_params_(params)
-#endif
     {
         flann_algorithm_t index_type = get_param<flann_algorithm_t>(params,"algorithm");
         loaded_ = false;
@@ -136,7 +119,7 @@ public:
         }
     }

-    ~Index()
+    virtual ~Index()
     {
         delete nnIndex_;
     }
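Making the destructor virtual matters whenever an object is deleted through a base-class pointer: without it, only the base destructor runs and the derived part leaks. A self-contained illustration with made-up types, independent of FLANN:

```cpp
#include <cstdio>

struct Base           { virtual ~Base()    { std::printf("~Base\n"); } };
struct Derived : Base {         ~Derived() { std::printf("~Derived\n"); } };

int main()
{
    Base* p = new Derived;
    delete p;   // prints ~Derived then ~Base only because ~Base is virtual
    return 0;
}
```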
@@ -166,7 +149,7 @@ public:
      * \brief Saves the index to a stream
      * \param stream The stream to save the index to
      */
-    virtual void saveIndex(FILE* stream)
+    void saveIndex(FILE* stream)
     {
         nnIndex_->saveIndex(stream);
     }
@@ -175,7 +158,7 @@ public:
      * \brief Loads the index from a stream
      * \param stream The stream from which the index is loaded
      */
-    virtual void loadIndex(FILE* stream)
+    void loadIndex(FILE* stream)
     {
         nnIndex_->loadIndex(stream);
     }
@@ -207,7 +190,7 @@ public:
     /**
      * \returns The amount of memory (in bytes) used by the index.
      */
-    virtual int usedMemory() const
+    int usedMemory() const
     {
         return nnIndex_->usedMemory();
     }
@@ -235,64 +218,7 @@ public:
                   size_t knn,
                   const SearchParams& params)
     {
-        assert(queries.cols == veclen());
-        assert(indices.rows >= queries.rows);
-        assert(dists.rows >= queries.rows);
-        assert(indices.cols >= knn);
-        assert(dists.cols >= knn);
-
-        bool sorted = get_param(params,"sorted",true);
-        bool use_heap = get_param(params,"use_heap",false);
-#ifdef TBB
-        int cores = get_param(params,"cores",1);
-        assert(cores >= 1 || cores == -1);
-#endif
-
-        int count = 0;
-#ifdef TBB
-        // Check if we need to do multicore search or stick with singlecore FLANN (less overhead)
-        if(cores == 1)
-        {
-#endif
-            if (use_heap) {
-                KNNResultSet2<DistanceType> resultSet(knn);
-                for (size_t i = 0; i < queries.rows; i++) {
-                    resultSet.clear();
-                    nnIndex_->findNeighbors(resultSet, queries[i], params);
-                    resultSet.copy(indices[i], dists[i], knn, sorted);
-                    count += resultSet.size();
-                }
-            }
-            else {
-                KNNSimpleResultSet<DistanceType> resultSet(knn);
-                for (size_t i = 0; i < queries.rows; i++) {
-                    resultSet.clear();
-                    nnIndex_->findNeighbors(resultSet, queries[i], params);
-                    resultSet.copy(indices[i], dists[i], knn, sorted);
-                    count += resultSet.size();
-                }
-            }
-#ifdef TBB
-        }
-        else
-        {
-            // Initialise the task scheduler for the use of Intel TBB parallel constructs
-            tbb::task_scheduler_init task_sched(cores);
-            // Make an atomic integer count, such that we can keep track of amount of neighbors found
-            atomic_count_ = 0;
-            // Use auto partitioner to choose the optimal grainsize for dividing the query points
-            flann::parallel_knnSearch<Distance> parallel_knn(queries, indices, dists, knn, params, nnIndex_, atomic_count_);
-            tbb::parallel_for(tbb::blocked_range<size_t>(0,queries.rows),
-                              parallel_knn,
-                              tbb::auto_partitioner());
-            count = atomic_count_;
-        }
-#endif
-        return count;
+        return nnIndex_->knnSearch(queries, indices, dists, knn, params);
     }
@@ -310,69 +236,7 @@ public:
                   size_t knn,
                   const SearchParams& params)
     {
-        assert(queries.cols == veclen());
-
-        bool sorted = get_param(params,"sorted",true);
-        bool use_heap = get_param(params,"use_heap",false);
-#ifdef TBB
-        int cores = get_param(params,"cores",1);
-        assert(cores >= 1 || cores == -1);
-#endif
-
-        if (indices.size() < queries.rows ) indices.resize(queries.rows);
-        if (dists.size() < queries.rows ) dists.resize(queries.rows);
-
-        int count = 0;
-#ifdef TBB
-        // Check if we need to do multicore search or stick with singlecore FLANN (less overhead)
-        if(cores == 1)
-        {
-#endif
-            if (use_heap) {
-                KNNResultSet2<DistanceType> resultSet(knn);
-                for (size_t i = 0; i < queries.rows; i++) {
-                    resultSet.clear();
-                    nnIndex_->findNeighbors(resultSet, queries[i], params);
-                    size_t n = std::min(resultSet.size(), knn);
-                    indices[i].resize(n);
-                    dists[i].resize(n);
-                    resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
-                    count += n;
-                }
-            }
-            else {
-                KNNSimpleResultSet<DistanceType> resultSet(knn);
-                for (size_t i = 0; i < queries.rows; i++) {
-                    resultSet.clear();
-                    nnIndex_->findNeighbors(resultSet, queries[i], params);
-                    size_t n = std::min(resultSet.size(), knn);
-                    indices[i].resize(n);
-                    dists[i].resize(n);
-                    resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
-                    count += n;
-                }
-            }
-#ifdef TBB
-        }
-        else
-        {
-            // Initialise the task scheduler for the use of Intel TBB parallel constructs
-            tbb::task_scheduler_init task_sched(cores);
-            // Make an atomic integer count, such that we can keep track of amount of neighbors found
-            atomic_count_ = 0;
-            // Use auto partitioner to choose the optimal grainsize for dividing the query points
-            flann::parallel_knnSearch2<Distance> parallel_knn(queries, indices, dists, knn, params, nnIndex_, atomic_count_);
-            tbb::parallel_for(tbb::blocked_range<size_t>(0,queries.rows),
-                              parallel_knn,
-                              tbb::auto_partitioner());
-            count = atomic_count_;
-        }
-#endif
-        return count;
+        return nnIndex_->knnSearch(queries, indices, dists, knn, params);
     }
@@ -391,92 +255,7 @@ public:
                      float radius,
                      const SearchParams& params)
     {
-        assert(queries.cols == veclen());
-#ifdef TBB
-        int cores = get_param(params,"cores",1);
-        assert(cores >= 1 || cores == -1);
-#endif
-
-        int count = 0;
-#ifdef TBB
-        // Check if we need to do multicore search or stick with singlecore FLANN (less overhead)
-        if(cores == 1)
-        {
-#endif
-            int max_neighbors = get_param(params, "max_neighbors", -1);
-            // just count neighbors
-            if (max_neighbors==0) {
-                CountRadiusResultSet<DistanceType> resultSet(radius);
-                for (size_t i = 0; i < queries.rows; i++) {
-                    resultSet.clear();
-                    findNeighbors(resultSet, queries[i], params);
-                    count += resultSet.size();
-                }
-            }
-            else {
-                size_t num_neighbors = std::min(indices.cols, dists.cols);
-                bool sorted = get_param(params, "sorted", true);
-                bool has_max_neighbors = has_param(params,"max_neighbors");
-                // explicitly indicated to use unbounded radius result set
-                // or we know there'll be enough room for resulting indices and dists
-                if (max_neighbors<0 && (has_max_neighbors || num_neighbors>=size())) {
-                    RadiusResultSet<DistanceType> resultSet(radius);
-                    for (size_t i = 0; i < queries.rows; i++) {
-                        resultSet.clear();
-                        nnIndex_->findNeighbors(resultSet, queries[i], params);
-                        size_t n = resultSet.size();
-                        count += n;
-                        if (n>num_neighbors) n = num_neighbors;
-                        resultSet.copy(indices[i], dists[i], n, sorted);
-                        // mark the next element in the output buffers as unused
-                        if (n<indices.cols) indices[i][n] = -1;
-                        if (n<dists.cols) dists[i][n] = std::numeric_limits<DistanceType>::infinity();
-                    }
-                }
-                else {
-                    if (max_neighbors<0) max_neighbors = num_neighbors;
-                    else max_neighbors = std::min(max_neighbors,(int)num_neighbors);
-                    // number of neighbors limited to max_neighbors
-                    KNNRadiusResultSet<DistanceType> resultSet(radius, max_neighbors);
-                    for (size_t i = 0; i < queries.rows; i++) {
-                        resultSet.clear();
-                        nnIndex_->findNeighbors(resultSet, queries[i], params);
-                        size_t n = resultSet.size();
-                        count += n;
-                        if ((int)n>max_neighbors) n = max_neighbors;
-                        resultSet.copy(indices[i], dists[i], n, sorted);
-                        // mark the next element in the output buffers as unused
-                        if (n<indices.cols) indices[i][n] = -1;
-                        if (n<dists.cols) dists[i][n] = std::numeric_limits<DistanceType>::infinity();
-                    }
-                }
-            }
-#ifdef TBB
-        }
-        else
-        {
-            // Initialise the task scheduler for the use of Intel TBB parallel constructs
-            tbb::task_scheduler_init task_sched(cores);
-            // Make an atomic integer count, such that we can keep track of amount of neighbors found
-            atomic_count_ = 0;
-            // Use auto partitioner to choose the optimal grainsize for dividing the query points
-            flann::parallel_radiusSearch<Distance> parallel_radius(queries, indices, dists, radius, params, nnIndex_, atomic_count_);
-            tbb::parallel_for(tbb::blocked_range<size_t>(0,queries.rows),
-                              parallel_radius,
-                              tbb::auto_partitioner());
-            count = atomic_count_;
-        }
-#endif
-        return count;
+        return nnIndex_->radiusSearch(queries, indices, dists, radius, params);
     }
@@ -495,84 +274,7 @@ public:
                      float radius,
                      const SearchParams& params)
     {
-        assert(queries.cols == veclen());
-#ifdef TBB
-        int cores = get_param(params,"cores",1);
-        assert(cores >= 1 || cores == -1);
-#endif
-
-        int count = 0;
-#ifdef TBB
-        // Check if we need to do multicore search or stick with singlecore FLANN (less overhead)
-        if(cores == 1)
-        {
-#endif
-            int max_neighbors = get_param(params, "max_neighbors", -1);
-            // just count neighbors
-            if (max_neighbors==0) {
-                CountRadiusResultSet<DistanceType> resultSet(radius);
-                for (size_t i = 0; i < queries.rows; i++) {
-                    resultSet.clear();
-                    findNeighbors(resultSet, queries[i], params);
-                    count += resultSet.size();
-                }
-            }
-            else {
-                bool sorted = get_param(params, "sorted", true);
-                if (indices.size() < queries.rows ) indices.resize(queries.rows);
-                if (dists.size() < queries.rows ) dists.resize(queries.rows);
-                if (max_neighbors<0) {
-                    // search for all neighbors
-                    RadiusResultSet<DistanceType> resultSet(radius);
-                    for (size_t i = 0; i < queries.rows; i++) {
-                        resultSet.clear();
-                        findNeighbors(resultSet, queries[i], params);
-                        size_t n = resultSet.size();
-                        count += n;
-                        indices[i].resize(n);
-                        dists[i].resize(n);
-                        resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
-                    }
-                }
-                else {
-                    // number of neighbors limited to max_neighbors
-                    KNNRadiusResultSet<DistanceType> resultSet(radius, max_neighbors);
-                    for (size_t i = 0; i < queries.rows; i++) {
-                        resultSet.clear();
-                        findNeighbors(resultSet, queries[i], params);
-                        size_t n = resultSet.size();
-                        count += n;
-                        if ((int)n>max_neighbors) n = max_neighbors;
-                        indices[i].resize(n);
-                        dists[i].resize(n);
-                        resultSet.copy(&indices[i][0], &dists[i][0], n, sorted);
-                    }
-                }
-            }
-#ifdef TBB
-        }
-        else
-        {
-            // Initialise the task scheduler for the use of Intel TBB parallel constructs
-            tbb::task_scheduler_init task_sched(cores);
-            // Reset atomic count before passing it on to the threads, such that we can keep track of amount of neighbors found
-            atomic_count_ = 0;
-            // Use auto partitioner to choose the optimal grainsize for dividing the query points
-            flann::parallel_radiusSearch2<Distance> parallel_radius(queries, indices, dists, radius, params, nnIndex_, atomic_count_);
-            tbb::parallel_for(tbb::blocked_range<size_t>(0,queries.rows),
-                              parallel_radius,
-                              tbb::auto_partitioner());
-            count = atomic_count_;
-        }
-#endif
-        return count;
+        return nnIndex_->radiusSearch(queries, indices, dists, radius, params);
     }
/**
@@ -607,11 +309,6 @@ private:
     bool loaded_;

     /** Parameters passed to the index */
    IndexParams index_params_;
-
-#ifdef TBB
-    /** Atomic count variable, passed to the different threads for keeping track of the amount of neighbors found.
-        \note Intel TBB 'catch': must be data member for correct initialization tbb::atomic<T> has no declared constructors !! */
-    tbb::atomic<int> atomic_count_;
-#endif
 };
/**
......
@@ -40,6 +40,9 @@
 namespace flann
 {

+template <typename Distance> class NNIndex;
+
 template<typename Distance>
 class parallel_knnSearch
 {
@@ -129,7 +132,7 @@ private:
     //! Atomic count variable to keep track of the number of neighbors found
     //! \note must be mutable because body will be casted as const in parallel_for
-    mutable tbb::atomic<int>& count_;
+    tbb::atomic<int>& count_;
 };
@@ -226,7 +229,7 @@ private:
     //! Atomic count variable to keep track of the number of neighbors found
     //! \note must be mutable because body will be casted as const in parallel_for
-    mutable tbb::atomic<int>& count_;
+    tbb::atomic<int>& count_;
 };
@@ -348,7 +351,7 @@ private:
     //! Atomic count variable to keep track of the number of neighbors found
     //! \note must be mutable because body will be casted as const in parallel_for
-    mutable tbb::atomic<int>& count_;
+    tbb::atomic<int>& count_;
 };
@@ -463,7 +466,7 @@ private:
     //! Atomic count variable to keep track of the number of neighbors found
     //! \note must be mutable because body will be casted as const in parallel_for
-    mutable tbb::atomic<int>& count_;
+    tbb::atomic<int>& count_;
 };

 }
......
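The forward declaration `template <typename Distance> class NNIndex;` added above lets the body classes hold an `NNIndex<Distance>*` now that they are driven from NNIndex rather than Index, and `mutable` can be dropped because `operator()` is `const` only with respect to the body object itself: writing through a reference member does not modify the member. Below is a hedged skeleton of what such a TBB body needs, not the actual bodies.hpp code; it reuses only calls that appear elsewhere in this diff.

```cpp
template<typename Distance>
class parallel_knnSearch_sketch
{
public:
    typedef typename Distance::ElementType ElementType;
    typedef typename Distance::ResultType DistanceType;

    parallel_knnSearch_sketch(const Matrix<ElementType>& queries,
                              Matrix<int>& indices,
                              Matrix<DistanceType>& distances,
                              size_t knn,
                              const SearchParams& params,
                              NNIndex<Distance>* index,
                              tbb::atomic<int>& count)
        : queries_(queries), indices_(indices), distances_(distances),
          knn_(knn), params_(params), index_(index), count_(count) {}

    // tbb::parallel_for copies the body and calls this on sub-ranges of the
    // query rows; everything mutated lives behind references or pointers,
    // so the const qualifier costs nothing.
    void operator()(const tbb::blocked_range<size_t>& r) const
    {
        KNNSimpleResultSet<DistanceType> resultSet(knn_);
        for (size_t i = r.begin(); i != r.end(); ++i) {
            resultSet.clear();
            index_->findNeighbors(resultSet, queries_[i], params_);
            resultSet.copy(indices_[i], distances_[i], knn_, true);
            count_ += resultSet.size();
        }
    }

private:
    const Matrix<ElementType>& queries_;
    Matrix<int>& indices_;
    Matrix<DistanceType>& distances_;
    size_t knn_;
    const SearchParams& params_;
    NNIndex<Distance>* index_;
    tbb::atomic<int>& count_;   // shared across body copies, hence atomic
};
```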
@@ -67,7 +67,7 @@ protected:
         fflush(stdout);
         flann::load_from_file(data, "cloud.h5","dataset");
         flann::load_from_file(query,"cloud.h5","query");
-        flann::load_from_file(match,"cloud.h5","match");
+        flann::load_from_file(match,"cloud.h5","indices");
         dists = flann::Matrix<float>(new float[query.rows*nn], query.rows, nn);
         indices = flann::Matrix<int>(new int[query.rows*nn], query.rows, nn);
@@ -118,7 +118,7 @@ TEST_F(FlannTest, HandlesMultiCoreSearch)
     int checks = -1;
     float eps = 0.0f;
     bool sorted = true;
-    int cores = -1;
+    int cores = 2;

     start_timer("Searching KNN...");
     index.knnSearch(query, indices, dists, GetNN(), flann::SearchParams(checks,eps,sorted,cores));
......