From 56bbe40faf8539963c4127935da4cbf05ae6714c Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Sun, 5 May 2019 16:00:13 +0800 Subject: [PATCH] 1. fix operand serialize bug 2. support gpu-build 3. add unittest Former-commit-id: bb36dcb05220d8f0648f282c7e38fe20f4ab3c16 --- cpp/src/wrapper/Index.h | 25 +++--- cpp/src/wrapper/IndexBuilder.cpp | 37 +++++---- cpp/src/wrapper/IndexBuilder.h | 1 - cpp/src/wrapper/Operand.cpp | 6 +- cpp/src/wrapper/Operand.h | 2 +- cpp/unittest/faiss_wrapper/wrapper_test.cpp | 84 ++++++++++++--------- 6 files changed, 87 insertions(+), 68 deletions(-) diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h index 943df37f..b5c5d0cc 100644 --- a/cpp/src/wrapper/Index.h +++ b/cpp/src/wrapper/Index.h @@ -39,11 +39,11 @@ public: virtual bool reset(); /** - * @brief Same as add, but stores xids instead of sequential ids. - * - * @param data input matrix, size n * d - * @param if ids is not empty ids for the std::vectors - */ + * @brief Same as add, but stores xids instead of sequential ids. + * + * @param data input matrix, size n * d + * @param if ids is not empty ids for the std::vectors + */ virtual bool add_with_ids(idx_t n, const float *xdata, const long *xids); /** @@ -57,23 +57,20 @@ public: */ virtual bool search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const; -// virtual bool remove_ids(const faiss::IDSelector &sel, long &nremove, long &location); + //virtual bool search(idx_t n, const std::vector &data, idx_t k, + // std::vector &distances, std::vector &labels) const; -// virtual bool remove_ids_range(const faiss::IDSelector &sel, long &nremove); + //virtual bool remove_ids(const faiss::IDSelector &sel, long &nremove, long &location); + //virtual bool remove_ids_range(const faiss::IDSelector &sel, long &nremove); + //virtual bool index_display(); -// virtual bool index_display(); -// virtual std::shared_ptr data() { return index_; } + virtual const std::shared_ptr& data() const { return index_; } private: friend void write_index(const Index_ptr &index, const std::string &file_name); std::shared_ptr index_ = nullptr; -// std::vector res_; -// std::vector devs_; -// bool usegpu = true; -// int ngpus = 0; -// faiss::gpu::GpuMultipleClonerOptions *options = new faiss::gpu::GpuMultipleClonerOptions(); }; diff --git a/cpp/src/wrapper/IndexBuilder.cpp b/cpp/src/wrapper/IndexBuilder.cpp index 5d0180a3..6d98106b 100644 --- a/cpp/src/wrapper/IndexBuilder.cpp +++ b/cpp/src/wrapper/IndexBuilder.cpp @@ -6,41 +6,52 @@ #include "mutex" +#include +#include "faiss/gpu/GpuIndexIVFFlat.h" +#include "faiss/gpu/GpuAutoTune.h" + #include "IndexBuilder.h" + namespace zilliz { namespace vecwise { namespace engine { using std::vector; -// todo(linxj): use ResourceMgr instead -static std::mutex cpu_resource; +static std::mutex gpu_resource; IndexBuilder::IndexBuilder(const Operand_ptr &opd) { opd_ = opd; } +// Default: build use gpu Index_ptr IndexBuilder::build_all(const long &nb, const float* xb, const long* ids, const long &nt, const float* xt) { - std::shared_ptr index = nullptr; - index.reset(faiss::index_factory(opd_->d, opd_->index_type.c_str())); - + std::shared_ptr host_index = nullptr; { - // currently only cpu resources are used. - std::lock_guard lk(cpu_resource); - if (!index->is_trained) { - nt == 0 || xt == nullptr ? index->train(nb, xb) - : index->train(nt, xt); + // TODO: list support index-type. + faiss::Index *ori_index = faiss::index_factory(opd_->d, opd_->index_type.c_str()); + + std::lock_guard lk(gpu_resource); + faiss::gpu::StandardGpuResources res; + auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index); + if (!device_index->is_trained) { + nt == 0 || xt == nullptr ? device_index->train(nb, xb) + : device_index->train(nt, xt); } - index->add_with_ids(nb, xb, ids); // todo(linxj): support add_with_idmap - } + device_index->add_with_ids(nb, xb, ids); + + host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index)); - return std::make_shared(index); + delete device_index; + delete ori_index; + } + return std::make_shared(host_index); } Index_ptr IndexBuilder::build_all(const long &nb, const vector &xb, diff --git a/cpp/src/wrapper/IndexBuilder.h b/cpp/src/wrapper/IndexBuilder.h index ed5f8a39..e8acc89e 100644 --- a/cpp/src/wrapper/IndexBuilder.h +++ b/cpp/src/wrapper/IndexBuilder.h @@ -43,7 +43,6 @@ public: private: Operand_ptr opd_ = nullptr; -// std::shared_ptr index_ = nullptr; }; using IndexBuilderPtr = std::shared_ptr; diff --git a/cpp/src/wrapper/Operand.cpp b/cpp/src/wrapper/Operand.cpp index 5b5ec6ab..131f821b 100644 --- a/cpp/src/wrapper/Operand.cpp +++ b/cpp/src/wrapper/Operand.cpp @@ -13,9 +13,9 @@ namespace engine { std::ostream &operator<<(std::ostream &os, const Operand &obj) { os << obj.d << " " << obj.index_type << " " + << obj.metric_type << " " << obj.preproc << " " << obj.postproc << " " - << obj.metric_type << " " << obj.ncent; return os; } @@ -23,16 +23,16 @@ std::ostream &operator<<(std::ostream &os, const Operand &obj) { std::istream &operator>>(std::istream &is, Operand &obj) { is >> obj.d >> obj.index_type + >> obj.metric_type >> obj.preproc >> obj.postproc - >> obj.metric_type >> obj.ncent; return is; } std::string operand_to_str(const Operand_ptr &opd) { std::ostringstream ss; - ss << opd; + ss << *opd; return ss.str(); } diff --git a/cpp/src/wrapper/Operand.h b/cpp/src/wrapper/Operand.h index 09fca8cf..047ca917 100644 --- a/cpp/src/wrapper/Operand.h +++ b/cpp/src/wrapper/Operand.h @@ -22,9 +22,9 @@ struct Operand { int d; std::string index_type = "IVF13864,Flat"; + std::string metric_type = "L2"; //> L2 / Inner Product std::string preproc; std::string postproc; - std::string metric_type = "L2"; // L2 / Inner Product int ncent; }; diff --git a/cpp/unittest/faiss_wrapper/wrapper_test.cpp b/cpp/unittest/faiss_wrapper/wrapper_test.cpp index c2d29e30..87a67290 100644 --- a/cpp/unittest/faiss_wrapper/wrapper_test.cpp +++ b/cpp/unittest/faiss_wrapper/wrapper_test.cpp @@ -14,11 +14,21 @@ using namespace zilliz::vecwise::engine; TEST(operand_test, Wrapper_Test) { + using std::cout; + using std::endl; + auto opd = std::make_shared(); - opd->index_type = "IVF16384,Flat"; - opd->d = 256; + opd->index_type = "IDMap,Flat"; + opd->preproc = "opq"; + opd->postproc = "pq"; + opd->metric_type = "L2"; + opd->ncent = 256; + opd->d = 64; + + auto opd_str = operand_to_str(opd); + auto new_opd = str_to_operand(opd_str); - std::cout << opd << std::endl; + assert(new_opd->index_type == opd->index_type); } TEST(build_test, Wrapper_Test) { @@ -68,59 +78,61 @@ TEST(build_test, Wrapper_Test) { //search in first quadrant int nq = 1, k = 10; std::vector xq = {0.5, 0.5, 0.5}; - float* result_dists = new float[k]; - long* result_ids = new long[k]; + float *result_dists = new float[k]; + long *result_ids = new long[k]; index_1->search(nq, xq.data(), k, result_dists, result_ids); - for(int i = 0; i < k; i++) { - if(result_ids[i] < 0) { + for (int i = 0; i < k; i++) { + if (result_ids[i] < 0) { ASSERT_TRUE(false); break; } long id = result_ids[i]; - std::cout << "No." << id << " [" << xb[id*3] << ", " << xb[id*3 + 1] << ", " - << xb[id*3 + 2] <<"] distance = " << result_dists[i] << std::endl; + std::cout << "No." << id << " [" << xb[id * 3] << ", " << xb[id * 3 + 1] << ", " + << xb[id * 3 + 2] << "] distance = " << result_dists[i] << std::endl; //makesure result vector is in first quadrant - ASSERT_TRUE(xb[id*3] > 0.0); - ASSERT_TRUE(xb[id*3 + 1] > 0.0); - ASSERT_TRUE(xb[id*3 + 2] > 0.0); + ASSERT_TRUE(xb[id * 3] > 0.0); + ASSERT_TRUE(xb[id * 3 + 1] > 0.0); + ASSERT_TRUE(xb[id * 3 + 2] > 0.0); } delete[] result_dists; delete[] result_ids; } -TEST(search_test, Wrapper_Test) { - const int dim = 256; +TEST(gpu_build_test, Wrapper_Test) { + using std::vector; - size_t nb = 25000; - size_t nq = 100; - size_t k = 100; - std::vector xb(nb*dim); - std::vector xq(nq*dim); - std::vector ids(nb*dim); + int d = 256; + int nb = 3 * 1000 * 100; + int nq = 100; + vector xb(d * nb); + vector xq(d * nq); + vector ids(nb); std::random_device rd; std::mt19937 gen(rd()); std::uniform_real_distribution<> dis_xt(-1.0, 1.0); - for (size_t i = 0; i < nb*dim; i++) { - xb[i] = dis_xt(gen); - ids[i] = i; - } - for (size_t i = 0; i < nq*dim; i++) { - xq[i] = dis_xt(gen); - } + for (auto &e : xb) { e = float(dis_xt(gen)); } + for (auto &e : xq) { e = float(dis_xt(gen)); } + for (int i = 0; i < nb; ++i) { ids[i] = i; } - // result data - std::vector nns_gt(nq*k); // nns = nearst neg search - std::vector nns(nq*k); - std::vector dis_gt(nq*k); - std::vector dis(nq*k); - faiss::Index* index_gt(faiss::index_factory(dim, "IDMap,Flat")); - index_gt->add_with_ids(nb, xb.data(), ids.data()); - index_gt->search(nq, xq.data(), 10, dis_gt.data(), nns_gt.data()); - std::cout << "data: " << nns_gt[0]; + auto opd = std::make_shared(); + opd->index_type = "IVF256,Flat"; + opd->d = d; + opd->ncent = 256; + IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); + auto index_1 = index_builder_1->build_all(nb, xb.data(), ids.data()); + assert(index_1->ntotal == nb); + assert(index_1->dim == d); + + // sanity check: search 5 first vectors of xb + int k = 1; + vector I(5 * k); + vector D(5 * k); + index_1->search(5, xb.data(), k, D.data(), I.data()); + for (int i = 0; i < 5; ++i) { assert(i == I[i]); } } -- GitLab