diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt
index 7e40a4885cc0aa92bc79ee3bfefc2d649955278c..b635b17e3b74bdb4618818f76904bf368ce2b4bc 100644
--- a/cpp/src/CMakeLists.txt
+++ b/cpp/src/CMakeLists.txt
@@ -8,6 +8,7 @@ aux_source_directory(cache cache_files)
 aux_source_directory(config config_files)
 aux_source_directory(server server_files)
 aux_source_directory(utils utils_files)
+aux_source_directory(wrapper wrapper_files)
 
 set(service_files
     thrift/gen-cpp/VecService.cpp
@@ -17,7 +18,13 @@ set(service_files
 set(vecwise_engine_src
     ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
     ${cache_files}
-    )
+    ${wrapper_files})
+
+include_directories(/usr/local/cuda/include)
+# find_library() stores one library path per call, so cudart and cublas are located separately.
+find_library(cudart_library NAMES cudart HINTS /usr/local/cuda/lib64)
+find_library(cublas_library NAMES cublas HINTS /usr/local/cuda/lib64)
+set(cuda_library ${cudart_library} ${cublas_library})
 
 add_library(vecwise_engine STATIC ${vecwise_engine_src})
 
@@ -26,6 +33,7 @@ add_executable(vecwise_engine_server
     ${server_files}
     ${utils_files}
     ${service_files}
+    ${wrapper_files}
     ${VECWISE_THIRD_PARTY_BUILD}/include/easylogging++.cc
     )
 
@@ -36,5 +44,6 @@ set(dependency_libs
     boost_filesystem
     thrift
     pthread
+    faiss
     )
-target_link_libraries(vecwise_engine_server ${dependency_libs})
\ No newline at end of file
+target_link_libraries(vecwise_engine_server ${dependency_libs} ${cuda_library})
\ No newline at end of file
diff --git a/cpp/src/db/db.h b/cpp/src/db/db.h
index abaa4984210d26846770221022da5391bb0b22b9..c14822b041d75d4d6497baa9c6dbd543f6686d00 100644
--- a/cpp/src/db/db.h
+++ b/cpp/src/db/db.h
@@ -29,6 +29,11 @@ public:
     virtual Status add_vectors(const std::string& group_id_,
             size_t n, const float* vectors, IDNumbers& vector_ids_) = 0;
 
+    // NOTE(review): presumably returns, for each of the nq query vectors, its k
+    // nearest neighbours within group_id; the only implementation is still a stub - confirm.
+    virtual Status search(const std::string& group_id, size_t k, size_t nq,
+            const float* vectors, QueryResults& results) = 0;
+
     DB() = default;
     DB(const DB&) = delete;
     DB& operator=(const DB&) = delete;
diff --git a/cpp/src/db/db_impl.cpp b/cpp/src/db/db_impl.cpp
index a23fbe1db6c410d11b4e5ef29b155305b44f01da..87c83e7027c79d6ff0b10cdf197ba0095b2bb316 100644
--- a/cpp/src/db/db_impl.cpp
+++ b/cpp/src/db/db_impl.cpp
@@ -52,6 +52,13 @@ Status DBImpl::add_vectors(const std::string& group_id_,
     }
 }
 
+Status DBImpl::search(const std::string& group_id, size_t k, size_t nq,
+        const float* vectors, QueryResults& results) {
+    // Not implemented yet: results is left untouched and OK is returned.
+    // PXU TODO
+    return Status::OK();
+}
+
 void DBImpl::start_timer_task(int interval_) {
     std::thread bg_task(&DBImpl::background_timer_task, this, interval_);
     bg_task.detach();
diff --git a/cpp/src/db/db_impl.h b/cpp/src/db/db_impl.h
index d9e3a3d7c92ea79a4bda83bf48f1f0ebaff00928..b8e45f800610e87da3aa6b1c084e8968a1be7812 100644
--- a/cpp/src/db/db_impl.h
+++ b/cpp/src/db/db_impl.h
@@ -31,6 +31,9 @@ public:
     virtual Status add_vectors(const std::string& group_id_,
             size_t n, const float* vectors, IDNumbers& vector_ids_) override;
 
+    virtual Status search(const std::string& group_id, size_t k, size_t nq,
+            const float* vectors, QueryResults& results) override;
+
     virtual ~DBImpl();
 
 private:
diff --git a/cpp/src/db/types.h b/cpp/src/db/types.h
index c9ed274defe7537baf471a42d43a37fa674dc1d6..be538b9b1cf484f199425367c9a4e0c6d832d9d2 100644
--- a/cpp/src/db/types.h
+++ b/cpp/src/db/types.h
@@ -10,6 +10,10 @@ typedef long IDNumber;
 typedef IDNumber* IDNumberPtr;
 typedef std::vector<IDNumber> IDNumbers;
 
+// NOTE(review): the element types below were missing from the patch text; IDNumber / QueryResult are assumed - confirm.
+typedef std::vector<IDNumber> QueryResult;
+typedef std::vector<QueryResult> QueryResults;
+
 } // namespace engine
 } // namespace vecwise
 
diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef849bc2f79d5189cfadfd382a5d174b0fe339ed
--- /dev/null
+++ b/cpp/src/wrapper/Index.cpp
@@ -0,0 +1,67 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#ifdef CUDA_VERSION
+#include "faiss/gpu/GpuAutoTune.h"
+#include "faiss/gpu/StandardGpuResources.h"
+#include "faiss/gpu/utils/DeviceUtils.h"
+#endif
+
+#include "Index.h"
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+// Wraps raw_index and caches its dimension (d) and current vector count (ntotal).
+Index::Index(const std::shared_ptr<faiss::Index> &raw_index) {
+    index_ = raw_index;
+    dim = index_->d;
+    ntotal = index_->ntotal;
+    store_on_gpu = false;
+}
+
+// Resets the wrapped index and refreshes ntotal; returns false if an exception is caught.
+bool Index::reset() {
+    try {
+        index_->reset();
+        ntotal = index_->ntotal;
+    }
+    catch (const std::exception &e) {
+//        LOG(ERROR) << e.what();
+        return false;
+    }
+    return true;
+}
+
+// Adds n vectors under explicit ids, then re-reads ntotal from the wrapped index as reset() does.
+bool Index::add_with_ids(idx_t n, const float *xdata, const long *xids) {
+    try {
+        index_->add_with_ids(n, xdata, xids);
+        ntotal = index_->ntotal;
+    }
+    catch (const std::exception &e) {
+//        LOG(ERROR) << e.what();
+        return false;
+    }
+    return true;
+}
+
+// Forwards the query to the wrapped index; returns false if an exception is caught.
+bool Index::search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const {
+    try {
+        index_->search(n, data, k, distances, labels);
+    }
+    catch (const std::exception &e) {
+//        LOG(ERROR) << e.what();
+        return false;
+    }
+    return true;
+}
+
+}
+}
+}
diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h
new file mode 100644
index 0000000000000000000000000000000000000000..d5dfa9cb55f9e345318dff0cce9c37cc0a0b2597
--- /dev/null
+++ b/cpp/src/wrapper/Index.h
@@ -0,0 +1,82 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+// NOTE(review): a fifth standard header was listed here; its name is not recoverable - confirm.
+
+#include "faiss/AutoTune.h"
+#include "faiss/AuxIndexStructures.h"
+#include "faiss/gpu/GpuAutoTune.h"
+#include "faiss/index_io.h"
+
+#include "Operand.h"
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+/// Wrapper around a shared faiss::Index whose operations report failure by returning false.
+class Index {
+    typedef long idx_t;
+
+public:
+    int dim;              ///< vector dimension
+    idx_t ntotal;         ///< total number of indexed vectors
+    bool store_on_gpu;    ///< set to false by the constructor
+
+    explicit Index(const std::shared_ptr<faiss::Index> &raw_index);
+
+    virtual ~Index() = default;
+
+    virtual bool reset();    ///< resets the wrapped index and refreshes ntotal
+
+    /**
+     * @brief Same as add, but stores xids instead of sequential ids.
+     *
+     * @param n     number of vectors
+     * @param xdata input matrix, size n * d
+     * @param xids  ids for the vectors
+     */
+    virtual bool add_with_ids(idx_t n, const float *xdata, const long *xids);
+
+    /**
+     * @brief for each query vector, find its k nearest neighbors in the database
+     *
+     * @param n queries size
+     * @param data query vectors
+     * @param k top k nearest neighbors
+     * @param distances top k nearest distances
+     * @param labels neighbors of the queries
+     */
+    virtual bool search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const;
+
+//    virtual bool remove_ids(const faiss::IDSelector &sel, long &nremove, long &location);
+//    virtual bool remove_ids_range(const faiss::IDSelector &sel, long &nremove);
+//    virtual bool index_display();
+
+private:
+    std::shared_ptr<faiss::Index> index_ = nullptr;
+//    std::vector res_;
+//    std::vector devs_;
+//    bool usegpu = true;
+//    int ngpus = 0;
+//    faiss::gpu::GpuMultipleClonerOptions *options = new faiss::gpu::GpuMultipleClonerOptions();
+};
+
+using Index_ptr = std::shared_ptr<Index>;
+
+extern void write_index(const Index_ptr &index, const std::string &file_name);
+
+extern Index_ptr read_index(const std::string &file_name);
+
+}
+}
+}
diff --git a/cpp/src/wrapper/IndexBuilder.cpp b/cpp/src/wrapper/IndexBuilder.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1e28df1914193275ed4eb85ad1cad2471fd3ae42
--- /dev/null
+++ b/cpp/src/wrapper/IndexBuilder.cpp
@@ -0,0 +1,60 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#include <mutex>
+
+#include "IndexBuilder.h"
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+using std::vector;
+
+// todo(linxj): use ResourceMgr instead
+static std::mutex cpu_resource;
+
+// Stores the build options used by subsequent build_all() calls.
+IndexBuilder::IndexBuilder(const Operand_ptr &opd) {
+    opd_ = opd;
+}
+
+// Creates the index described by opd_ (dimension and faiss factory string),
+// trains it when faiss reports it as untrained, and inserts the nb vectors of
+// xb under the caller-supplied ids. Training uses xt when nt != 0 and xt is
+// non-empty; otherwise it falls back to the base vectors xb.
+Index_ptr IndexBuilder::build_all(const long &nb, const vector<float> &xb,
+                                  const vector<long> &ids,
+                                  const long &nt, const vector<float> &xt) {
+    std::shared_ptr<faiss::Index> index = nullptr;
+    index.reset(faiss::index_factory(opd_->d, opd_->index_type.c_str()));
+
+    {
+        // currently only cpu resources are used.
+        std::lock_guard<std::mutex> lk(cpu_resource);
+        if (!index->is_trained) {
+            if (nt == 0 || xt.empty()) {
+                index->train(nb, xb.data());
+            } else {
+                index->train(nt, xt.data());
+            }
+        }
+        // Insert each vector exactly once, keyed by its external id. A preceding
+        // plain add() would store every vector a second time under a sequential id.
+        index->add_with_ids(nb, xb.data(), ids.data()); // todo(linxj): support add_with_idmap
+    }
+
+    return std::make_shared<Index>(index);
+}
+
+// Be Factory pattern later
+IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) {
+    return std::make_shared<IndexBuilder>(opd);
+}
+
+}
+}
+}
diff --git a/cpp/src/wrapper/IndexBuilder.h b/cpp/src/wrapper/IndexBuilder.h
new file mode 100644
index 0000000000000000000000000000000000000000..97479b91e53f91a93f73376c477771f20674c7d6
--- /dev/null
+++ b/cpp/src/wrapper/IndexBuilder.h
@@ -0,0 +1,51 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include "faiss/Index.h"
+
+#include "Operand.h"
+#include "Index.h"
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+/// Builds Index objects from the options held in an Operand.
+class IndexBuilder {
+public:
+    explicit IndexBuilder(const Operand_ptr &opd);
+
+    /// Creates an index, trains it if required and adds the nb vectors of xb together with ids.
+    /// When nt is 0 or xt is empty, xb is also used as the training set.
+    Index_ptr build_all(const long &nb,
+                        const std::vector<float> &xb,
+                        const std::vector<long> &ids,
+                        const long &nt = 0,
+                        const std::vector<float> &xt = std::vector<float>());
+
+    // NOTE(review): IndexBuilder.cpp defines none of train/add/set_build_option - confirm before calling them.
+    void train(const long &nt, const std::vector<float> &xt);
+
+    Index_ptr add(const long &nb,
+                  const std::vector<float> &xb,
+                  const std::vector<long> &ids);
+
+    void set_build_option(const Operand_ptr &opd);
+
+private:
+    Operand_ptr opd_ = nullptr;
+//    std::shared_ptr index_ = nullptr;
+};
+
+using IndexBuilderPtr = std::shared_ptr<IndexBuilder>;
+
+extern IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd);
+
+}
+}
+}
diff --git a/cpp/src/wrapper/Operand.cpp b/cpp/src/wrapper/Operand.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5b5ec6ab18fd978d9001870db0240e2e24c6bb3d
--- /dev/null
+++ b/cpp/src/wrapper/Operand.cpp
@@ -0,0 +1,49 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "Operand.h"
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+// Writes d, index_type, preproc, postproc, metric_type and ncent, space-separated.
+// NOTE(review): an empty preproc/postproc emits no token, so operator>> cannot
+// read such output back field-for-field - confirm the intended format.
+std::ostream &operator<<(std::ostream &os, const Operand &obj) {
+    os << obj.d << " " << obj.index_type << " " << obj.preproc << " "
+       << obj.postproc << " " << obj.metric_type << " " << obj.ncent;
+    return os;
+}
+
+// Reads six whitespace-separated tokens into obj, in the order operator<< writes them.
+std::istream &operator>>(std::istream &is, Operand &obj) {
+    is >> obj.d >> obj.index_type >> obj.preproc
+       >> obj.postproc >> obj.metric_type >> obj.ncent;
+    return is;
+}
+
+// Serializes the Operand that opd points to; a null opd yields an empty string.
+std::string operand_to_str(const Operand_ptr &opd) {
+    std::ostringstream ss;
+    if (opd) {
+        // Stream the object itself: streaming the shared_ptr prints its address.
+        ss << *opd;
+    }
+    return ss.str();
+}
+
+// Parses input (in the operator<< layout) into a newly allocated Operand.
+Operand_ptr str_to_operand(const std::string &input) {
+    std::istringstream is(input);
+    auto opd = std::make_shared<Operand>();
+    is >> *opd;
+    return opd;
+}
+
+}
+}
+}
diff --git a/cpp/src/wrapper/Operand.h b/cpp/src/wrapper/Operand.h
new file mode 100644
index 0000000000000000000000000000000000000000..09fca8cfa7e37e8f037ad44ba5da44d187b15f22
--- /dev/null
+++ b/cpp/src/wrapper/Operand.h
@@ -0,0 +1,40 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+/// Build options for an index; streamed as text by operator<< and operator>>.
+struct Operand {
+    friend std::ostream &operator<<(std::ostream &os, const Operand &obj);
+
+    friend std::istream &operator>>(std::istream &is, Operand &obj);
+
+    int d = 0;                                  ///< vector dimension handed to faiss::index_factory
+    std::string index_type = "IVF13864,Flat";   ///< description string handed to faiss::index_factory
+    std::string preproc;
+    std::string postproc;
+    std::string metric_type = "L2"; // L2 / Inner Product
+    int ncent = 0;
+};
+
+using Operand_ptr = std::shared_ptr<Operand>;
+
+extern std::string operand_to_str(const Operand_ptr &opd);
+
+extern Operand_ptr str_to_operand(const std::string &input);
+
+}
+}
+}
diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt
index 604861f4a66af7a736cd5a623a8b3f16c554e8d7..454479ea477b7ad4fcc6de89f5c234d90055f992 100644
--- a/cpp/unittest/CMakeLists.txt
+++ b/cpp/unittest/CMakeLists.txt
@@ -17,4 +17,5 @@ set(unittest_libs
     pthread)
 
 add_subdirectory(cache)
-add_subdirectory(log)
\ No newline at end of file
+add_subdirectory(log)
+add_subdirectory(faiss_wrapper)
\ No newline at end of file
diff --git a/cpp/unittest/faiss_wrapper/CMakeLists.txt b/cpp/unittest/faiss_wrapper/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed113e5070309cd39bdf00226d84f3c2e7fae695
--- /dev/null
+++ b/cpp/unittest/faiss_wrapper/CMakeLists.txt
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------------------------
+# Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+# Unauthorized copying of this file, via any medium is strictly prohibited.
+# Proprietary and confidential.
+#-------------------------------------------------------------------------------
+include_directories(../../src)
+
+aux_source_directory(../../src/wrapper wrapper_src)
+
+# Make sure that your call to link_directories takes place before your call to the relevant add_executable.
+include_directories(/usr/local/cuda/include)
+link_directories("/usr/local/cuda/lib64")
+
+set(wrapper_test_src
+    ${unittest_srcs}
+    ${wrapper_src}
+    wrapper_test.cpp)
+
+add_executable(wrapper_test ${wrapper_test_src})
+
+set(faiss_libs
+    faiss
+    cudart
+    cublas
+    )
+target_link_libraries(wrapper_test ${unittest_libs} ${faiss_libs})
diff --git a/cpp/unittest/faiss_wrapper/wrapper_test.cpp b/cpp/unittest/faiss_wrapper/wrapper_test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b410d4a55016806279dac599cb741a63cad2c1ba
--- /dev/null
+++ b/cpp/unittest/faiss_wrapper/wrapper_test.cpp
@@ -0,0 +1,72 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#include <gtest/gtest.h>
+
+#include "wrapper/Operand.h"
+#include "wrapper/Index.h"
+#include "wrapper/IndexBuilder.h"
+
+using namespace zilliz::vecwise::engine;
+
+
+// Prints an Operand through its operator<<.
+TEST(operand_test, Wrapper_Test) {
+    auto opd = std::make_shared<Operand>();
+    opd->index_type = "IVF16384,Flat";
+    opd->d = 256;
+
+    // Dereference so the Operand fields are printed rather than the pointer value.
+    std::cout << *opd << std::endl;
+}
+
+// Builds an IVF index over random vectors with IndexBuilder::build_all.
+TEST(build_test, Wrapper_Test) {
+    // dimension of the vectors to index
+    int d = 64;
+
+    // size of the database we plan to index
+    size_t nb = 100000;
+
+    // make a set of nt training vectors in the unit cube
+    size_t nt = 150000;
+
+    // a reasonable number of centroids to index nb vectors
+    int ncentroids = 25;
+
+    srand48(35); // seed
+
+    std::vector<float> xb(nb * d);
+    for (size_t i = 0; i < nb * d; i++) {
+        xb[i] = drand48();
+    }
+
+    // drand48() lies in [0, 1) and would truncate to 0 for every id, so use the
+    // position of each vector as its id instead.
+    std::vector<long> ids(nb);
+    for (size_t i = 0; i < nb; i++) {
+        ids[i] = static_cast<long>(i);
+    }
+
+    std::vector<float> xt(nt * d);
+    for (size_t i = 0; i < nt * d; i++) {
+        xt[i] = drand48();
+    }
+
+    // Describe the index with the dimension and centroid count chosen above;
+    // build_all reads both d and index_type from the Operand.
+    auto opd = std::make_shared<Operand>();
+    opd->d = d;
+    opd->ncent = ncentroids;
+    opd->index_type = "IVF" + std::to_string(ncentroids) + ",Flat";
+
+    IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd);
+    auto index_1 = index_builder_1->build_all(nb, xb, ids, nt, xt);
+
+    ASSERT_TRUE(index_1 != nullptr);
+    ASSERT_EQ(index_1->dim, d);
+}
+