From f7287d74ea2e8ba734c94740b30932b7482e5965 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Thu, 4 Jul 2019 14:47:23 +0800 Subject: [PATCH] MS-154 1. Add Submodule knowhere and specify branch 2. Integrate Knowhere, update many cmakelist 3. dev knowhere-wrapper 4. dev knowhere-wrapper unittest Former-commit-id: 783a85562b2b5db3b89920b0c68de133a577f245 --- .gitmodules | 4 + cpp/CHANGELOG.md | 11 ++ cpp/README.md | 12 +- cpp/cmake/DefineOptions.cmake | 13 +- cpp/cmake/ThirdPartyPackages.cmake | 57 +++++++ cpp/src/CMakeLists.txt | 17 ++- cpp/src/wrapper/knowhere/data_transfer.cpp | 48 ++++++ cpp/src/wrapper/knowhere/data_transfer.h | 24 +++ cpp/src/wrapper/knowhere/vec_impl.cpp | 95 ++++++++++++ cpp/src/wrapper/knowhere/vec_impl.h | 38 +++++ cpp/src/wrapper/knowhere/vec_index.cpp | 43 ++++++ cpp/src/wrapper/knowhere/vec_index.h | 56 +++++++ cpp/thirdparty/knowhere | 1 + cpp/unittest/CMakeLists.txt | 4 +- cpp/unittest/db/CMakeLists.txt | 4 +- cpp/unittest/index_wrapper/CMakeLists.txt | 23 +++ cpp/unittest/index_wrapper/knowhere_test.cpp | 149 +++++++++++++++++++ cpp/unittest/index_wrapper/utils.cpp | 81 ++++++++++ cpp/unittest/index_wrapper/utils.h | 61 ++++++++ cpp/unittest/metrics/CMakeLists.txt | 4 +- cpp/unittest/server/CMakeLists.txt | 6 +- 21 files changed, 727 insertions(+), 24 deletions(-) create mode 100644 .gitmodules create mode 100644 cpp/src/wrapper/knowhere/data_transfer.cpp create mode 100644 cpp/src/wrapper/knowhere/data_transfer.h create mode 100644 cpp/src/wrapper/knowhere/vec_impl.cpp create mode 100644 cpp/src/wrapper/knowhere/vec_impl.h create mode 100644 cpp/src/wrapper/knowhere/vec_index.cpp create mode 100644 cpp/src/wrapper/knowhere/vec_index.h create mode 160000 cpp/thirdparty/knowhere create mode 100644 cpp/unittest/index_wrapper/CMakeLists.txt create mode 100644 cpp/unittest/index_wrapper/knowhere_test.cpp create mode 100644 cpp/unittest/index_wrapper/utils.cpp create mode 100644 cpp/unittest/index_wrapper/utils.h diff --git a/.gitmodules 
b/.gitmodules new file mode 100644 index 00000000..297cf0e5 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "cpp/thirdparty/knowhere"] + path = cpp/thirdparty/knowhere + url = git@192.168.1.105:xiaojun.lin/knowhere.git + branch = develop diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index bca5826c..949a05c8 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -3,6 +3,17 @@ Please mark all change in change log and use the ticket from JIRA. +# Milvus 0.3.2 (2019-07-10) + +## Bug + +## Improvement + +## New Feature +- MS-154 - Integrate knowhere + +## Task + # Milvus 0.3.1 (2019-07-10) ## Bug diff --git a/cpp/README.md b/cpp/README.md index 1b2f507d..e656f1b3 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -1,13 +1,12 @@ ### Compilation #### Step 1: install necessery tools - Install MySQL centos7 : - yum install gfortran qt4 flex bison mysql-devel + yum install gfortran qt4 flex bison mysql-devel mysql ubuntu16.04 : - sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev + sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev mysql-client If `libmysqlclient_r.so` does not exist after installing MySQL Development Files, you need to create a symbolic link: @@ -21,6 +20,9 @@ cmake_build/src/milvus_server is the server cmake_build/src/libmilvus_engine.a is the static library + git submodule init + git submodule update + cd [sourcecode path]/cpp ./build.sh -t Debug ./build.sh -t Release @@ -53,10 +55,10 @@ If you encounter the following error when building: ### Launch server Set config in cpp/conf/server_config.yaml -Add milvus/bin/lib to LD_LIBRARY_PATH +Add milvus/lib to LD_LIBRARY_PATH ``` -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/bin/lib +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib ``` Then launch server with config: diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake index 147663d0..82259d2e 100644 --- a/cpp/cmake/DefineOptions.cmake +++ 
b/cpp/cmake/DefineOptions.cmake @@ -68,20 +68,17 @@ define_option(MILVUS_WITH_BZ2 "Build with BZ2 compression" ON) define_option(MILVUS_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON) -define_option(MILVUS_WITH_FAISS "Build with FAISS library" ON) +define_option(MILVUS_WITH_FAISS "Build with FAISS library" OFF) -define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" ON) +define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" OFF) -#define_option_string(MILVUS_FAISS_GPU_ARCH "Specifying which GPU architectures to build against" -# "-gencode=arch=compute_35,code=compute_35 -gencode=arch=compute_52,code=compute_52 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=compute_61") - -define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" ON) +define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" OFF) define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON) define_option(MILVUS_WITH_JSONCONS "Build with JSONCONS" OFF) -define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" ON) +define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" OFF) define_option(MILVUS_WITH_PROMETHEUS "Build with PROMETHEUS library" ON) @@ -101,6 +98,8 @@ define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON) define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON) +define_option(MILVUS_WITH_KNOWHERE "Build with Knowhere" ON) + if(CMAKE_VERSION VERSION_LESS 3.7) set(MILVUS_WITH_ZSTD_DEFAULT OFF) else() diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index 9aa3f621..2c6c61db 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -23,6 +23,7 @@ set(MILVUS_THIRDPARTY_DEPENDENCIES Easylogging++ FAISS GTest + Knowhere JSONCONS LAPACK Lz4 @@ -61,6 +62,8 @@ macro(build_dependency DEPENDENCY_NAME) build_gtest() elseif("${DEPENDENCY_NAME}" STREQUAL "LAPACK") build_lapack() + elseif("${DEPENDENCY_NAME}" 
STREQUAL "Knowhere") + build_knowhere() elseif("${DEPENDENCY_NAME}" STREQUAL "Lz4") build_lz4() elseif ("${DEPENDENCY_NAME}" STREQUAL "MySQLPP") @@ -242,6 +245,12 @@ else() set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") endif() +if(DEFINED ENV{MILVUS_KNOWHERE_URL}) + set(KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}") +else() + set(KNOWHERE_SOURCE_URL "${CMAKE_SOURCE_DIR}/thirdparty/knowhere") +endif() + if (DEFINED ENV{MILVUS_GTEST_URL}) set(GTEST_SOURCE_URL "$ENV{MILVUS_GTEST_URL}") else () @@ -641,6 +650,54 @@ if(MILVUS_WITH_BZ2) include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") endif() +# ---------------------------------------------------------------------- +# Knowhere + +macro(build_knowhere) + message(STATUS "Building knowhere from source") + set(KNOWHERE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep") + set(KNOWHERE_INCLUDE_DIR "${KNOWHERE_PREFIX}/include") + set(KNOWHERE_STATIC_LIB + "${KNOWHERE_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}knowhere${CMAKE_STATIC_LIBRARY_SUFFIX}") + + set(KNOWHERE_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${KNOWHERE_PREFIX}" + -DCMAKE_INSTALL_LIBDIR=lib + -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc + -DCMAKE_BUILD_TYPE=Release) + + externalproject_add(knowhere_ep + URL + ${KNOWHERE_SOURCE_URL} + ${EP_LOG_OPTIONS} + CMAKE_ARGS + ${KNOWHERE_CMAKE_ARGS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} + BUILD_BYPRODUCTS + ${KNOWHERE_STATIC_LIB}) + + file(MAKE_DIRECTORY "${KNOWHERE_INCLUDE_DIR}") + add_library(knowhere STATIC IMPORTED) + set_target_properties( + knowhere + PROPERTIES IMPORTED_LOCATION "${KNOWHERE_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${KNOWHERE_INCLUDE_DIR}") + + add_dependencies(knowhere knowhere_ep) +endmacro() + +if(MILVUS_WITH_KNOWHERE) + resolve_dependency(Knowhere) + + get_target_property(KNOWHERE_INCLUDE_DIR knowhere INTERFACE_INCLUDE_DIRECTORIES) + link_directories(SYSTEM "${KNOWHERE_PREFIX}/lib") + 
include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}") + include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}/SPTAG/AnnService") +endif() + # ---------------------------------------------------------------------- # Easylogging++ diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index d0029d51..6a7fb683 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -12,6 +12,7 @@ aux_source_directory(utils utils_files) aux_source_directory(db db_files) aux_source_directory(wrapper wrapper_files) aux_source_directory(metrics metrics_files) +aux_source_directory(wrapper/knowhere knowhere_files) aux_source_directory(db/scheduler scheduler_files) aux_source_directory(db/scheduler/context scheduler_context_files) @@ -50,6 +51,7 @@ set(engine_files ${wrapper_files} # metrics/Metrics.cpp ${metrics_files} + ${knowhere_files} ) set(get_sys_info_files @@ -65,14 +67,17 @@ include_directories(thrift/gen-cpp) include_directories(/usr/include/mysql) set(third_party_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss + openblas + lapack easyloggingpp sqlite thrift yaml-cpp - libgpufaiss.a - faiss - lapack - openblas prometheus-cpp-push prometheus-cpp-pull prometheus-cpp-core @@ -84,6 +89,8 @@ set(third_party_libs snappy zlib zstd + cudart + cublas mysqlpp ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) @@ -97,8 +104,6 @@ if (GPU_VERSION STREQUAL "ON") pthread libgomp.a libgfortran.a - cudart - cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) else() diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp b/cpp/src/wrapper/knowhere/data_transfer.cpp new file mode 100644 index 00000000..af5ad212 --- /dev/null +++ b/cpp/src/wrapper/knowhere/data_transfer.cpp @@ -0,0 +1,48 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+////////////////////////////////////////////////////////////////////////////////
+
+#include "data_transfer.h"
+
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+using namespace zilliz::knowhere;
+
+// Wraps `nb` vectors of `dim` floats (tensor field "data") and their `nb` ids (array field "id")
+// in a knowhere Dataset. Both buffers are handed to the Construct* helpers as raw byte pointers.
+DatasetPtr
+GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids) {
+    std::vector<int64_t> shape{nb, dim};
+    auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape);
+    std::vector<TensorPtr> tensors{tensor};
+    std::vector<FieldPtr> tensor_fields{ConstructFloatField("data")};
+    auto tensor_schema = std::make_shared<Schema>(tensor_fields);
+
+    auto id_array = ConstructInt64Array((uint8_t *) ids, nb * sizeof(int64_t));
+    std::vector<ArrayPtr> arrays{id_array};
+    std::vector<FieldPtr> array_fields{ConstructInt64Field("id")};
+    auto array_schema = std::make_shared<Schema>(array_fields);  // was tensor_fields: ids need the "id" field
+    return std::make_shared<Dataset>(std::move(arrays), array_schema,
+                                     std::move(tensors), tensor_schema);
+}
+
+// Wraps `nb` vectors of `dim` floats in a Dataset that carries only the tensor field "data".
+DatasetPtr
+GenDataset(const int64_t &nb, const int64_t &dim, const float *xb) {
+    std::vector<int64_t> shape{nb, dim};
+    auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape);
+    std::vector<TensorPtr> tensors{tensor};
+    std::vector<FieldPtr> tensor_fields{ConstructFloatField("data")};
+    auto tensor_schema = std::make_shared<Schema>(tensor_fields);
+
+    return std::make_shared<Dataset>(std::move(tensors), tensor_schema);
+}
+
+}
+}
+}
diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h
new file mode 100644
index 00000000..c99cd1c7
--- /dev/null
+++ b/cpp/src/wrapper/knowhere/data_transfer.h
@@ -0,0 +1,24 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include "knowhere/adapter/structure.h"
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+// Wraps `nb` vectors of `dim` floats from `xb` together with `nb` ids from `ids` in a Dataset.
+extern zilliz::knowhere::DatasetPtr
+GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids);
+
+// Wraps `nb` vectors of `dim` floats from `xb` in a Dataset that carries no ids.
+extern zilliz::knowhere::DatasetPtr GenDataset(const int64_t &nb, const int64_t &dim, const float *xb);
+
+}
+}
+}
diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp
new file mode 100644
index 00000000..e24d470a
--- /dev/null
+++ b/cpp/src/wrapper/knowhere/vec_impl.cpp
@@ -0,0 +1,95 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#include <cstring>
+
+#include "knowhere/index/index.h"
+#include "knowhere/index/index_model.h"
+#include "knowhere/index/index_type.h"
+#include "knowhere/adapter/sptag.h"
+#include "knowhere/common/tensor.h"
+
+#include "vec_impl.h"
+#include "data_transfer.h"
+
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+using namespace zilliz::knowhere;
+
+// Builds the wrapped index from scratch: build the preprocessor, train, then add the vectors.
+//   nb, xb, ids - number of vectors, the vectors themselves and their ids
+//   cfg         - must contain "dim"; also forwarded to every knowhere call below
+//   nt, xt      - accepted to satisfy the VecIndex interface, not used by this implementation
+void VecIndexImpl::BuildAll(const long &nb,
+                            const float *xb,
+                            const long *ids,
+                            const Config &cfg,
+                            const long &nt,
+                            const float *xt) {
+    auto d = cfg["dim"].as<int>();
+    auto dataset = GenDatasetWithIds(nb, d, xb, ids);
+
+    // The same dataset is used for preprocessing, training and adding.
+    auto preprocessor = index_->BuildPreprocessor(dataset, cfg);
+    index_->set_preprocessor(preprocessor);
+    auto model = index_->Train(dataset, cfg);
+    index_->set_index_model(model);
+    index_->Add(dataset, cfg);
+}
+
+// Appends `nb` vectors `xb` with ids `ids` to the wrapped index.
+//   cfg - must contain "dim"; forwarded unchanged to the knowhere Add call
+void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) {
+    // TODO(linxj): Assert index is trained;
+
+    auto d = cfg["dim"].as<int>();
+    auto dataset = GenDatasetWithIds(nb, d, xb, ids);
+
+    index_->Add(dataset, cfg);
+}
+
+// Searches the wrapped index for the neighbours of the `nq` query vectors in `xq`.
+//   cfg  - must contain "dim" and "k"; forwarded unchanged to the knowhere Search call
+//   dist - out: receives nq * k floats copied from result array 1
+//   ids  - out: receives nq * k ids copied from result array 0
+// Both output buffers must have room for nq * k elements; this is not checked here.
+void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) {
+    // TODO: Assert index is trained;
+
+    auto d = cfg["dim"].as<int>();
+    auto k = cfg["k"].as<int>();
+    auto dataset = GenDataset(nq, d, xq);
+
+    auto res = index_->Search(dataset, cfg);
+    auto ids_array = res->array()[0];
+    auto dis_array = res->array()[1];
+
+    auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0);
+    // Distances come from the second result array (the original read ids_array a second time).
+    auto p_dist = dis_array->data()->GetValues<float>(1, 0);
+
+    // TODO(linxj): avoid copy here.
+    // sizeof(int64_t) is applied to a long* buffer: this assumes long is 64 bits wide.
+    std::memcpy(ids, p_ids, sizeof(int64_t) * nq * k);
+    std::memcpy(dist, p_dist, sizeof(float) * nq * k);
+}
+
+// Returns whatever the wrapped index produces from its own Serialize().
+zilliz::knowhere::BinarySet VecIndexImpl::Serialize() {
+    return index_->Serialize();
+}
+
+// Passes `index_binary` to the wrapped index's Load().
+void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) {
+    index_->Load(index_binary);
+}
+
+}
+}
+}
diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h
new file mode 100644
index 00000000..25f7d165
--- /dev/null
+++ b/cpp/src/wrapper/knowhere/vec_impl.h
@@ -0,0 +1,38 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include "knowhere/index/vector_index/vector_index.h"
+
+#include "vec_index.h"
+
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+// VecIndex implementation backed by a knowhere vector index. It stores the shared pointer
+// passed to the constructor; the overrides declared below are defined in vec_impl.cpp.
+class VecIndexImpl : public VecIndex {
+ public:
+    explicit VecIndexImpl(std::shared_ptr<zilliz::knowhere::VectorIndex> index) : index_(std::move(index)) {}
+
+    void BuildAll(const long &nb, const float *xb, const long *ids, const Config &cfg,
+                  const long &nt, const float *xt) override;
+    void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override;
+    void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override;
+    zilliz::knowhere::BinarySet Serialize() override;
+    void Load(const zilliz::knowhere::BinarySet &index_binary) override;
+
+ private:
+    // The knowhere index every override forwards to.
+    std::shared_ptr<zilliz::knowhere::VectorIndex> index_;
+};
+
+}
+}
+}
diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp
new file mode 100644
index 00000000..171388d0
--- /dev/null
+++ b/cpp/src/wrapper/knowhere/vec_index.cpp
@@ -0,0 +1,43 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+#include "knowhere/index/vector_index/ivf.h"
+#include "knowhere/index/vector_index/gpu_ivf.h"
+#include "knowhere/index/vector_index/cpu_kdt_rng.h"
+
+#include "vec_index.h"
+#include "vec_impl.h"
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+// Wraps a fresh knowhere index for "IVF", "GPUIVF" or "SPTAG"; returns nullptr for any other `index_type`.
+// TODO(linxj): index_type => enum struct
+VecIndexPtr GetVecIndexFactory(const std::string &index_type) {
+    std::shared_ptr<zilliz::knowhere::VectorIndex> index;
+    if (index_type == "IVF") {
+        index = std::make_shared<zilliz::knowhere::IVF>();
+    } else if (index_type == "GPUIVF") {
+        index = std::make_shared<zilliz::knowhere::GPUIVF>(0);
+    } else if (index_type == "SPTAG") {
+        index = std::make_shared<zilliz::knowhere::CPUKDTRNG>();
+    } else {  // TODO(linxj): Support NSG
+        return nullptr;  // never hand out a wrapper around a null index: every call on it would crash
+    }
+    return std::make_shared<VecIndexImpl>(index);
+}
+
+// Creates the index for `index_type` and loads `index_binary` into it; nullptr if the type is unknown.
+VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary) {
+    auto index = GetVecIndexFactory(index_type);
+    if (index == nullptr) return nullptr;
+    index->Load(index_binary);
+    return index;
+}
+
+}
+}
+}
diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h
new file mode 100644
index 00000000..b03c43a3
--- /dev/null
+++ b/cpp/src/wrapper/knowhere/vec_index.h
@@ -0,0 +1,56 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include <string>
+#include <memory>
+
+#include "knowhere/common/config.h"
+#include "knowhere/common/binary_set.h"
+
+
+namespace zilliz {
+namespace vecwise {
+namespace engine {
+
+// TODO(linxj): jsoncons => rapidjson or other.
+using Config = zilliz::knowhere::Config;
+
+// Abstract interface of a vector index: build, add, search, serialize and load.
+class VecIndex {
+ public:
+    virtual ~VecIndex() = default;  // polymorphic base: keep deletion through VecIndex* well-defined
+
+    // Builds the index from `nb` vectors `xb` with ids `ids`; `nt` and `xt` are optional.
+    virtual void BuildAll(const long &nb, const float *xb, const long *ids, const Config &cfg,
+                          const long &nt = 0, const float *xt = nullptr) = 0;
+
+    // Adds `nb` vectors `xb` with ids `ids` to the index.
+    virtual void Add(const long &nb, const float *xb, const long *ids, const Config &cfg = Config()) = 0;
+
+    // Searches for the `nq` query vectors in `xq`; results are written to `dist` and `ids`.
+    virtual void Search(const long &nq, const float *xq, float *dist, long *ids,
+                        const Config &cfg = Config()) = 0;
+
+    // Returns the index as a knowhere BinarySet.
+    virtual zilliz::knowhere::BinarySet Serialize() = 0;
+
+    // Loads the index from a knowhere BinarySet.
+    virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0;
+};
+
+using VecIndexPtr = std::shared_ptr<VecIndex>;
+
+// Creates an index object for the index type named by `index_type`.
+extern VecIndexPtr GetVecIndexFactory(const std::string &index_type);
+
+// Creates an index object for `index_type` and loads `index_binary` into it.
+extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary);
+
+}
+}
+}
diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere new file mode 160000 index 00000000..844e6008 --- /dev/null +++ b/cpp/thirdparty/knowhere @@ -0,0 +1 @@ +Subproject commit 844e600834df1eeafc6c7e5936338ae964bd1d41 diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index 38046617..d0d158ec 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -29,7 +29,6 @@ set(unittest_libs easyloggingpp pthread metrics - openblas gfortran prometheus-cpp-pull prometheus-cpp-push @@ -41,7 +40,8 @@ set(unittest_libs add_subdirectory(server) add_subdirectory(db) -add_subdirectory(faiss_wrapper) +add_subdirectory(index_wrapper) +#add_subdirectory(faiss_wrapper) #add_subdirectory(license) add_subdirectory(metrics) add_subdirectory(storage) \ No newline at end of file diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index 5bae9190..b2720f70 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -36,8 +36,10 @@ set(db_test_src cuda_add_executable(db_test ${db_test_src}) set(db_libs - libgpufaiss.a + knowhere faiss + openblas + lapack cudart cublas 
sqlite3 diff --git a/cpp/unittest/index_wrapper/CMakeLists.txt b/cpp/unittest/index_wrapper/CMakeLists.txt new file mode 100644 index 00000000..51bd97b5 --- /dev/null +++ b/cpp/unittest/index_wrapper/CMakeLists.txt @@ -0,0 +1,23 @@ +include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") +link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) + +set(helper + utils.cpp) + +set(knowhere_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss + openblas + lapack + tbb + cudart + cublas + ) + +add_executable(knowhere_test knowhere_test.cpp ${knowhere_src} ${helper}) +target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp new file mode 100644 index 00000000..58b0d5a4 --- /dev/null +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -0,0 +1,149 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+////////////////////////////////////////////////////////////////////////////////
+
+#include <gtest/gtest.h>
+
+#include <wrapper/knowhere/vec_index.h>
+
+#include "utils.h"
+
+using namespace zilliz::vecwise::engine;
+using namespace zilliz::knowhere;
+
+using ::testing::TestWithParam;
+using ::testing::Values;
+using ::testing::Combine;
+
+// Fixture parameterized by (index type, generator type, dim, nb, nq, k, build config, search config).
+// SetUp generates base/query vectors plus ground-truth ids and creates the index under test.
+class KnowhereWrapperTest
+    : public TestWithParam<::std::tuple<std::string, std::string, int, int, int, int, Config, Config>> {
+ protected:
+    void SetUp() override {
+        std::string generator_type;
+        std::tie(index_type, generator_type, dim, nb, nq, k, train_cfg, search_cfg) = GetParam();
+
+        //auto generator = GetGenerateFactory(generator_type);
+        auto generator = std::make_shared<DataGenBase>();
+        generator->GenData(dim, nb, nq, xb, xq, ids, k, gt_ids);
+
+        index_ = GetVecIndexFactory(index_type);
+    }
+
+ protected:
+    std::string index_type;
+    Config train_cfg;
+    Config search_cfg;
+
+    int dim = 64;
+    int nb = 10000;
+    int nq = 10;
+    int k = 10;
+    std::vector<float> xb;
+    std::vector<float> xq;
+    std::vector<long> ids;
+
+    VecIndexPtr index_ = nullptr;
+
+    // Ground Truth
+    std::vector<long> gt_ids;
+};
+
+INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest,
+                        Values(
+                            // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"]
+                            std::make_tuple("IVF", "Default",
+                                            64, 10000, 10, 10,
+                                            Config::object{{"nlist", 100}, {"dim", 64}},
+                                            Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}}
+                            ),
+                            std::make_tuple("SPTAG", "Default",
+                                            64, 10000, 10, 10,
+                                            Config::object{{"TPTNumber", 1}, {"dim", 64}},
+                                            Config::object{{"dim", 64}, {"k", 10}}
+                            )
+                        )
+);
+
+// Checks `res` (nq rows of k ids) against ground truth `gt` laid out the same way: the first id of
+// every row must match, and more than half of all ground-truth ids must appear in their result row.
+void AssertAnns(const std::vector<long> &gt, const std::vector<long> &res,
+                const int &nq, const int &k) {
+    ASSERT_EQ(res.size(), static_cast<size_t>(nq * k));  // stop here rather than index out of range
+
+    for (auto i = 0; i < nq; i++) {
+        EXPECT_EQ(gt[i * k], res[i * k]);
+    }
+
+    int match = 0;
+    for (int i = 0; i < nq; ++i) {
+        for (int j = 0; j < k; ++j) {
+            for (int l = 0; l < k; ++l) {
+                if (gt[i * k + j] == res[i * k + l]) { match++; break; }  // row stride is k, not nq
+            }
+        }
+    }
+
+    // TODO(linxj): precision check
+    EXPECT_GT(static_cast<float>(match) / (nq * k), 0.5);
+}
+
+// Builds the index, searches it and checks the result against the ground truth.
+TEST_P(KnowhereWrapperTest, base_test) {
+    std::vector<long> res_ids(nq * k);
+    std::vector<float> dist(nq * k);  // owned buffer instead of new[]/delete[]
+
+    index_->BuildAll(nb, xb.data(), ids.data(), train_cfg);
+    index_->Search(nq, xq.data(), dist.data(), res_ids.data(), search_cfg);
+    AssertAnns(gt_ids, res_ids, nq, k);
+}
+
+// Same as base_test, then serializes the index to files under /tmp, loads those files into a
+// freshly created index of the same type and checks that the reloaded index passes the same check.
+TEST_P(KnowhereWrapperTest, serialize_test) {
+    std::vector<long> res_ids(nq * k);
+    std::vector<float> dist(nq * k);
+
+    index_->BuildAll(nb, xb.data(), ids.data(), train_cfg);
+    index_->Search(nq, xq.data(), dist.data(), res_ids.data(), search_cfg);
+    AssertAnns(gt_ids, res_ids, nq, k);
+
+    {
+        // Dump every binary of the serialized index to its own file, remembering name and size.
+        auto binaryset = index_->Serialize();
+        int fileno = 0;
+        const std::string base_name = "/tmp/wrapper_serialize_test_bin_";
+        std::vector<std::string> filename_list;
+        std::vector<std::pair<std::string, size_t>> meta_list;
+        for (auto &iter: binaryset.binary_map_) {
+            const std::string filename = base_name + std::to_string(fileno);
+            FileIOWriter writer(filename);
+            writer(iter.second.data, iter.second.size);
+            // `writer` closes its file when it goes out of scope at the end of this iteration.
+
+            meta_list.push_back(std::make_pair(iter.first, iter.second.size));
+            filename_list.push_back(filename);
+            ++fileno;
+        }
+
+        // Read the files back into a new BinarySet.
+        BinarySet load_data_list;
+        for (size_t i = 0; i < filename_list.size() && i < meta_list.size(); ++i) {
+            auto bin_size = meta_list[i].second;
+            FileIOReader reader(filename_list[i]);
+            std::vector<uint8_t> load_data(bin_size);
+            reader(load_data.data(), bin_size);
+            load_data_list.Append(meta_list[i].first, load_data);
+        }
+
+        // Reset the result buffer so stale ids from the first search cannot satisfy the check.
+        res_ids.assign(nq * k, 0);
+        auto new_index = GetVecIndexFactory(index_type);
+        new_index->Load(load_data_list);
+        new_index->Search(nq, xq.data(), dist.data(), res_ids.data(), search_cfg);
+        AssertAnns(gt_ids, res_ids, nq, k);
+    }
+}
diff --git a/cpp/unittest/index_wrapper/utils.cpp b/cpp/unittest/index_wrapper/utils.cpp
new file mode 100644
index 00000000..e228ae00
--- /dev/null
+++ b/cpp/unittest/index_wrapper/utils.cpp
@@ -0,0 +1,81 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 
上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#include <faiss/IndexFlat.h>
+
+#include "utils.h"
+
+// Returns a DataGenBase for "default" (also accepted as "Default"); nullptr for any other name.
+DataGenPtr GetGenerateFactory(const std::string &gen_type) {
+    std::shared_ptr<DataGenBase> generator;
+    if (gen_type == "default" || gen_type == "Default") {
+        generator = std::make_shared<DataGenBase>();
+    }
+    return generator;
+}
+
+// Fills `xb` with nb * dim values from drand48(), adds i / 1000 to the first component of vector i
+// and sets ids[i] = i. The first nq vectors are copied to `xq` as queries, and the k nearest ids
+// of each query, found with a faiss flat L2 index, are written to `gt_ids` (nq * k entries).
+void DataGenBase::GenData(const int &dim, const int &nb, const int &nq,
+                          float *xb, float *xq, long *ids,
+                          const int &k, long *gt_ids) {
+    for (auto i = 0; i < nb; ++i) {
+        for (auto j = 0; j < dim; ++j) {
+            xb[i * dim + j] = drand48();
+        }
+        xb[dim * i] += i / 1000.;
+        ids[i] = i;
+    }
+    for (int i = 0; i < nq * dim; ++i) {
+        xq[i] = xb[i];
+    }
+
+    faiss::IndexFlatL2 index(dim);
+    index.add(nb, xb);
+    std::vector<float> distances(k * nq);  // scratch output of search(); was a leaked new[] buffer
+    index.search(nq, xq, k, distances.data(), gt_ids);
+}
+
+// Vector overload: sizes all four vectors, then delegates to the pointer overload above.
+void DataGenBase::GenData(const int &dim,
+                          const int &nb,
+                          const int &nq,
+                          std::vector<float> &xb,
+                          std::vector<float> &xq,
+                          std::vector<long> &ids,
+                          const int &k,
+                          std::vector<long> &gt_ids) {
+    xb.resize(nb * dim);
+    xq.resize(nq * dim);
+    ids.resize(nb);
+    gt_ids.resize(nq * k);
+    GenData(dim, nb, nq, xb.data(), xq.data(), ids.data(), k, gt_ids.data());
+}
+
+// Opens `fname` for binary reading.
+FileIOReader::FileIOReader(const std::string &fname)
+    : fs(fname, std::ios::in | std::ios::binary), name(fname) {}
+
+FileIOReader::~FileIOReader() { fs.close(); }
+
+// Reads up to `size` bytes into `ptr`; returns the number of bytes actually read.
+size_t FileIOReader::operator()(void *ptr, size_t size) {
+    fs.read(reinterpret_cast<char *>(ptr), size);
+    return static_cast<size_t>(fs.gcount());  // the original fell off the end: undefined behaviour
+}
+
+// Opens `fname` for binary writing.
+FileIOWriter::FileIOWriter(const std::string &fname)
+    : fs(fname, std::ios::out | std::ios::binary), name(fname) {}
+
+FileIOWriter::~FileIOWriter() { fs.close(); }
+
+// Writes `size` bytes from `ptr`; returns `size`, or 0 if the stream is in a failed state.
+size_t FileIOWriter::operator()(void *ptr, size_t size) {
+    fs.write(reinterpret_cast<char *>(ptr), size);
+    return fs ? size : 0;  // the original fell off the end: undefined behaviour
+}
diff --git a/cpp/unittest/index_wrapper/utils.h 
b/cpp/unittest/index_wrapper/utils.h
new file mode 100644
index 00000000..bbc52a01
--- /dev/null
+++ b/cpp/unittest/index_wrapper/utils.h
@@ -0,0 +1,61 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include <cstdlib>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+class DataGenBase;
+
+using DataGenPtr = std::shared_ptr<DataGenBase>;
+
+extern DataGenPtr GetGenerateFactory(const std::string &gen_type);
+
+// Generates base vectors, query vectors, ids and ground-truth ids for the wrapper tests.
+class DataGenBase {
+ public:
+    virtual ~DataGenBase() = default;  // polymorphic base: keep deletion through DataGenBase* well-defined
+
+    // Pointer overload: the caller provides buffers of nb*dim, nq*dim, nb and nq*k elements.
+    virtual void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids,
+                         const int &k, long *gt_ids);
+
+    // Vector overload: resizes the vectors itself before generating.
+    virtual void GenData(const int &dim, const int &nb, const int &nq,
+                         std::vector<float> &xb, std::vector<float> &xq, std::vector<long> &ids,
+                         const int &k, std::vector<long> &gt_ids);
+};
+
+// NOTE(review): no definition of SanityCheck::GenData is visible in utils.cpp - confirm before instantiating.
+class SanityCheck : public DataGenBase {
+ public:
+    void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids,
+                 const int &k, long *gt_ids) override;
+};
+
+// Functor that writes raw bytes to the file named at construction (defined in utils.cpp).
+struct FileIOWriter {
+    std::fstream fs;
+    std::string name;
+
+    explicit FileIOWriter(const std::string &fname);
+    ~FileIOWriter();
+    size_t operator()(void *ptr, size_t size);
+};
+
+// Functor that reads raw bytes from the file named at construction (defined in utils.cpp).
+struct FileIOReader {
+    std::fstream fs;
+    std::string name;
+
+    explicit FileIOReader(const std::string &fname);
+    ~FileIOReader();
+    size_t operator()(void *ptr, size_t size);
+};
diff --git a/cpp/unittest/metrics/CMakeLists.txt b/cpp/unittest/metrics/CMakeLists.txt index d31e44c0..be5a542d 100644 --- a/cpp/unittest/metrics/CMakeLists.txt +++ b/cpp/unittest/metrics/CMakeLists.txt @@ -62,8 +62,10 @@ set(count_test_src add_executable(metrics_test ${count_test_src} ${require_files} ) target_link_libraries(metrics_test - libgpufaiss.a + knowhere faiss + openblas + lapack cudart cublas sqlite3 diff 
--git a/cpp/unittest/server/CMakeLists.txt b/cpp/unittest/server/CMakeLists.txt index c4112cda..94a581d2 100644 --- a/cpp/unittest/server/CMakeLists.txt +++ b/cpp/unittest/server/CMakeLists.txt @@ -32,9 +32,11 @@ cuda_add_executable(server_test ) set(require_libs - stdc++ - libgpufaiss.a + knowhere faiss + openblas + lapack + stdc++ cudart cublas sqlite3 -- GitLab