diff --git a/ci/jenkinsfile/milvus_build.groovy b/ci/jenkinsfile/milvus_build.groovy index 63c9850d18d85f41e52db5741062f3d9154f00e3..663d2e0f779de6fb67445fa2c8e0460896f2d43a 100644 --- a/ci/jenkinsfile/milvus_build.groovy +++ b/ci/jenkinsfile/milvus_build.groovy @@ -1,5 +1,5 @@ container('milvus-build-env') { - timeout(time: 40, unit: 'MINUTES') { + timeout(time: 120, unit: 'MINUTES') { gitlabCommitStatus(name: 'Build Engine') { dir ("milvus_engine") { try { diff --git a/ci/jenkinsfile/milvus_build_no_ut.groovy b/ci/jenkinsfile/milvus_build_no_ut.groovy index 9947cf2c8482a07d95b2afaffb267c5ca3b0ae2b..cea9c8550bdb6b24f2870e5c5e1937b57ff2a669 100644 --- a/ci/jenkinsfile/milvus_build_no_ut.groovy +++ b/ci/jenkinsfile/milvus_build_no_ut.groovy @@ -1,5 +1,5 @@ container('milvus-build-env') { - timeout(time: 40, unit: 'MINUTES') { + timeout(time: 120, unit: 'MINUTES') { gitlabCommitStatus(name: 'Build Engine') { dir ("milvus_engine") { try { diff --git a/core/CHANGELOG.md b/core/CHANGELOG.md index 791e62b05bc0fa44c06496d9c1126ab5daff61fb..bc92b1875feaeb968a897e33cbb05f9dedf72aa8 100644 --- a/core/CHANGELOG.md +++ b/core/CHANGELOG.md @@ -22,6 +22,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-624 - Search vectors failed if time ranges long enough - MS-652 - IVFSQH quantization double free - MS-605 - Server going down during searching vectors +- MS-654 - Describe index timeout when building index ## Improvement - MS-552 - Add and change the easylogging library @@ -43,6 +44,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- MS-614 - Preload table at startup - MS-626 - Refactor DataObj to support cache any type data - MS-648 - Improve unittest +- MS-655 - Upgrade SPTAG ## New Feature - MS-627 - Integrate new index: IVFSQHybrid diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index cde5d3f90edb8b074e40249f3456aa1ea1381095..d2092eb018444d3e207b42596d5903bc11ced4dc 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -125,10 +125,6 @@ set(MILVUS_SOURCE_DIR ${PROJECT_SOURCE_DIR}) set(MILVUS_BINARY_DIR ${PROJECT_BINARY_DIR}) set(MILVUS_ENGINE_SRC ${PROJECT_SOURCE_DIR}/src) -if (CUSTOMIZATION) - add_definitions(-DCUSTOMIZATION) -endif (CUSTOMIZATION) - include(ExternalProject) include(DefineOptions) include(BuildUtils) diff --git a/core/cmake/BuildUtils.cmake b/core/cmake/BuildUtils.cmake index 265cdd0cbcd5021a3edf569fd9d7eda6c5431c9b..68cd22ae58b9e37f97c586de37fad77d676f4e83 100644 --- a/core/cmake/BuildUtils.cmake +++ b/core/cmake/BuildUtils.cmake @@ -88,6 +88,11 @@ function(ExternalProject_Create_Cache project_name package_file install_path cac file(REMOVE ${package_file}) endif() + string(REGEX REPLACE "(.+)/.+$" "\\1" package_dir ${package_file}) + if(NOT EXISTS ${package_dir}) + file(MAKE_DIRECTORY ${package_dir}) + endif() + message(STATUS "Will create cached package file: ${package_file}") ExternalProject_Add_Step(${project_name} package diff --git a/core/cmake/ThirdPartyPackages.cmake b/core/cmake/ThirdPartyPackages.cmake index a7632831b07cf809b0dad9f33e90547a6c9238c7..d22556bbf03e9bdf3cc320b41aee292cc63cefd3 100644 --- a/core/cmake/ThirdPartyPackages.cmake +++ b/core/cmake/ThirdPartyPackages.cmake @@ -158,6 +158,10 @@ if(USE_JFROG_CACHE STREQUAL "ON") endif() set(THIRDPARTY_PACKAGE_CACHE "${THIRDPARTY_DIR}/cache") + if(NOT EXISTS ${THIRDPARTY_PACKAGE_CACHE}) + message(STATUS "Will create cached directory: ${THIRDPARTY_PACKAGE_CACHE}") + file(MAKE_DIRECTORY ${THIRDPARTY_PACKAGE_CACHE}) + endif() endif() macro(resolve_dependency DEPENDENCY_NAME) @@ -324,8 +328,8 @@ 
if(DEFINED ENV{MILVUS_SQLITE_ORM_URL}) set(SQLITE_ORM_SOURCE_URL "$ENV{MILVUS_SQLITE_ORM_URL}") else() set(SQLITE_ORM_SOURCE_URL - "http://192.168.1.105:6060/Test/sqlite_orm/-/archive/master/sqlite_orm-master.zip") -# "https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.zip") +# "http://192.168.1.105:6060/Test/sqlite_orm/-/archive/master/sqlite_orm-master.zip") + "https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.zip") endif() set(SQLITE_ORM_MD5 "ba9a405a8a1421c093aa8ce988ff8598") @@ -372,7 +376,7 @@ else() set(GRPC_SOURCE_URL "https://github.com/youny626/grpc-milvus/archive/${GRPC_VERSION}.zip") endif() -set(GRPC_MD5 "fdd2656424c0e0e046b21354513fc70f") +set(GRPC_MD5 "0362ba219f59432c530070b5f5c3df73") # ---------------------------------------------------------------------- diff --git a/core/coverage.sh b/core/coverage.sh index bba733e1f8261b79789206a120af93e4b6194aa6..8d95696e4aedcb2264f434fa4384e45c0daf44d8 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -39,27 +39,6 @@ mysql_exc "GRANT ALL PRIVILEGES ON ${MYSQL_DB_NAME}.* TO '${MYSQL_USER_NAME}'@'% mysql_exc "FLUSH PRIVILEGES;" mysql_exc "USE ${MYSQL_DB_NAME};" -MYSQL_USER_NAME=root -MYSQL_PASSWORD=Fantast1c -MYSQL_HOST='192.168.1.194' -MYSQL_PORT='3306' - -MYSQL_DB_NAME=milvus_`date +%s%N` - -function mysql_exc() -{ - cmd=$1 - mysql -h${MYSQL_HOST} -u${MYSQL_USER_NAME} -p${MYSQL_PASSWORD} -e "${cmd}" - if [ $? -ne 0 ]; then - echo "mysql $cmd run failed" - fi -} - -mysql_exc "CREATE DATABASE IF NOT EXISTS ${MYSQL_DB_NAME};" -mysql_exc "GRANT ALL PRIVILEGES ON ${MYSQL_DB_NAME}.* TO '${MYSQL_USER_NAME}'@'%';" -mysql_exc "FLUSH PRIVILEGES;" -mysql_exc "USE ${MYSQL_DB_NAME};" - # get baseline ${LCOV_CMD} -c -i -d ${DIR_GCNO} -o "${FILE_INFO_BASE}" if [ $? 
-ne 0 ]; then diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index b119a517d1d7473523a4790fde3b7716d5662a6c..e5dbad6dbcc3d901b60fd3597ac7fa8f396e91d5 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -26,6 +26,11 @@ include_directories(${MILVUS_ENGINE_SRC}/grpc/gen-milvus) #this statement must put here, since the CORE_INCLUDE_DIRS is defined in code/CMakeList.txt add_subdirectory(index) + +if (CUSTOMIZATION) + add_definitions(-DCUSTOMIZATION) +endif (CUSTOMIZATION) + set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE) foreach (dir ${CORE_INCLUDE_DIRS}) include_directories(${dir}) @@ -182,8 +187,6 @@ target_link_libraries(milvus_server install(TARGETS milvus_server DESTINATION bin) install(FILES - ${CMAKE_SOURCE_DIR}/src/index/thirdparty/tbb/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX} - ${CMAKE_SOURCE_DIR}/src/index/thirdparty/tbb/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX}.2 ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3 ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3.2.4 diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 6d2391225514185dcf0612d65399f2caffe9ef50..324d304e2ad516e8df5d2221f50837dbde37d768 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -251,11 +251,6 @@ DBImpl::InsertVectors(const std::string& table_id, uint64_t n, const float* vect Status status; milvus::server::CollectInsertMetrics metrics(n, status); status = mem_mgr_->InsertVectors(table_id, n, vectors, vector_ids); - // std::chrono::microseconds time_span = - // std::chrono::duration_cast(end_time - start_time); - // double average_time = double(time_span.count()) / n; - - // ENGINE_LOG_DEBUG << 
"Insert vectors to cache finished"; return status; } @@ -359,7 +354,7 @@ DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t npr return Status(DB_ERROR, "Milsvus server is shutdown!"); } - ENGINE_LOG_DEBUG << "Query by dates for table: " << table_id; + ENGINE_LOG_DEBUG << "Query by dates for table: " << table_id << " date range count: " << dates.size(); // get all table files from table meta::DatePartionedTableFilesSchema files; @@ -377,7 +372,7 @@ DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t npr } cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info before query - status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, dates, results); + status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, results); cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info after query return status; } @@ -389,7 +384,7 @@ DBImpl::Query(const std::string& table_id, const std::vector& file_ return Status(DB_ERROR, "Milsvus server is shutdown!"); } - ENGINE_LOG_DEBUG << "Query by file ids for table: " << table_id; + ENGINE_LOG_DEBUG << "Query by file ids for table: " << table_id << " date range count: " << dates.size(); // get specified files std::vector ids; @@ -418,7 +413,7 @@ DBImpl::Query(const std::string& table_id, const std::vector& file_ } cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info before query - status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, dates, results); + status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, results); cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info after query return status; } @@ -437,14 +432,13 @@ DBImpl::Size(uint64_t& result) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// Status DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq, - 
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) { + uint64_t nprobe, const float* vectors, QueryResults& results) { server::CollectQueryMetrics metrics(nq); TimeRecorder rc(""); // step 1: get files to search - ENGINE_LOG_DEBUG << "Engine query begin, index file count: " << files.size() - << " date range count: " << dates.size(); + ENGINE_LOG_DEBUG << "Engine query begin, index file count: " << files.size(); scheduler::SearchJobPtr job = std::make_shared(0, k, nq, nprobe, vectors); for (auto& file : files) { scheduler::TableFileSchemaPtr file_ptr = std::make_shared(file); @@ -458,32 +452,7 @@ DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& fi return job->GetStatus(); } - // step 3: print time cost information - // double load_cost = context->LoadCost(); - // double search_cost = context->SearchCost(); - // double reduce_cost = context->ReduceCost(); - // std::string load_info = TimeRecorder::GetTimeSpanStr(load_cost); - // std::string search_info = TimeRecorder::GetTimeSpanStr(search_cost); - // std::string reduce_info = TimeRecorder::GetTimeSpanStr(reduce_cost); - // if(search_cost > 0.0 || reduce_cost > 0.0) { - // double total_cost = load_cost + search_cost + reduce_cost; - // double load_percent = load_cost/total_cost; - // double search_percent = search_cost/total_cost; - // double reduce_percent = reduce_cost/total_cost; - // - // ENGINE_LOG_DEBUG << "Engine load index totally cost: " << load_info - // << " percent: " << load_percent*100 << "%"; - // ENGINE_LOG_DEBUG << "Engine search index totally cost: " << search_info - // << " percent: " << search_percent*100 << "%"; - // ENGINE_LOG_DEBUG << "Engine reduce topk totally cost: " << reduce_info - // << " percent: " << reduce_percent*100 << "%"; - // } else { - // ENGINE_LOG_DEBUG << "Engine load cost: " << load_info - // << " search cost: " << search_info - // << " reduce cost: " << reduce_info; - // } - - // step 4: construct results + 
// step 3: construct results results = job->GetResult(); rc.ElapseFromBegin("Engine query totally cost"); @@ -701,14 +670,13 @@ DBImpl::BackgroundMergeFiles(const std::string& table_id) { return status; } - bool has_merge = false; for (auto& kv : raw_files) { auto files = kv.second; if (files.size() < options_.merge_trigger_number_) { ENGINE_LOG_DEBUG << "Files number not greater equal than merge trigger number, skip merge action"; continue; } - has_merge = true; + MergeFiles(table_id, kv.first, kv.second); if (shutting_down_.load(std::memory_order_acquire)) { @@ -776,127 +744,6 @@ DBImpl::StartBuildIndexTask(bool force) { } } -Status -DBImpl::BuildIndex(const meta::TableFileSchema& file) { - ExecutionEnginePtr to_index = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_, - (MetricType)file.metric_type_, file.nlist_); - if (to_index == nullptr) { - ENGINE_LOG_ERROR << "Invalid engine type"; - return Status(DB_ERROR, "Invalid engine type"); - } - - try { - // step 1: load index - Status status = to_index->Load(options_.insert_cache_immediately_); - if (!status.ok()) { - ENGINE_LOG_ERROR << "Failed to load index file: " << status.ToString(); - return status; - } - - // step 2: create table file - meta::TableFileSchema table_file; - table_file.table_id_ = file.table_id_; - table_file.date_ = file.date_; - table_file.file_type_ = - meta::TableFileSchema::NEW_INDEX; // for multi-db-path, distribute index file averagely to each path - status = meta_ptr_->CreateTableFile(table_file); - if (!status.ok()) { - ENGINE_LOG_ERROR << "Failed to create table file: " << status.ToString(); - return status; - } - - // step 3: build index - std::shared_ptr index; - - try { - server::CollectBuildIndexMetrics metrics; - index = to_index->BuildIndex(table_file.location_, (EngineType)table_file.engine_type_); - if (index == nullptr) { - table_file.file_type_ = meta::TableFileSchema::TO_DELETE; - status = meta_ptr_->UpdateTableFile(table_file); - 
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ - << " to to_delete"; - - return status; - } - } catch (std::exception& ex) { - // typical error: out of gpu memory - std::string msg = "BuildIndex encounter exception: " + std::string(ex.what()); - ENGINE_LOG_ERROR << msg; - - table_file.file_type_ = meta::TableFileSchema::TO_DELETE; - status = meta_ptr_->UpdateTableFile(table_file); - ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete"; - - std::cout << "ERROR: failed to build index, index file is too large or gpu memory is not enough" - << std::endl; - - return Status(DB_ERROR, msg); - } - - // step 4: if table has been deleted, dont save index file - bool has_table = false; - meta_ptr_->HasTable(file.table_id_, has_table); - if (!has_table) { - meta_ptr_->DeleteTableFiles(file.table_id_); - return Status::OK(); - } - - // step 5: save index file - try { - index->Serialize(); - } catch (std::exception& ex) { - // typical error: out of disk space or permition denied - std::string msg = "Serialize index encounter exception: " + std::string(ex.what()); - ENGINE_LOG_ERROR << msg; - - table_file.file_type_ = meta::TableFileSchema::TO_DELETE; - status = meta_ptr_->UpdateTableFile(table_file); - ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete"; - - std::cout << "ERROR: failed to persist index file: " << table_file.location_ - << ", possible out of disk space" << std::endl; - - return Status(DB_ERROR, msg); - } - - // step 6: update meta - table_file.file_type_ = meta::TableFileSchema::INDEX; - table_file.file_size_ = index->PhysicalSize(); - table_file.row_count_ = index->Count(); - - auto origin_file = file; - origin_file.file_type_ = meta::TableFileSchema::BACKUP; - - meta::TableFilesSchema update_files = {table_file, origin_file}; - status = meta_ptr_->UpdateTableFiles(update_files); - if (status.ok()) { - 
ENGINE_LOG_DEBUG << "New index file " << table_file.file_id_ << " of size " << index->PhysicalSize() - << " bytes" - << " from file " << origin_file.file_id_; - - if (options_.insert_cache_immediately_) { - index->Cache(); - } - } else { - // failed to update meta, mark the new file as to_delete, don't delete old file - origin_file.file_type_ = meta::TableFileSchema::TO_INDEX; - status = meta_ptr_->UpdateTableFile(origin_file); - ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << origin_file.file_id_ << " to to_index"; - - table_file.file_type_ = meta::TableFileSchema::TO_DELETE; - status = meta_ptr_->UpdateTableFile(table_file); - ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete"; - } - } catch (std::exception& ex) { - std::string msg = "Build index encounter exception: " + std::string(ex.what()); - ENGINE_LOG_ERROR << msg; - return Status(DB_ERROR, msg); - } - - return Status::OK(); -} - void DBImpl::BackgroundBuildIndex() { ENGINE_LOG_TRACE << "Background build index thread start"; @@ -921,17 +768,6 @@ DBImpl::BackgroundBuildIndex() { ENGINE_LOG_ERROR << "Building index failed: " << status.ToString(); } } - // for (auto &file : to_index_files) { - // status = BuildIndex(file); - // if (!status.ok()) { - // ENGINE_LOG_ERROR << "Building index for " << file.id_ << " failed: " << status.ToString(); - // } - // - // if (shutting_down_.load(std::memory_order_acquire)) { - // ENGINE_LOG_DEBUG << "Server will shutdown, skip build index action"; - // break; - // } - // } ENGINE_LOG_TRACE << "Background build index thread exit"; } diff --git a/core/src/db/DBImpl.h b/core/src/db/DBImpl.h index 865b3dfa5361e4ccfa22fb1df223d3d5badd13fa..e1e030cc32651c874efecf24719e20ecfac423f6 100644 --- a/core/src/db/DBImpl.h +++ b/core/src/db/DBImpl.h @@ -107,7 +107,7 @@ class DBImpl : public DB { private: Status QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq, 
- uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results); + uint64_t nprobe, const float* vectors, QueryResults& results); void BackgroundTimerTask(); @@ -133,9 +133,6 @@ class DBImpl : public DB { void BackgroundBuildIndex(); - Status - BuildIndex(const meta::TableFileSchema&); - Status MemSerialize(); diff --git a/core/src/index/cmake/BuildUtilsCore.cmake b/core/src/index/cmake/BuildUtilsCore.cmake index ce798c4bb096e470757895b5c3cfd063f9842d13..905b173429d4dee2c0b41dac32674dab0516e639 100644 --- a/core/src/index/cmake/BuildUtilsCore.cmake +++ b/core/src/index/cmake/BuildUtilsCore.cmake @@ -88,6 +88,11 @@ function(ExternalProject_Create_Cache project_name package_file install_path cac file(REMOVE ${package_file}) endif() + string(REGEX REPLACE "(.+)/.+$" "\\1" package_dir ${package_file}) + if(NOT EXISTS ${package_dir}) + file(MAKE_DIRECTORY ${package_dir}) + endif() + message(STATUS "Will create cached package file: ${package_file}") ExternalProject_Add_Step(${project_name} package diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index 12d5c2a71168429d87060e0d1d181f9f74a27018..a7d9b9302fcd185ed59948ed21072b173753b933 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -125,6 +125,10 @@ endif() if(USE_JFROG_CACHE STREQUAL "ON") set(JFROG_ARTFACTORY_CACHE_URL "${JFROG_ARTFACTORY_URL}/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${KNOWHERE_BUILD_ARCH}/${BUILD_TYPE}") set(THIRDPARTY_PACKAGE_CACHE "${THIRDPARTY_DIR}/cache") + if(NOT EXISTS ${THIRDPARTY_PACKAGE_CACHE}) + message(STATUS "Will create cached directory: ${THIRDPARTY_PACKAGE_CACHE}") + file(MAKE_DIRECTORY ${THIRDPARTY_PACKAGE_CACHE}) + endif() endif() macro(resolve_dependency DEPENDENCY_NAME) @@ -240,6 +244,7 @@ if(CUSTOMIZATION) message(STATUS "Check the remote cache file ${FAISS_SOURCE_URL}. 
return code = ${return_code}") if (NOT return_code EQUAL 0) set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz") + set(CUSTOMIZATION FALSE PARENT_SCOPE) endif() else() set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz") diff --git a/core/src/index/knowhere/CMakeLists.txt b/core/src/index/knowhere/CMakeLists.txt index cb0d5895d103c127aa126aad50a84823d152cb5d..caa4a804c406ea7bbf0477dd33c9aee8bffab1f2 100644 --- a/core/src/index/knowhere/CMakeLists.txt +++ b/core/src/index/knowhere/CMakeLists.txt @@ -1,7 +1,3 @@ -set(TBB_DIR ${CORE_SOURCE_DIR}/thirdparty/tbb) -set(TBB_LIBRARIES ${TBB_DIR}/libtbb.so) -include_directories(${TBB_DIR}/include) - include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64) @@ -60,7 +56,6 @@ set(index_srcs set(depend_libs SPTAGLibStatic - ${TBB_LIBRARIES} faiss openblas lapack @@ -107,13 +102,6 @@ INSTALL(FILES ${OPENBLAS_REAL_STATIC_LIB} DESTINATION lib ) -INSTALL(FILES ${CORE_SOURCE_DIR}/thirdparty/tbb/libtbb.so.2 - DESTINATION lib - ) -INSTALL(FILES ${CORE_SOURCE_DIR}/thirdparty/tbb/libtbb.so - DESTINATION lib - ) - set(CORE_INCLUDE_DIRS ${CORE_SOURCE_DIR}/knowhere ${CORE_SOURCE_DIR}/thirdparty @@ -122,7 +110,6 @@ set(CORE_INCLUDE_DIRS ${FAISS_INCLUDE_DIR} ${OPENBLAS_INCLUDE_DIR} ${LAPACK_INCLUDE_DIR} - ${CORE_SOURCE_DIR}/thirdparty/tbb/include ) set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE) @@ -132,7 +119,6 @@ set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE) # ${ARROW_INCLUDE_DIR}/arrow # ${FAISS_PREFIX}/include/faiss # ${OPENBLAS_INCLUDE_DIR}/ -# ${CORE_SOURCE_DIR}/thirdparty/tbb/include/tbb # DESTINATION # include) # diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index 3408e480e7d59aea12839403cd4b37b01446895b..34c81991c93ae50ed2dd989804b8dab2bbd95bd2 100644 --- 
a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -27,6 +27,7 @@ namespace knowhere { #ifdef CUSTOMIZATION + IndexModelPtr IVFSQHybrid::Train(const DatasetPtr& dataset, const Config& config) { auto build_cfg = std::dynamic_pointer_cast(config); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexKDT.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexKDT.cpp index c23a6ef61d81298b2a53f66889addf05561cc2da..1bd45075e3f0f87bc3423391a5813eb61ac572b0 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexKDT.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexKDT.cpp @@ -36,42 +36,47 @@ BinarySet CPUKDTRNG::Serialize() { std::vector index_blobs; std::vector index_len; - index_ptr_->SaveIndexToMemory(index_blobs, index_len); + + // TODO(zirui): dev + // index_ptr_->SaveIndexToMemory(index_blobs, index_len); BinarySet binary_set; - auto sample = std::make_shared(); - sample.reset(static_cast(index_blobs[0])); - auto tree = std::make_shared(); - tree.reset(static_cast(index_blobs[1])); - auto graph = std::make_shared(); - graph.reset(static_cast(index_blobs[2])); - auto metadata = std::make_shared(); - metadata.reset(static_cast(index_blobs[3])); - - binary_set.Append("samples", sample, index_len[0]); - binary_set.Append("tree", tree, index_len[1]); - binary_set.Append("graph", graph, index_len[2]); - binary_set.Append("metadata", metadata, index_len[3]); + // + // auto sample = std::make_shared(); + // sample.reset(static_cast(index_blobs[0])); + // auto tree = std::make_shared(); + // tree.reset(static_cast(index_blobs[1])); + // auto graph = std::make_shared(); + // graph.reset(static_cast(index_blobs[2])); + // auto metadata = std::make_shared(); + // metadata.reset(static_cast(index_blobs[3])); + // + // binary_set.Append("samples", sample, index_len[0]); + // binary_set.Append("tree", tree, 
index_len[1]); + // binary_set.Append("graph", graph, index_len[2]); + // binary_set.Append("metadata", metadata, index_len[3]); return binary_set; } void CPUKDTRNG::Load(const BinarySet& binary_set) { - std::vector index_blobs; - - auto samples = binary_set.GetByName("samples"); - index_blobs.push_back(samples->data.get()); - - auto tree = binary_set.GetByName("tree"); - index_blobs.push_back(tree->data.get()); - - auto graph = binary_set.GetByName("graph"); - index_blobs.push_back(graph->data.get()); - - auto metadata = binary_set.GetByName("metadata"); - index_blobs.push_back(metadata->data.get()); - - index_ptr_->LoadIndexFromMemory(index_blobs); + // TODO(zirui): dev + + // std::vector index_blobs; + // + // auto samples = binary_set.GetByName("samples"); + // index_blobs.push_back(samples->data.get()); + // + // auto tree = binary_set.GetByName("tree"); + // index_blobs.push_back(tree->data.get()); + // + // auto graph = binary_set.GetByName("graph"); + // index_blobs.push_back(graph->data.get()); + // + // auto metadata = binary_set.GetByName("metadata"); + // index_blobs.push_back(metadata->data.get()); + // + // index_ptr_->LoadIndexFromMemory(index_blobs); } // PreprocessorPtr diff --git a/core/src/index/thirdparty/SPTAG/.gitignore b/core/src/index/thirdparty/SPTAG/.gitignore index 9b51454741adf85af88aad86c06494f382f5a68c..973785834cc50aeffde00b6c9a05acc5caad17a0 100644 --- a/core/src/index/thirdparty/SPTAG/.gitignore +++ b/core/src/index/thirdparty/SPTAG/.gitignore @@ -89,5 +89,3 @@ dkms.conf /Wrappers/inc/AnnClient.java /AnnService.users - Copy.props /.vs -Release/ -Debug/ diff --git a/core/src/index/thirdparty/SPTAG/AnnService/CMakeLists.txt b/core/src/index/thirdparty/SPTAG/AnnService/CMakeLists.txt index 4c1abe0189438da7ed2d7728e357e3e395adfc7f..fffc5ce426e8cb7280782a237767054c3f0e7475 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/CMakeLists.txt +++ b/core/src/index/thirdparty/SPTAG/AnnService/CMakeLists.txt @@ -1,44 +1,41 @@ # Copyright 
(c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -file(GLOB HDR_FILES ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/Common/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/BKT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/KDT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/*.h) -file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/AnnService/src/Core/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/Common/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/BKT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/KDT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/*.cpp) +file(GLOB HDR_FILES ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/Common/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/BKT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/KDT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/VectorSetReaders/*.h) +file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/AnnService/src/Core/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/Common/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/BKT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/KDT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/VectorSetReaders/*.cpp) include_directories(${PROJECT_SOURCE_DIR}/AnnService) add_library (SPTAGLib SHARED ${SRC_FILES} ${HDR_FILES}) -target_link_libraries (SPTAGLib ${TBB_LIBRARIES}) +target_link_libraries (SPTAGLib) add_library (SPTAGLibStatic STATIC ${SRC_FILES} ${HDR_FILES}) set_target_properties(SPTAGLibStatic PROPERTIES OUTPUT_NAME SPTAGLib) file(GLOB SERVER_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/Server/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h) file(GLOB SERVER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/Server/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp) add_executable (server ${SERVER_FILES} ${SERVER_HDR_FILES}) 
-target_link_libraries(server ${Boost_LIBRARIES} ${TBB_LIBRARIES}) +target_link_libraries(server ${Boost_LIBRARIES}) file(GLOB CLIENT_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h) file(GLOB CLIENT_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp) add_executable (client ${CLIENT_FILES} ${CLIENT_HDR_FILES}) -target_link_libraries(client ${Boost_LIBRARIES} ${TBB_LIBRARIES}) +target_link_libraries(client ${Boost_LIBRARIES}) file(GLOB AGG_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/Aggregator/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h) file(GLOB AGG_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/Aggregator/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp) add_executable (aggregator ${AGG_FILES} ${AGG_HDR_FILES}) -target_link_libraries(aggregator ${Boost_LIBRARIES} ${TBB_LIBRARIES}) +target_link_libraries(aggregator ${Boost_LIBRARIES}) -file(GLOB BUILDER_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/IndexBuilder/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/IndexBuilder/VectorSetReaders/*.h) -file(GLOB BUILDER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexBuilder/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/IndexBuilder/VectorSetReaders/*.cpp) +file(GLOB BUILDER_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/IndexBuilder/*.h) +file(GLOB BUILDER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexBuilder/*.cpp) add_executable (indexbuilder ${BUILDER_FILES} ${BUILDER_HDR_FILES}) -target_link_libraries(indexbuilder ${Boost_LIBRARIES} ${TBB_LIBRARIES}) +target_link_libraries(indexbuilder ${Boost_LIBRARIES}) file(GLOB SEARCHER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexSearcher/*.cpp) add_executable (indexsearcher ${SEARCHER_FILES} ${HDR_FILES}) -target_link_libraries(indexsearcher ${Boost_LIBRARIES} ${TBB_LIBRARIES}) 
+target_link_libraries(indexsearcher ${Boost_LIBRARIES}) install(TARGETS SPTAGLib SPTAGLibStatic server client aggregator indexbuilder indexsearcher RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - -install(DIRECTORY inc DESTINATION include/sptag - FILES_MATCHING PATTERN "*.h") \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/AnnService/CoreLibrary.vcxproj b/core/src/index/thirdparty/SPTAG/AnnService/CoreLibrary.vcxproj index a0e884b3df23b9fe2008c415c599097406b29981..08921f2444367a1dc34ad28fcc2a36392fa2f541 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/CoreLibrary.vcxproj +++ b/core/src/index/thirdparty/SPTAG/AnnService/CoreLibrary.vcxproj @@ -149,25 +149,29 @@ + + + + + - @@ -176,18 +180,13 @@ + + - - - - This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - - \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/AnnService/CoreLibrary.vcxproj.filters b/core/src/index/thirdparty/SPTAG/AnnService/CoreLibrary.vcxproj.filters index c411e8cef540aec8a3797b0a713d0afbe2b9a738..94f27df3f19abd715f864fcbf59cd8cc3048c1fe 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/CoreLibrary.vcxproj.filters +++ b/core/src/index/thirdparty/SPTAG/AnnService/CoreLibrary.vcxproj.filters @@ -38,6 +38,12 @@ {8fb36afb-73ed-4c3d-8c9b-c3581d80c5d1} + + {f7bc0bc7-1af5-4870-b8ee-fabdbabdb4c4} + + + {5c1449e0-38b7-4c82-976e-cbdc488d3fb5} + @@ -52,6 +58,9 @@ Header Files\Core + + Header Files\Core + Header Files\Core @@ -130,11 +139,20 @@ Header Files\Core\Common + + Header Files\Helper + + + Header Files\Helper + + + Header Files\Helper\VectorSetReaders + + + Header Files\Helper + - - Source Files\Core - Source Files\Core @@ -171,6 +189,12 @@ Source Files\Core\Common + + Source Files\Helper\VectorSetReaders + + + Source Files\Helper + diff --git 
a/core/src/index/thirdparty/SPTAG/AnnService/IndexBuilder.vcxproj b/core/src/index/thirdparty/SPTAG/AnnService/IndexBuilder.vcxproj index 931f301e107f6f133f0224d65fbb5dcb47d17ab9..a5d05fb47375f6d7324dcbd261eda7625d790a26 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/IndexBuilder.vcxproj +++ b/core/src/index/thirdparty/SPTAG/AnnService/IndexBuilder.vcxproj @@ -139,15 +139,11 @@ - - - - @@ -161,7 +157,6 @@ - @@ -174,6 +169,5 @@ - \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/AnnService/IndexBuilder.vcxproj.filters b/core/src/index/thirdparty/SPTAG/AnnService/IndexBuilder.vcxproj.filters index dcd29861c455b0771a222943a3d53ce16d26928e..0733fae1c1baaa3399457f5694d072e97eb0dade 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/IndexBuilder.vcxproj.filters +++ b/core/src/index/thirdparty/SPTAG/AnnService/IndexBuilder.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -9,12 +9,6 @@ {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;xsd - - {cf68b421-6a65-44f2-bf43-438b13940d7d} - - - {41ac91f9-6b6d-4341-8791-12f672d6ad5c} - @@ -23,27 +17,15 @@ Header Files - - Header Files - - - Header Files\VectorSetReaders - Source Files - - Source Files - - + Source Files - - Source Files\VectorSetReaders - - + Source Files diff --git a/core/src/index/thirdparty/SPTAG/AnnService/IndexSearcher.vcxproj b/core/src/index/thirdparty/SPTAG/AnnService/IndexSearcher.vcxproj index 9cd6418959dcfd48e9e33b5692ef136238ac1437..266ac576b30ed5eafda7eb864ffbb6f6d5228607 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/IndexSearcher.vcxproj +++ b/core/src/index/thirdparty/SPTAG/AnnService/IndexSearcher.vcxproj @@ -154,7 +154,6 @@ - @@ -167,6 +166,5 @@ - \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/AnnService/Server.vcxproj b/core/src/index/thirdparty/SPTAG/AnnService/Server.vcxproj index c2336176eefcae866fa374a69756f6c821b7ad8a..d830f3bc0de44a1c9db74d3c40fbd09e2efbd1ab 100644 --- 
a/core/src/index/thirdparty/SPTAG/AnnService/Server.vcxproj +++ b/core/src/index/thirdparty/SPTAG/AnnService/Server.vcxproj @@ -137,7 +137,6 @@ - @@ -150,6 +149,5 @@ - \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/Index.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/Index.h index fde5b8ec88968adb6c2f7d4a29625e10fc1a4a79..0722afc1a8c199b4444211d70adf65ed8a68d658 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/Index.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/Index.h @@ -15,12 +15,12 @@ #include "../Common/WorkSpacePool.h" #include "../Common/RelativeNeighborhoodGraph.h" #include "../Common/BKTree.h" +#include "inc/Helper/ConcurrentSet.h" #include "inc/Helper/SimpleIniReader.h" #include "inc/Helper/StringConvert.h" #include #include -#include namespace SPTAG { @@ -48,35 +48,38 @@ namespace SPTAG std::string m_sBKTFilename; std::string m_sGraphFilename; std::string m_sDataPointsFilename; + std::string m_sDeleteDataPointsFilename; - std::mutex m_dataLock; // protect data and graph - tbb::concurrent_unordered_set m_deletedID; + std::mutex m_dataAddLock; // protect data and graph + Helper::Concurrent::ConcurrentSet m_deletedID; + float m_fDeletePercentageForRefine; std::unique_ptr m_workSpacePool; int m_iNumberOfThreads; DistCalcMethod m_iDistCalcMethod; - float(*m_fComputeDistance)(const T* pX, const T* pY, int length); + float(*m_fComputeDistance)(const T* pX, const T* pY, DimensionType length); int m_iMaxCheck; int m_iThresholdOfNumberOfContinuousNoBetterPropagation; int m_iNumberOfInitialDynamicPivots; int m_iNumberOfOtherDynamicPivots; public: - Index() - { + Index() + { #define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \ VarName = DefaultValue; \ #include "inc/Core/BKT/ParameterDefinitionList.h" #undef DefineBKTParameter - m_fComputeDistance = COMMON::DistanceCalcSelector(m_iDistCalcMethod); - } + m_pSamples.SetName("Vector"); + 
m_fComputeDistance = COMMON::DistanceCalcSelector(m_iDistCalcMethod); + } ~Index() {} - inline int GetNumSamples() const { return m_pSamples.R(); } - inline int GetFeatureDim() const { return m_pSamples.C(); } + inline SizeType GetNumSamples() const { return m_pSamples.R(); } + inline DimensionType GetFeatureDim() const { return m_pSamples.C(); } inline int GetCurrMaxCheck() const { return m_iMaxCheck; } inline int GetNumThreads() const { return m_iNumberOfThreads; } @@ -85,25 +88,41 @@ namespace SPTAG inline VectorValueType GetVectorValueType() const { return GetEnumValueType(); } inline float ComputeDistance(const void* pX, const void* pY) const { return m_fComputeDistance((const T*)pX, (const T*)pY, m_pSamples.C()); } - inline const void* GetSample(const int idx) const { return (void*)m_pSamples[idx]; } - - ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension); - - ErrorCode SaveIndexToMemory(std::vector& p_indexBlobs, std::vector& p_indexBlobsLen); - ErrorCode LoadIndexFromMemory(const std::vector& p_indexBlobs); - - ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout); - ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader); + inline const void* GetSample(const SizeType idx) const { return (void*)m_pSamples[idx]; } + inline bool ContainSample(const SizeType idx) const { return !m_deletedID.contains(idx); } + inline bool NeedRefine() const { return m_deletedID.size() >= (size_t)(GetNumSamples() * m_fDeletePercentageForRefine); } + std::shared_ptr> BufferSize() const + { + std::shared_ptr> buffersize(new std::vector); + buffersize->push_back(m_pSamples.BufferSize()); + buffersize->push_back(m_pTrees.BufferSize()); + buffersize->push_back(m_pGraph.BufferSize()); + buffersize->push_back(m_deletedID.bufferSize()); + return std::move(buffersize); + } + + ErrorCode SaveConfig(std::ostream& p_configout) const; + ErrorCode SaveIndexData(const std::string& p_folderPath); + ErrorCode 
SaveIndexData(const std::vector& p_indexStreams); + + ErrorCode LoadConfig(Helper::IniReader& p_reader); + ErrorCode LoadIndexData(const std::string& p_folderPath); + ErrorCode LoadIndexDataFromMemory(const std::vector& p_indexBlobs); + + ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension); ErrorCode SearchIndex(QueryResult &p_query) const; - ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension); - ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum); + ErrorCode AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start = nullptr); + ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum); + ErrorCode DeleteIndex(const SizeType& p_id); ErrorCode SetParameter(const char* p_param, const char* p_value); std::string GetParameter(const char* p_param) const; - private: ErrorCode RefineIndex(const std::string& p_folderPath); - void SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const; + ErrorCode RefineIndex(const std::vector& p_indexStreams); + + private: + void SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet &p_deleted) const; void SearchIndexWithoutDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space) const; }; } // namespace BKT diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/ParameterDefinitionList.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/ParameterDefinitionList.h index 7ad17fcbb1425cbc4ca7949f1f1d8f7e7a41fa3f..3f6f9e0222f82ec93adf05fda79e48385f5dfff5 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/ParameterDefinitionList.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/ParameterDefinitionList.h @@ -7,6 +7,7 @@ DefineBKTParameter(m_sBKTFilename, std::string, std::string("tree.bin"), "TreeFilePath") 
DefineBKTParameter(m_sGraphFilename, std::string, std::string("graph.bin"), "GraphFilePath") DefineBKTParameter(m_sDataPointsFilename, std::string, std::string("vectors.bin"), "VectorFilePath") +DefineBKTParameter(m_sDeleteDataPointsFilename, std::string, std::string("deletes.bin"), "DeleteVectorFilePath") DefineBKTParameter(m_pTrees.m_iTreeNumber, int, 1L, "BKTNumber") DefineBKTParameter(m_pTrees.m_iBKTKmeansK, int, 32L, "BKTKmeansK") @@ -14,11 +15,11 @@ DefineBKTParameter(m_pTrees.m_iBKTLeafSize, int, 8L, "BKTLeafSize") DefineBKTParameter(m_pTrees.m_iSamples, int, 1000L, "Samples") -DefineBKTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TpTreeNumber") +DefineBKTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TPTNumber") DefineBKTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L, "TPTLeafSize") DefineBKTParameter(m_pGraph.m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTpTreeSplit") -DefineBKTParameter(m_pGraph.m_iNeighborhoodSize, int, 32L, "NeighborhoodSize") +DefineBKTParameter(m_pGraph.m_iNeighborhoodSize, DimensionType, 32L, "NeighborhoodSize") DefineBKTParameter(m_pGraph.m_iNeighborhoodScale, int, 2L, "GraphNeighborhoodScale") DefineBKTParameter(m_pGraph.m_iCEFScale, int, 2L, "GraphCEFScale") DefineBKTParameter(m_pGraph.m_iRefineIter, int, 0L, "RefineIterations") @@ -28,6 +29,7 @@ DefineBKTParameter(m_pGraph.m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckFor DefineBKTParameter(m_iNumberOfThreads, int, 1L, "NumberOfThreads") DefineBKTParameter(m_iDistCalcMethod, SPTAG::DistCalcMethod, SPTAG::DistCalcMethod::Cosine, "DistCalcMethod") +DefineBKTParameter(m_fDeletePercentageForRefine, float, 0.4F, "DeletePercentageForRefine") DefineBKTParameter(m_iMaxCheck, int, 8192L, "MaxCheck") DefineBKTParameter(m_iThresholdOfNumberOfContinuousNoBetterPropagation, int, 3L, "ThresholdOfNumberOfContinuousNoBetterPropagation") DefineBKTParameter(m_iNumberOfInitialDynamicPivots, int, 50L, "NumberOfInitialDynamicPivots") diff --git 
a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common.h index 91c6239b32e8e0b236960f03362f1f3d261bb7da..02182a4bf27b8b26c4f3541bf8ae1aacd6c91e7e 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common.h @@ -56,9 +56,10 @@ inline bool fileexists(const char* path) { namespace SPTAG { +typedef std::int32_t SizeType; +typedef std::int32_t DimensionType; -typedef std::uint32_t SizeType; - +const SizeType MaxSize = (std::numeric_limits::max)(); const float MinDist = (std::numeric_limits::min)(); const float MaxDist = (std::numeric_limits::max)(); const float Epsilon = 0.000000001f; @@ -76,11 +77,6 @@ public: #endif }; -// Type of number index. -typedef std::int32_t IndexType; -static_assert(std::is_integral::value, "IndexType must be integral type."); - - enum class ErrorCode : std::uint16_t { #define DefineErrorCode(Name, Value) Name = Value, diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/BKTree.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/BKTree.h index 366ca08499942e12547719474f27f5cf8756de39..56583be1649ed026dad8da472a001b9fd8a27fba 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/BKTree.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/BKTree.h @@ -24,34 +24,34 @@ namespace SPTAG // node type for storing BKT struct BKTNode { - int centerid; - int childStart; - int childEnd; + SizeType centerid; + SizeType childStart; + SizeType childEnd; - BKTNode(int cid = -1) : centerid(cid), childStart(-1), childEnd(-1) {} + BKTNode(SizeType cid = -1) : centerid(cid), childStart(-1), childEnd(-1) {} }; template struct KmeansArgs { int _K; - int _D; + DimensionType _D; int _T; T* centers; - int* counts; + SizeType* counts; float* newCenters; - int* newCounts; - char* label; - int* clusterIdx; + SizeType* newCounts; + int* label; + SizeType* 
clusterIdx; float* clusterDist; T* newTCenters; - KmeansArgs(int k, int dim, int datasize, int threadnum) : _K(k), _D(dim), _T(threadnum) { + KmeansArgs(int k, DimensionType dim, SizeType datasize, int threadnum) : _K(k), _D(dim), _T(threadnum) { centers = new T[k * dim]; - counts = new int[k]; + counts = new SizeType[k]; newCenters = new float[threadnum * k * dim]; - newCounts = new int[threadnum * k]; - label = new char[datasize]; - clusterIdx = new int[threadnum * k]; + newCounts = new SizeType[threadnum * k]; + label = new int[datasize]; + clusterIdx = new SizeType[threadnum * k]; clusterDist = new float[threadnum * k]; newTCenters = new T[k * dim]; } @@ -68,7 +68,7 @@ namespace SPTAG } inline void ClearCounts() { - memset(newCounts, 0, sizeof(int) * _T * _K); + memset(newCounts, 0, sizeof(SizeType) * _T * _K); } inline void ClearCenters() { @@ -82,17 +82,17 @@ namespace SPTAG } } - void Shuffle(std::vector& indices, int first, int last) { - int* pos = new int[_K]; + void Shuffle(std::vector& indices, SizeType first, SizeType last) { + SizeType* pos = new SizeType[_K]; pos[0] = first; for (int k = 1; k < _K; k++) pos[k] = pos[k - 1] + newCounts[k - 1]; for (int k = 0; k < _K; k++) { if (newCounts[k] == 0) continue; - int i = pos[k]; + SizeType i = pos[k]; while (newCounts[k] > 0) { - int swapid = pos[(int)(label[i])] + newCounts[(int)(label[i])] - 1; - newCounts[(int)(label[i])]--; + SizeType swapid = pos[label[i]] + newCounts[label[i]] - 1; + newCounts[label[i]]--; std::swap(indices[i], indices[swapid]); std::swap(label[i], label[swapid]); } @@ -114,59 +114,59 @@ namespace SPTAG m_iSamples(other.m_iSamples) {} ~BKTree() {} - inline const BKTNode& operator[](int index) const { return m_pTreeRoots[index]; } - inline BKTNode& operator[](int index) { return m_pTreeRoots[index]; } + inline const BKTNode& operator[](SizeType index) const { return m_pTreeRoots[index]; } + inline BKTNode& operator[](SizeType index) { return m_pTreeRoots[index]; } - inline int size() 
const { return (int)m_pTreeRoots.size(); } + inline SizeType size() const { return (SizeType)m_pTreeRoots.size(); } - inline const std::unordered_map& GetSampleMap() const { return m_pSampleCenterMap; } + inline const std::unordered_map& GetSampleMap() const { return m_pSampleCenterMap; } template - void BuildTrees(VectorIndex* index, std::vector* indices = nullptr) + void BuildTrees(VectorIndex* index, std::vector* indices = nullptr) { struct BKTStackItem { - int index, first, last; - BKTStackItem(int index_, int first_, int last_) : index(index_), first(first_), last(last_) {} + SizeType index, first, last; + BKTStackItem(SizeType index_, SizeType first_, SizeType last_) : index(index_), first(first_), last(last_) {} }; std::stack ss; - std::vector localindices; + std::vector localindices; if (indices == nullptr) { localindices.resize(index->GetNumSamples()); - for (int i = 0; i < index->GetNumSamples(); i++) localindices[i] = i; + for (SizeType i = 0; i < index->GetNumSamples(); i++) localindices[i] = i; } else { localindices.assign(indices->begin(), indices->end()); } - KmeansArgs args(m_iBKTKmeansK, index->GetFeatureDim(), (int)localindices.size(), omp_get_num_threads()); + KmeansArgs args(m_iBKTKmeansK, index->GetFeatureDim(), (SizeType)localindices.size(), omp_get_num_threads()); m_pSampleCenterMap.clear(); for (char i = 0; i < m_iTreeNumber; i++) { std::random_shuffle(localindices.begin(), localindices.end()); - m_pTreeStart.push_back((int)m_pTreeRoots.size()); - m_pTreeRoots.push_back(BKTNode((int)localindices.size())); + m_pTreeStart.push_back((SizeType)m_pTreeRoots.size()); + m_pTreeRoots.push_back(BKTNode((SizeType)localindices.size())); std::cout << "Start to build BKTree " << i + 1 << std::endl; - ss.push(BKTStackItem(m_pTreeStart[i], 0, (int)localindices.size())); + ss.push(BKTStackItem(m_pTreeStart[i], 0, (SizeType)localindices.size())); while (!ss.empty()) { BKTStackItem item = ss.top(); ss.pop(); - int newBKTid = (int)m_pTreeRoots.size(); + 
SizeType newBKTid = (SizeType)m_pTreeRoots.size(); m_pTreeRoots[item.index].childStart = newBKTid; if (item.last - item.first <= m_iBKTLeafSize) { - for (int j = item.first; j < item.last; j++) { + for (SizeType j = item.first; j < item.last; j++) { m_pTreeRoots.push_back(BKTNode(localindices[j])); } } else { // clustering the data into BKTKmeansK clusters int numClusters = KmeansClustering(index, localindices, item.first, item.last, args); if (numClusters <= 1) { - int end = min(item.last + 1, (int)localindices.size()); + SizeType end = min(item.last + 1, (SizeType)localindices.size()); std::sort(localindices.begin() + item.first, localindices.begin() + end); m_pTreeRoots[item.index].centerid = localindices[item.first]; m_pTreeRoots[item.index].childStart = -m_pTreeRoots[item.index].childStart; - for (int j = item.first + 1; j < end; j++) { + for (SizeType j = item.first + 1; j < end; j++) { m_pTreeRoots.push_back(BKTNode(localindices[j])); m_pSampleCenterMap[localindices[j]] = m_pTreeRoots[item.index].centerid; } @@ -181,53 +181,36 @@ namespace SPTAG } } } - m_pTreeRoots[item.index].childEnd = (int)m_pTreeRoots.size(); + m_pTreeRoots[item.index].childEnd = (SizeType)m_pTreeRoots.size(); } std::cout << i + 1 << " BKTree built, " << m_pTreeRoots.size() - m_pTreeStart[i] << " " << localindices.size() << std::endl; } } - bool SaveTrees(void **pKDTMemFile, int64_t &len) const + inline std::uint64_t BufferSize() const { - int treeNodeSize = (int)m_pTreeRoots.size(); - - size_t size = sizeof(int) + - sizeof(int) * m_iTreeNumber + - sizeof(int) + - sizeof(BKTNode) * treeNodeSize; - char *mem = (char*)malloc(size); - if (mem == NULL) return false; - - auto ptr = mem; - *(int*)ptr = m_iTreeNumber; - ptr += sizeof(int); - - memcpy(ptr, m_pTreeStart.data(), sizeof(int) * m_iTreeNumber); - ptr += sizeof(int) * m_iTreeNumber; - - *(int*)ptr = treeNodeSize; - ptr += sizeof(int); - - memcpy(ptr, m_pTreeRoots.data(), sizeof(BKTNode) * treeNodeSize); - *pKDTMemFile = mem; - len = 
size; + return sizeof(int) + sizeof(SizeType) * m_iTreeNumber + + sizeof(SizeType) + sizeof(BKTNode) * m_pTreeRoots.size(); + } + bool SaveTrees(std::ostream& p_outstream) const + { + p_outstream.write((char*)&m_iTreeNumber, sizeof(int)); + p_outstream.write((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber); + SizeType treeNodeSize = (SizeType)m_pTreeRoots.size(); + p_outstream.write((char*)&treeNodeSize, sizeof(SizeType)); + p_outstream.write((char*)m_pTreeRoots.data(), sizeof(BKTNode) * treeNodeSize); + std::cout << "Save BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl; return true; } bool SaveTrees(std::string sTreeFileName) const { std::cout << "Save BKT to " << sTreeFileName << std::endl; - FILE *fp = fopen(sTreeFileName.c_str(), "wb"); - if (fp == NULL) return false; - - fwrite(&m_iTreeNumber, sizeof(int), 1, fp); - fwrite(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp); - int treeNodeSize = (int)m_pTreeRoots.size(); - fwrite(&treeNodeSize, sizeof(int), 1, fp); - fwrite(m_pTreeRoots.data(), sizeof(BKTNode), treeNodeSize, fp); - fclose(fp); - std::cout << "Save BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" 
<< std::endl; + std::ofstream output(sTreeFileName, std::ios::binary); + if (!output.is_open()) return false; + SaveTrees(output); + output.close(); return true; } @@ -236,31 +219,32 @@ namespace SPTAG m_iTreeNumber = *((int*)pBKTMemFile); pBKTMemFile += sizeof(int); m_pTreeStart.resize(m_iTreeNumber); - memcpy(m_pTreeStart.data(), pBKTMemFile, sizeof(int) * m_iTreeNumber); - pBKTMemFile += sizeof(int)*m_iTreeNumber; + memcpy(m_pTreeStart.data(), pBKTMemFile, sizeof(SizeType) * m_iTreeNumber); + pBKTMemFile += sizeof(SizeType)*m_iTreeNumber; - int treeNodeSize = *((int*)pBKTMemFile); - pBKTMemFile += sizeof(int); + SizeType treeNodeSize = *((SizeType*)pBKTMemFile); + pBKTMemFile += sizeof(SizeType); m_pTreeRoots.resize(treeNodeSize); memcpy(m_pTreeRoots.data(), pBKTMemFile, sizeof(BKTNode) * treeNodeSize); + std::cout << "Load BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl; return true; } bool LoadTrees(std::string sTreeFileName) { std::cout << "Load BKT From " << sTreeFileName << std::endl; - FILE *fp = fopen(sTreeFileName.c_str(), "rb"); - if (fp == NULL) return false; + std::ifstream input(sTreeFileName, std::ios::binary); + if (!input.is_open()) return false; - fread(&m_iTreeNumber, sizeof(int), 1, fp); + input.read((char*)&m_iTreeNumber, sizeof(int)); m_pTreeStart.resize(m_iTreeNumber); - fread(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp); + input.read((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber); - int treeNodeSize; - fread(&treeNodeSize, sizeof(int), 1, fp); + SizeType treeNodeSize; + input.read((char*)&treeNodeSize, sizeof(SizeType)); m_pTreeRoots.resize(treeNodeSize); - fread(m_pTreeRoots.data(), sizeof(BKTNode), treeNodeSize, fp); - fclose(fp); + input.read((char*)m_pTreeRoots.data(), sizeof(BKTNode) * treeNodeSize); + input.close(); std::cout << "Load BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" 
<< std::endl; return true; } @@ -274,9 +258,9 @@ namespace SPTAG p_space.m_SPTQueue.insert(COMMON::HeapCell(m_pTreeStart[i], p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(node.centerid)))); } else { - for (int begin = node.childStart; begin < node.childEnd; begin++) { - int index = m_pTreeRoots[begin].centerid; - p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index)))); + for (SizeType begin = node.childStart; begin < node.childEnd; begin++) { + SizeType index = m_pTreeRoots[begin].centerid; + p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index)))); } } } @@ -301,8 +285,8 @@ namespace SPTAG if (!p_space.CheckAndSet(tnode.centerid)) { p_space.m_NGQueue.insert(COMMON::HeapCell(tnode.centerid, bcell.distance)); } - for (int begin = tnode.childStart; begin < tnode.childEnd; begin++) { - int index = m_pTreeRoots[begin].centerid; + for (SizeType begin = tnode.childStart; begin < tnode.childEnd; begin++) { + SizeType index = m_pTreeRoots[begin].centerid; p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index)))); } } @@ -313,24 +297,24 @@ namespace SPTAG template float KmeansAssign(VectorIndex* p_index, - std::vector& indices, - const int first, const int last, KmeansArgs& args, const bool updateCenters) const { + std::vector& indices, + const SizeType first, const SizeType last, KmeansArgs& args, const bool updateCenters) const { float currDist = 0; int threads = omp_get_num_threads(); float lambda = (updateCenters) ? 
COMMON::Utils::GetBase() * COMMON::Utils::GetBase() / (100.0f * (last - first)) : 0.0f; - int subsize = (last - first - 1) / threads + 1; + SizeType subsize = (last - first - 1) / threads + 1; #pragma omp parallel for for (int tid = 0; tid < threads; tid++) { - int istart = first + tid * subsize; - int iend = min(first + (tid + 1) * subsize, last); - int *inewCounts = args.newCounts + tid * m_iBKTKmeansK; + SizeType istart = first + tid * subsize; + SizeType iend = min(first + (tid + 1) * subsize, last); + SizeType *inewCounts = args.newCounts + tid * m_iBKTKmeansK; float *inewCenters = args.newCenters + tid * m_iBKTKmeansK * p_index->GetFeatureDim(); - int * iclusterIdx = args.clusterIdx + tid * m_iBKTKmeansK; + SizeType * iclusterIdx = args.clusterIdx + tid * m_iBKTKmeansK; float * iclusterDist = args.clusterDist + tid * m_iBKTKmeansK; float idist = 0; - for (int i = istart; i < iend; i++) { + for (SizeType i = istart; i < iend; i++) { int clusterid = 0; float smallestDist = MaxDist; for (int k = 0; k < m_iBKTKmeansK; k++) { @@ -345,7 +329,7 @@ namespace SPTAG if (updateCenters) { const T* v = (const T*)p_index->GetSample(indices[i]); float* center = inewCenters + clusterid*p_index->GetFeatureDim(); - for (int j = 0; j < p_index->GetFeatureDim(); j++) center[j] += v[j]; + for (DimensionType j = 0; j < p_index->GetFeatureDim(); j++) center[j] += v[j]; if (smallestDist > iclusterDist[clusterid]) { iclusterDist[clusterid] = smallestDist; iclusterIdx[clusterid] = indices[i]; @@ -369,36 +353,50 @@ namespace SPTAG if (updateCenters) { for (int i = 1; i < threads; i++) { float* currCenter = args.newCenters + i*m_iBKTKmeansK*p_index->GetFeatureDim(); - for (int j = 0; j < m_iBKTKmeansK * p_index->GetFeatureDim(); j++) args.newCenters[j] += currCenter[j]; - } + for (size_t j = 0; j < ((size_t)m_iBKTKmeansK) * p_index->GetFeatureDim(); j++) args.newCenters[j] += currCenter[j]; - int maxcluster = 0; - for (int k = 1; k < m_iBKTKmeansK; k++) if (args.newCounts[maxcluster] < 
args.newCounts[k]) maxcluster = k; + for (int k = 0; k < m_iBKTKmeansK; k++) { + if (args.clusterIdx[i*m_iBKTKmeansK + k] != -1 && args.clusterDist[i*m_iBKTKmeansK + k] > args.clusterDist[k]) { + args.clusterDist[k] = args.clusterDist[i*m_iBKTKmeansK + k]; + args.clusterIdx[k] = args.clusterIdx[i*m_iBKTKmeansK + k]; + } + } + } - int maxid = maxcluster; - for (int tid = 1; tid < threads; tid++) { - if (args.clusterDist[maxid] < args.clusterDist[tid * m_iBKTKmeansK + maxcluster]) maxid = tid * m_iBKTKmeansK + maxcluster; + int maxcluster = -1; + SizeType maxCount = 0; + for (int k = 0; k < m_iBKTKmeansK; k++) { + if (args.newCounts[k] > maxCount && DistanceUtils::ComputeL2Distance((T*)p_index->GetSample(args.clusterIdx[k]), args.centers + k * p_index->GetFeatureDim(), p_index->GetFeatureDim()) > 1e-6) + { + maxcluster = k; + maxCount = args.newCounts[k]; + } } - if (args.clusterIdx[maxid] < 0 || args.clusterIdx[maxid] >= p_index->GetNumSamples()) - std::cout << "first:" << first << " last:" << last << " maxcluster:" << maxcluster << "(" << args.newCounts[maxcluster] << ") Error maxid:" << maxid << " dist:" << args.clusterDist[maxid] << std::endl; - maxid = args.clusterIdx[maxid]; + + if (maxcluster != -1 && (args.clusterIdx[maxcluster] < 0 || args.clusterIdx[maxcluster] >= p_index->GetNumSamples())) + std::cout << "first:" << first << " last:" << last << " maxcluster:" << maxcluster << "(" << args.newCounts[maxcluster] << ") Error dist:" << args.clusterDist[maxcluster] << std::endl; for (int k = 0; k < m_iBKTKmeansK; k++) { T* TCenter = args.newTCenters + k * p_index->GetFeatureDim(); if (args.newCounts[k] == 0) { - //int nextid = Utils::rand_int(last, first); - //while (args.label[nextid] != maxcluster) nextid = Utils::rand_int(last, first); - int nextid = maxid; - std::memcpy(TCenter, p_index->GetSample(nextid), sizeof(T)*p_index->GetFeatureDim()); + if (maxcluster != -1) { + //int nextid = Utils::rand_int(last, first); + //while (args.label[nextid] != maxcluster) 
nextid = Utils::rand_int(last, first); + SizeType nextid = args.clusterIdx[maxcluster]; + std::memcpy(TCenter, p_index->GetSample(nextid), sizeof(T)*p_index->GetFeatureDim()); + } + else { + std::memcpy(TCenter, args.centers + k * p_index->GetFeatureDim(), sizeof(T)*p_index->GetFeatureDim()); + } } else { float* currCenters = args.newCenters + k * p_index->GetFeatureDim(); - for (int j = 0; j < p_index->GetFeatureDim(); j++) currCenters[j] /= args.newCounts[k]; + for (DimensionType j = 0; j < p_index->GetFeatureDim(); j++) currCenters[j] /= args.newCounts[k]; if (p_index->GetDistCalcMethod() == DistCalcMethod::Cosine) { COMMON::Utils::Normalize(currCenters, p_index->GetFeatureDim(), COMMON::Utils::GetBase()); } - for (int j = 0; j < p_index->GetFeatureDim(); j++) TCenter[j] = (T)(currCenters[j]); + for (DimensionType j = 0; j < p_index->GetFeatureDim(); j++) TCenter[j] = (T)(currCenters[j]); } } } @@ -417,14 +415,14 @@ namespace SPTAG template int KmeansClustering(VectorIndex* p_index, - std::vector& indices, const int first, const int last, KmeansArgs& args) const { + std::vector& indices, const SizeType first, const SizeType last, KmeansArgs& args) const { int iterLimit = 100; - int batchEnd = min(first + m_iSamples, last); + SizeType batchEnd = min(first + m_iSamples, last); float currDiff, currDist, minClusterDist = MaxDist; for (int numKmeans = 0; numKmeans < 3; numKmeans++) { for (int k = 0; k < m_iBKTKmeansK; k++) { - int randid = COMMON::Utils::rand_int(last, first); + SizeType randid = COMMON::Utils::rand(last, first); std::memcpy(args.centers + k*p_index->GetFeatureDim(), p_index->GetSample(indices[randid]), sizeof(T)*p_index->GetFeatureDim()); } args.ClearCounts(); @@ -432,7 +430,7 @@ namespace SPTAG if (currDist < minClusterDist) { minClusterDist = currDist; memcpy(args.newTCenters, args.centers, sizeof(T)*m_iBKTKmeansK*p_index->GetFeatureDim()); - memcpy(args.counts, args.newCounts, sizeof(int) * m_iBKTKmeansK); + memcpy(args.counts, args.newCounts, 
sizeof(SizeType) * m_iBKTKmeansK); } } @@ -446,7 +444,7 @@ namespace SPTAG args.ClearCounts(); args.ClearDists(-MaxDist); currDist = KmeansAssign(p_index, indices, first, batchEnd, args, true); - memcpy(args.counts, args.newCounts, sizeof(int)*m_iBKTKmeansK); + memcpy(args.counts, args.newCounts, sizeof(SizeType) * m_iBKTKmeansK); currDiff = 0; for (int k = 0; k < m_iBKTKmeansK; k++) { @@ -466,7 +464,7 @@ namespace SPTAG args.ClearCounts(); args.ClearDists(MaxDist); currDist = KmeansAssign(p_index, indices, first, last, args, false); - memcpy(args.counts, args.newCounts, sizeof(int)*m_iBKTKmeansK); + memcpy(args.counts, args.newCounts, sizeof(SizeType) * m_iBKTKmeansK); int numClusters = 0; for (int i = 0; i < m_iBKTKmeansK; i++) if (args.counts[i] > 0) numClusters++; @@ -480,9 +478,9 @@ namespace SPTAG } private: - std::vector m_pTreeStart; + std::vector m_pTreeStart; std::vector m_pTreeRoots; - std::unordered_map m_pSampleCenterMap; + std::unordered_map m_pSampleCenterMap; public: int m_iTreeNumber, m_iBKTKmeansK, m_iBKTLeafSize, m_iSamples; diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/CommonUtils.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/CommonUtils.h index ca5ed83fca6758edc84fcc0466bdaa01cf5a8621..4bc8fbb60fd6e1736165ccd055978ac9eb623a50 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/CommonUtils.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/CommonUtils.h @@ -36,9 +36,9 @@ namespace SPTAG { class Utils { public: - static int rand_int(int high = RAND_MAX, int low = 0) // Generates a random int value. + static SizeType rand(SizeType high = MaxSize, SizeType low = 0) // Generates a random int value. 
{ - return low + (int)(float(high - low)*(std::rand() / (RAND_MAX + 1.0))); + return low + (SizeType)(float(high - low)*(std::rand() / (RAND_MAX + 1.0))); } static inline float atomic_float_add(volatile float* ptr, const float operand) @@ -61,11 +61,11 @@ namespace SPTAG } } - static double GetVector(char* cstr, const char* sep, std::vector& arr, int& NumDim) { + static double GetVector(char* cstr, const char* sep, std::vector& arr, DimensionType& NumDim) { char* current; char* context = NULL; - int i = 0; + DimensionType i = 0; double sum = 0; arr.clear(); current = strtok_s(cstr, sep, &context); @@ -90,23 +90,23 @@ namespace SPTAG } template - static void Normalize(T* arr, int col, int base) { + static void Normalize(T* arr, DimensionType col, int base) { double vecLen = 0; - for (int j = 0; j < col; j++) { + for (DimensionType j = 0; j < col; j++) { double val = arr[j]; vecLen += val * val; } vecLen = std::sqrt(vecLen); if (vecLen < 1e-6) { T val = (T)(1.0 / std::sqrt((double)col) * base); - for (int j = 0; j < col; j++) arr[j] = val; + for (DimensionType j = 0; j < col; j++) arr[j] = val; } else { - for (int j = 0; j < col; j++) arr[j] = (T)(arr[j] / vecLen * base); + for (DimensionType j = 0; j < col; j++) arr[j] = (T)(arr[j] / vecLen * base); } } - static size_t ProcessLine(std::string& currentLine, std::vector& arr, int& D, int base, DistCalcMethod distCalcMethod) { + static size_t ProcessLine(std::string& currentLine, std::vector& arr, DimensionType& D, int base, DistCalcMethod distCalcMethod) { size_t index; double vecLen; if (currentLine.length() == 0 || (index = currentLine.find_last_of("\t")) == std::string::npos || (vecLen = GetVector(const_cast(currentLine.c_str() + index + 1), "|", arr, D)) < -1) { @@ -121,10 +121,10 @@ namespace SPTAG } template - static void PrepareQuerys(std::ifstream& inStream, std::vector& qString, std::vector>& Query, int& NumQuery, int& NumDim, DistCalcMethod distCalcMethod, int base) { + static void 
PrepareQuerys(std::ifstream& inStream, std::vector& qString, std::vector>& Query, SizeType& NumQuery, DimensionType& NumDim, DistCalcMethod distCalcMethod, int base) { std::string currentLine; std::vector arr; - int i = 0; + SizeType i = 0; size_t index; while ((NumQuery < 0 || i < NumQuery) && !inStream.eof()) { std::getline(inStream, currentLine); @@ -132,9 +132,9 @@ namespace SPTAG continue; } qString.push_back(currentLine.substr(0, index)); - if (Query.size() < i + 1) Query.push_back(std::vector(NumDim, 0)); + if ((SizeType)Query.size() < i + 1) Query.push_back(std::vector(NumDim, 0)); - for (int j = 0; j < NumDim; j++) Query[i][j] = (T)arr[j]; + for (DimensionType j = 0; j < NumDim; j++) Query[i][j] = (T)arr[j]; i++; } NumQuery = i; @@ -149,12 +149,12 @@ namespace SPTAG return 1; } - static inline void AddNeighbor(int idx, float dist, int *neighbors, float *dists, int size) + static inline void AddNeighbor(SizeType idx, float dist, SizeType *neighbors, float *dists, DimensionType size) { size--; if (dist < dists[size] || (dist == dists[size] && idx < neighbors[size])) { - int nb; + DimensionType nb; for (nb = 0; nb <= size && neighbors[nb] != idx; nb++); if (nb > size) diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/DataUtils.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/DataUtils.h index affc4687da04f739ed44c93a427456a322a957d2..5d751c4c9870ceca705811029509978c2f6f34eb 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/DataUtils.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/DataUtils.h @@ -13,158 +13,18 @@ namespace SPTAG { namespace COMMON { - const int bufsize = 1024 * 1024 * 1024; + const int bufsize = 1 << 30; class DataUtils { public: - template - static void ProcessTSVData(int id, int threadbase, std::uint64_t blocksize, - std::string filename, std::string outfile, std::string outmetafile, std::string outmetaindexfile, - std::atomic_int& numSamples, int& D, DistCalcMethod 
distCalcMethod) { - std::ifstream inputStream(filename); - if (!inputStream.is_open()) { - std::cerr << "unable to open file " + filename << std::endl; - throw MyException("unable to open file " + filename); - exit(1); - } - std::ofstream outputStream, metaStream_out, metaStream_index; - outputStream.open(outfile + std::to_string(id + threadbase), std::ofstream::binary); - metaStream_out.open(outmetafile + std::to_string(id + threadbase), std::ofstream::binary); - metaStream_index.open(outmetaindexfile + std::to_string(id + threadbase), std::ofstream::binary); - if (!outputStream.is_open() || !metaStream_out.is_open() || !metaStream_index.is_open()) { - std::cerr << "unable to open output file " << outfile << " " << outmetafile << " " << outmetaindexfile << std::endl; - throw MyException("unable to open output files"); - exit(1); - } - - std::vector arr; - std::vector sample; - - int base = 1; - if (distCalcMethod == DistCalcMethod::Cosine) { - base = Utils::GetBase(); - } - std::uint64_t writepos = 0; - int sampleSize = 0; - std::uint64_t totalread = 0; - std::streamoff startpos = id * blocksize; - -#ifndef _MSC_VER - int enter_size = 1; -#else - int enter_size = 1; -#endif - std::string currentLine; - size_t index; - inputStream.seekg(startpos, std::ifstream::beg); - if (id != 0) { - std::getline(inputStream, currentLine); - totalread += currentLine.length() + enter_size; - } - std::cout << "Begin thread " << id << " begin at:" << (startpos + totalread) << std::endl; - while (!inputStream.eof() && totalread <= blocksize) { - std::getline(inputStream, currentLine); - if (currentLine.length() <= enter_size || (index = Utils::ProcessLine(currentLine, arr, D, base, distCalcMethod)) < 0) { - totalread += currentLine.length() + enter_size; - continue; - } - sample.resize(D); - for (int j = 0; j < D; j++) sample[j] = (T)arr[j]; - - outputStream.write((char *)(sample.data()), sizeof(T)*D); - metaStream_index.write((char *)&writepos, sizeof(std::uint64_t)); - 
metaStream_out.write(currentLine.c_str(), index); - - writepos += index; - sampleSize += 1; - totalread += currentLine.length() + enter_size; - } - metaStream_index.write((char *)&writepos, sizeof(std::uint64_t)); - metaStream_index.write((char *)&sampleSize, sizeof(int)); - inputStream.close(); - outputStream.close(); - metaStream_out.close(); - metaStream_index.close(); - - numSamples.fetch_add(sampleSize); - - std::cout << "Finish Thread[" << id << ", " << sampleSize << "] at:" << (startpos + totalread) << std::endl; - } - - static void MergeData(int threadbase, std::string outfile, std::string outmetafile, std::string outmetaindexfile, - std::atomic_int& numSamples, int D) { - std::ifstream inputStream; - std::ofstream outputStream; - char * buf = new char[bufsize]; - std::uint64_t * offsets; - int partSamples; - int metaSamples = 0; - std::uint64_t lastoff = 0; - - outputStream.open(outfile, std::ofstream::binary); - outputStream.write((char *)&numSamples, sizeof(int)); - outputStream.write((char *)&D, sizeof(int)); - for (int i = 0; i < threadbase; i++) { - std::string file = outfile + std::to_string(i); - inputStream.open(file, std::ifstream::binary); - while (!inputStream.eof()) { - inputStream.read(buf, bufsize); - outputStream.write(buf, inputStream.gcount()); - } - inputStream.close(); - remove(file.c_str()); - } - outputStream.close(); - - outputStream.open(outmetafile, std::ofstream::binary); - for (int i = 0; i < threadbase; i++) { - std::string file = outmetafile + std::to_string(i); - inputStream.open(file, std::ifstream::binary); - while (!inputStream.eof()) { - inputStream.read(buf, bufsize); - outputStream.write(buf, inputStream.gcount()); - } - inputStream.close(); - remove(file.c_str()); - } - outputStream.close(); - delete[] buf; - - outputStream.open(outmetaindexfile, std::ofstream::binary); - outputStream.write((char *)&numSamples, sizeof(int)); - for (int i = 0; i < threadbase; i++) { - std::string file = outmetaindexfile + 
std::to_string(i); - inputStream.open(file, std::ifstream::binary); - - inputStream.seekg(-((long long)sizeof(int)), inputStream.end); - inputStream.read((char *)&partSamples, sizeof(int)); - offsets = new std::uint64_t[partSamples + 1]; - - inputStream.seekg(0, inputStream.beg); - inputStream.read((char *)offsets, sizeof(std::uint64_t)*(partSamples + 1)); - inputStream.close(); - remove(file.c_str()); - - for (int j = 0; j < partSamples + 1; j++) - offsets[j] += lastoff; - outputStream.write((char *)offsets, sizeof(std::uint64_t)*partSamples); - - lastoff = offsets[partSamples]; - metaSamples += partSamples; - delete[] offsets; - } - outputStream.write((char *)&lastoff, sizeof(std::uint64_t)); - outputStream.close(); - - std::cout << "numSamples:" << numSamples << " metaSamples:" << metaSamples << " D:" << D << std::endl; - } - static bool MergeIndex(const std::string& p_vectorfile1, const std::string& p_metafile1, const std::string& p_metaindexfile1, const std::string& p_vectorfile2, const std::string& p_metafile2, const std::string& p_metaindexfile2) { std::ifstream inputStream1, inputStream2; std::ofstream outputStream; - char * buf = new char[bufsize]; - int R1, R2, C1, C2; + std::unique_ptr bufferHolder(new char[bufsize]); + char * buf = bufferHolder.get(); + SizeType R1, R2; + DimensionType C1, C2; #define MergeVector(inputStream, vectorFile, R, C) \ inputStream.open(vectorFile, std::ifstream::binary); \ @@ -172,8 +32,8 @@ namespace SPTAG std::cout << "Cannot open vector file: " << vectorFile <<"!" 
<< std::endl; \ return false; \ } \ - inputStream.read((char *)&(R), sizeof(int)); \ - inputStream.read((char *)&(C), sizeof(int)); \ + inputStream.read((char *)&(R), sizeof(SizeType)); \ + inputStream.read((char *)&(C), sizeof(DimensionType)); \ MergeVector(inputStream1, p_vectorfile1, R1, C1) MergeVector(inputStream2, p_vectorfile2, R2, C2) @@ -185,8 +45,8 @@ namespace SPTAG } R1 += R2; outputStream.open(p_vectorfile1 + "_tmp", std::ofstream::binary); - outputStream.write((char *)&R1, sizeof(int)); - outputStream.write((char *)&C1, sizeof(int)); + outputStream.write((char *)&R1, sizeof(SizeType)); + outputStream.write((char *)&C1, sizeof(DimensionType)); while (!inputStream1.eof()) { inputStream1.read(buf, bufsize); outputStream.write(buf, inputStream1.gcount()); @@ -218,26 +78,22 @@ namespace SPTAG outputStream.close(); delete[] buf; - - std::uint64_t * offsets; - int partSamples; + std::uint64_t * offsets = reinterpret_cast(buf); std::uint64_t lastoff = 0; outputStream.open(p_metaindexfile1 + "_tmp", std::ofstream::binary); - outputStream.write((char *)&R1, sizeof(int)); + outputStream.write((char *)&R1, sizeof(SizeType)); #define MergeMetaIndex(inputStream, metaIndexFile) \ inputStream.open(metaIndexFile, std::ifstream::binary); \ if (!inputStream.is_open()) { \ std::cout << "Cannot open meta index file: " << metaIndexFile << "!" 
<< std::endl; \ return false; \ } \ - inputStream.read((char *)&partSamples, sizeof(int)); \ - offsets = new std::uint64_t[partSamples + 1]; \ - inputStream.read((char *)offsets, sizeof(std::uint64_t)*(partSamples + 1)); \ + inputStream.read((char *)&R2, sizeof(SizeType)); \ + inputStream.read((char *)offsets, sizeof(std::uint64_t)*(R2 + 1)); \ inputStream.close(); \ - for (int j = 0; j < partSamples + 1; j++) offsets[j] += lastoff; \ - outputStream.write((char *)offsets, sizeof(std::uint64_t)*partSamples); \ - lastoff = offsets[partSamples]; \ - delete[] offsets; \ + for (SizeType j = 0; j < R2 + 1; j++) offsets[j] += lastoff; \ + outputStream.write((char *)offsets, sizeof(std::uint64_t)*R2); \ + lastoff = offsets[R2]; \ MergeMetaIndex(inputStream1, p_metaindexfile1) MergeMetaIndex(inputStream2, p_metaindexfile2) @@ -253,36 +109,6 @@ namespace SPTAG std::cout << "Merged -> numSamples:" << R1 << " D:" << C1 << std::endl; return true; } - - template - static void ParseData(std::string filenames, std::string outfile, std::string outmetafile, std::string outmetaindexfile, - int threadnum, DistCalcMethod distCalcMethod) { - omp_set_num_threads(threadnum); - - std::atomic_int numSamples = { 0 }; - int D = -1; - - int threadbase = 0; - std::vector inputFileNames = Helper::StrUtils::SplitString(filenames, ","); - for (std::string inputFileName : inputFileNames) - { -#ifndef _MSC_VER - struct stat stat_buf; - stat(inputFileName.c_str(), &stat_buf); -#else - struct _stat64 stat_buf; - int res = _stat64(inputFileName.c_str(), &stat_buf); -#endif - std::uint64_t blocksize = (stat_buf.st_size + threadnum - 1) / threadnum; - -#pragma omp parallel for - for (int i = 0; i < threadnum; i++) { - ProcessTSVData(i, threadbase, blocksize, inputFileName, outfile, outmetafile, outmetaindexfile, numSamples, D, distCalcMethod); - } - threadbase += threadnum; - } - MergeData(threadbase, outfile, outmetafile, outmetaindexfile, numSamples, D); - } }; } } diff --git 
a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/Dataset.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/Dataset.h index b47d521d81dee3a6bc13281a0d5d67463f722af9..d00ea45365f4a4339599a7b0d861dc35104c13e5 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/Dataset.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/Dataset.h @@ -28,23 +28,31 @@ namespace SPTAG class Dataset { private: - int rows; - int cols; + std::string name = "Data"; + SizeType rows = 0; + DimensionType cols = 1; bool ownData = false; T* data = nullptr; - std::vector dataIncremental; - + SizeType incRows = 0; + std::vector incBlocks; + static const SizeType rowsInBlock = 1024 * 1024; public: - Dataset(): rows(0), cols(1) {} - Dataset(int rows_, int cols_, T* data_ = nullptr, bool transferOnwership_ = true) + Dataset() + { + incBlocks.reserve(MaxSize / rowsInBlock + 1); + } + Dataset(SizeType rows_, DimensionType cols_, T* data_ = nullptr, bool transferOnwership_ = true) { Initialize(rows_, cols_, data_, transferOnwership_); + incBlocks.reserve(MaxSize / rowsInBlock + 1); } ~Dataset() { if (ownData) aligned_free(data); + for (T* ptr : incBlocks) aligned_free(ptr); + incBlocks.clear(); } - void Initialize(int rows_, int cols_, T* data_ = nullptr, bool transferOnwership_ = true) + void Initialize(SizeType rows_, DimensionType cols_, T* data_ = nullptr, bool transferOnwership_ = true) { rows = rows_; cols = cols_; @@ -52,161 +60,166 @@ namespace SPTAG if (data_ == nullptr || !transferOnwership_) { ownData = true; - data = (T*)aligned_malloc(sizeof(T) * rows * cols, ALIGN); - if (data_ != nullptr) memcpy(data, data_, rows * cols * sizeof(T)); - else std::memset(data, -1, rows * cols * sizeof(T)); + data = (T*)aligned_malloc(((size_t)rows) * cols * sizeof(T), ALIGN); + if (data_ != nullptr) memcpy(data, data_, ((size_t)rows) * cols * sizeof(T)); + else std::memset(data, -1, ((size_t)rows) * cols * sizeof(T)); } } - void SetR(int R_) + void 
SetName(const std::string name_) { name = name_; } + void SetR(SizeType R_) { if (R_ >= rows) - dataIncremental.resize((R_ - rows) * cols); - else + incRows = R_ - rows; + else { rows = R_; - dataIncremental.clear(); + incRows = 0; } } - inline int R() const { return (int)(rows + dataIncremental.size() / cols); } - inline int C() const { return cols; } - T* operator[](int index) + inline SizeType R() const { return rows + incRows; } + inline DimensionType C() const { return cols; } + inline std::uint64_t BufferSize() const { return sizeof(SizeType) + sizeof(DimensionType) + sizeof(T) * R() * C(); } + + inline const T* At(SizeType index) const { if (index >= rows) { - return dataIncremental.data() + (size_t)(index - rows)*cols; + SizeType incIndex = index - rows; + return incBlocks[incIndex / rowsInBlock] + ((size_t)(incIndex % rowsInBlock)) * cols; } - return data + (size_t)index*cols; + return data + ((size_t)index) * cols; } - const T* operator[](int index) const + T* operator[](SizeType index) { - if (index >= rows) { - return dataIncremental.data() + (size_t)(index - rows)*cols; - } - return data + (size_t)index*cols; + return (T*)At(index); } - - void AddBatch(const T* pData, int num) + + const T* operator[](SizeType index) const { - dataIncremental.insert(dataIncremental.end(), pData, pData + num*cols); + return At(index); } - void AddBatch(int num) + ErrorCode AddBatch(const T* pData, SizeType num) { - dataIncremental.insert(dataIncremental.end(), (size_t)num*cols, T(-1)); + if (R() > MaxSize - num) return ErrorCode::MemoryOverFlow; + + SizeType written = 0; + while (written < num) { + SizeType curBlockIdx = (incRows + written) / rowsInBlock; + if (curBlockIdx >= (SizeType)incBlocks.size()) { + T* newBlock = (T*)aligned_malloc(((size_t)rowsInBlock) * cols * sizeof(T), ALIGN); + if (newBlock == nullptr) return ErrorCode::MemoryOverFlow; + incBlocks.push_back(newBlock); + } + SizeType curBlockPos = (incRows + written) % rowsInBlock; + SizeType toWrite = 
min(rowsInBlock - curBlockPos, num - written); + std::memcpy(incBlocks[curBlockIdx] + ((size_t)curBlockPos) * cols, pData + ((size_t)written) * cols, ((size_t)toWrite) * cols * sizeof(T)); + written += toWrite; + } + incRows += written; + return ErrorCode::Success; } - bool Save(std::string sDataPointsFileName) + ErrorCode AddBatch(SizeType num) { - std::cout << "Save Data To " << sDataPointsFileName << std::endl; - FILE * fp = fopen(sDataPointsFileName.c_str(), "wb"); - if (fp == NULL) return false; - - int CR = R(); - fwrite(&CR, sizeof(int), 1, fp); - fwrite(&cols, sizeof(int), 1, fp); - - T* ptr = data; - int toWrite = rows; - while (toWrite > 0) - { - size_t write = fwrite(ptr, sizeof(T) * cols, toWrite, fp); - ptr += write * cols; - toWrite -= (int)write; - } - ptr = dataIncremental.data(); - toWrite = CR - rows; - while (toWrite > 0) - { - size_t write = fwrite(ptr, sizeof(T) * cols, toWrite, fp); - ptr += write * cols; - toWrite -= (int)write; + if (R() > MaxSize - num) return ErrorCode::MemoryOverFlow; + + SizeType written = 0; + while (written < num) { + SizeType curBlockIdx = (incRows + written) / rowsInBlock; + if (curBlockIdx >= (SizeType)incBlocks.size()) { + T* newBlock = (T*)aligned_malloc(((size_t)rowsInBlock) * cols * sizeof(T), ALIGN); + if (newBlock == nullptr) return ErrorCode::MemoryOverFlow; + incBlocks.push_back(newBlock); + } + SizeType curBlockPos = (incRows + written) % rowsInBlock; + SizeType toWrite = min(rowsInBlock - curBlockPos, num - written); + std::memset(incBlocks[curBlockIdx] + ((size_t)curBlockPos) * cols, -1, ((size_t)toWrite) * cols * sizeof(T)); + written += toWrite; } - fclose(fp); + incRows += written; + return ErrorCode::Success; + } - std::cout << "Save Data (" << CR << ", " << cols << ") Finish!" 
<< std::endl; + bool Save(std::ostream& p_outstream) const + { + SizeType CR = R(); + p_outstream.write((char*)&CR, sizeof(SizeType)); + p_outstream.write((char*)&cols, sizeof(DimensionType)); + p_outstream.write((char*)data, sizeof(T) * cols * rows); + + SizeType blocks = incRows / rowsInBlock; + for (int i = 0; i < blocks; i++) + p_outstream.write((char*)incBlocks[i], sizeof(T) * cols * rowsInBlock); + + SizeType remain = incRows % rowsInBlock; + if (remain > 0) p_outstream.write((char*)incBlocks[blocks], sizeof(T) * cols * remain); + std::cout << "Save " << name << " (" << CR << ", " << cols << ") Finish!" << std::endl; return true; } - bool Save(void **pDataPointsMemFile, int64_t &len) + bool Save(std::string sDataPointsFileName) const { - size_t size = sizeof(int) + sizeof(int) + sizeof(T) * R() *cols; - char *mem = (char*)malloc(size); - if (mem == NULL) return false; - - int CR = R(); - - auto header = (int*)mem; - header[0] = CR; - header[1] = cols; - auto body = &mem[8]; - - memcpy(body, data, sizeof(T) * cols * rows); - body += sizeof(T) * cols * rows; - memcpy(body, dataIncremental.data(), sizeof(T) * cols * (CR - rows)); - body += sizeof(T) * cols * (CR - rows); - - *pDataPointsMemFile = mem; - len = size; - + std::cout << "Save " << name << " To " << sDataPointsFileName << std::endl; + std::ofstream output(sDataPointsFileName, std::ios::binary); + if (!output.is_open()) return false; + Save(output); + output.close(); return true; } bool Load(std::string sDataPointsFileName) { - std::cout << "Load Data From " << sDataPointsFileName << std::endl; - FILE * fp = fopen(sDataPointsFileName.c_str(), "rb"); - if (fp == NULL) return false; + std::cout << "Load " << name << " From " << sDataPointsFileName << std::endl; + std::ifstream input(sDataPointsFileName, std::ios::binary); + if (!input.is_open()) return false; - int R, C; - fread(&R, sizeof(int), 1, fp); - fread(&C, sizeof(int), 1, fp); + input.read((char*)&rows, sizeof(SizeType)); + 
input.read((char*)&cols, sizeof(DimensionType)); - Initialize(R, C); - T* ptr = data; - while (R > 0) { - size_t read = fread(ptr, sizeof(T) * C, R, fp); - ptr += read * C; - R -= (int)read; - } - fclose(fp); - std::cout << "Load Data (" << rows << ", " << cols << ") Finish!" << std::endl; + Initialize(rows, cols); + input.read((char*)data, sizeof(T) * cols * rows); + input.close(); + std::cout << "Load " << name << " (" << rows << ", " << cols << ") Finish!" << std::endl; return true; } // Functions for loading models from memory mapped files bool Load(char* pDataPointsMemFile) { - int R, C; - R = *((int*)pDataPointsMemFile); - pDataPointsMemFile += sizeof(int); + SizeType R; + DimensionType C; + R = *((SizeType*)pDataPointsMemFile); + pDataPointsMemFile += sizeof(SizeType); - C = *((int*)pDataPointsMemFile); - pDataPointsMemFile += sizeof(int); + C = *((DimensionType*)pDataPointsMemFile); + pDataPointsMemFile += sizeof(DimensionType); Initialize(R, C, (T*)pDataPointsMemFile); + std::cout << "Load " << name << " (" << R << ", " << C << ") Finish!" 
<< std::endl; return true; } - bool Refine(const std::vector& indices, std::string sDataPointsFileName) + bool Refine(const std::vector& indices, std::ostream& output) { - std::cout << "Save Refine Data To " << sDataPointsFileName << std::endl; - FILE * fp = fopen(sDataPointsFileName.c_str(), "wb"); - if (fp == NULL) return false; - - int R = (int)(indices.size()); - fwrite(&R, sizeof(int), 1, fp); - fwrite(&cols, sizeof(int), 1, fp); + SizeType R = (SizeType)(indices.size()); + output.write((char*)&R, sizeof(SizeType)); + output.write((char*)&cols, sizeof(DimensionType)); - // write point one by one in case for cache miss - for (int i = 0; i < R; i++) { - if (indices[i] < rows) - fwrite(data + (size_t)indices[i] * cols, sizeof(T) * cols, 1, fp); - else - fwrite(dataIncremental.data() + (size_t)(indices[i] - rows) * cols, sizeof(T) * cols, 1, fp); + for (SizeType i = 0; i < R; i++) { + output.write((char*)At(indices[i]), sizeof(T) * cols); } - fclose(fp); + std::cout << "Save Refine " << name << " (" << R << ", " << cols << ") Finish!" << std::endl; + return true; + } - std::cout << "Save Refine Data (" << R << ", " << cols << ") Finish!" 
<< std::endl; + bool Refine(const std::vector& indices, std::string sDataPointsFileName) + { + std::cout << "Save Refine " << name << " To " << sDataPointsFileName << std::endl; + std::ofstream output(sDataPointsFileName, std::ios::binary); + if (!output.is_open()) return false; + Refine(indices, output); + output.close(); return true; } }; diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/DistanceUtils.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/DistanceUtils.h index 828eead752c4f701ca8d6bff56a72100cb41941b..8e1d34924544cf596273f604f29f8c7edf544d05 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/DistanceUtils.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/DistanceUtils.h @@ -199,7 +199,7 @@ namespace SPTAG #endif /* template - static float ComputeL2Distance(const T *pX, const T *pY, int length) + static float ComputeL2Distance(const T *pX, const T *pY, DimensionType length) { float diff = 0; const T* pEnd1 = pX + length; @@ -217,7 +217,7 @@ namespace SPTAG result = acc(result, exec(c1, c2)); \ } \ - static float ComputeL2Distance(const std::int8_t *pX, const std::int8_t *pY, int length) + static float ComputeL2Distance(const std::int8_t *pX, const std::int8_t *pY, DimensionType length) { const std::int8_t* pEnd32 = pX + ((length >> 5) << 5); const std::int8_t* pEnd16 = pX + ((length >> 4) << 4); @@ -258,7 +258,7 @@ namespace SPTAG return diff; } - static float ComputeL2Distance(const std::uint8_t *pX, const std::uint8_t *pY, int length) + static float ComputeL2Distance(const std::uint8_t *pX, const std::uint8_t *pY, DimensionType length) { const std::uint8_t* pEnd32 = pX + ((length >> 5) << 5); const std::uint8_t* pEnd16 = pX + ((length >> 4) << 4); @@ -299,7 +299,7 @@ namespace SPTAG return diff; } - static float ComputeL2Distance(const std::int16_t *pX, const std::int16_t *pY, int length) + static float ComputeL2Distance(const std::int16_t *pX, const std::int16_t *pY, 
DimensionType length) { const std::int16_t* pEnd16 = pX + ((length >> 4) << 4); const std::int16_t* pEnd8 = pX + ((length >> 3) << 3); @@ -341,7 +341,7 @@ namespace SPTAG return diff; } - static float ComputeL2Distance(const float *pX, const float *pY, int length) + static float ComputeL2Distance(const float *pX, const float *pY, DimensionType length) { const float* pEnd16 = pX + ((length >> 4) << 4); const float* pEnd4 = pX + ((length >> 2) << 2); @@ -389,14 +389,14 @@ namespace SPTAG } /* template - static float ComputeCosineDistance(const T *pX, const T *pY, int length) { + static float ComputeCosineDistance(const T *pX, const T *pY, DimensionType length) { float diff = 0; const T* pEnd1 = pX + length; while (pX < pEnd1) diff += (*pX++) * (*pY++); return 1 - diff; } */ - static float ComputeCosineDistance(const std::int8_t *pX, const std::int8_t *pY, int length) { + static float ComputeCosineDistance(const std::int8_t *pX, const std::int8_t *pY, DimensionType length) { const std::int8_t* pEnd32 = pX + ((length >> 5) << 5); const std::int8_t* pEnd16 = pX + ((length >> 4) << 4); const std::int8_t* pEnd4 = pX + ((length >> 2) << 2); @@ -436,7 +436,7 @@ namespace SPTAG return 16129 - diff; } - static float ComputeCosineDistance(const std::uint8_t *pX, const std::uint8_t *pY, int length) { + static float ComputeCosineDistance(const std::uint8_t *pX, const std::uint8_t *pY, DimensionType length) { const std::uint8_t* pEnd32 = pX + ((length >> 5) << 5); const std::uint8_t* pEnd16 = pX + ((length >> 4) << 4); const std::uint8_t* pEnd4 = pX + ((length >> 2) << 2); @@ -476,7 +476,7 @@ namespace SPTAG return 65025 - diff; } - static float ComputeCosineDistance(const std::int16_t *pX, const std::int16_t *pY, int length) { + static float ComputeCosineDistance(const std::int16_t *pX, const std::int16_t *pY, DimensionType length) { const std::int16_t* pEnd16 = pX + ((length >> 4) << 4); const std::int16_t* pEnd8 = pX + ((length >> 3) << 3); const std::int16_t* pEnd4 = pX + 
((length >> 2) << 2); @@ -517,7 +517,7 @@ namespace SPTAG return 1073676289 - diff; } - static float ComputeCosineDistance(const float *pX, const float *pY, int length) { + static float ComputeCosineDistance(const float *pX, const float *pY, DimensionType length) { const float* pEnd16 = pX + ((length >> 4) << 4); const float* pEnd4 = pX + ((length >> 2) << 2); const float* pEnd1 = pX + length; @@ -564,7 +564,7 @@ namespace SPTAG } template - static inline float ComputeDistance(const T *p1, const T *p2, int length, SPTAG::DistCalcMethod distCalcMethod) + static inline float ComputeDistance(const T *p1, const T *p2, DimensionType length, SPTAG::DistCalcMethod distCalcMethod) { if (distCalcMethod == SPTAG::DistCalcMethod::L2) return ComputeL2Distance(p1, p2, length); @@ -588,7 +588,7 @@ namespace SPTAG template - float (*DistanceCalcSelector(SPTAG::DistCalcMethod p_method)) (const T*, const T*, int) + float (*DistanceCalcSelector(SPTAG::DistCalcMethod p_method)) (const T*, const T*, DimensionType) { switch (p_method) { diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/FineGrainedLock.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/FineGrainedLock.h index d8bfbaa9fe3b51006c5171f42d32e2530d8549fb..0de7ed8b36844c3a98674268c050ff6490068938 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/FineGrainedLock.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/FineGrainedLock.h @@ -16,30 +16,30 @@ namespace SPTAG public: FineGrainedLock() {} ~FineGrainedLock() { - for (int i = 0; i < locks.size(); i++) + for (size_t i = 0; i < locks.size(); i++) locks[i].reset(); locks.clear(); } - void resize(int n) { - int current = (int)locks.size(); + void resize(SizeType n) { + SizeType current = (SizeType)locks.size(); if (current <= n) { locks.resize(n); - for (int i = current; i < n; i++) + for (SizeType i = current; i < n; i++) locks[i].reset(new std::mutex); } else { - for (int i = n; i < current; i++) + for 
(SizeType i = n; i < current; i++) locks[i].reset(); locks.resize(n); } } - std::mutex& operator[](int idx) { + std::mutex& operator[](SizeType idx) { return *locks[idx]; } - const std::mutex& operator[](int idx) const { + const std::mutex& operator[](SizeType idx) const { return *locks[idx]; } private: diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/KDTree.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/KDTree.h index 95d4ae36e46d82ad6f464366d3f429200da8f3cf..e46c133940dc47ad2594783bfb3413336992eff6 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/KDTree.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/KDTree.h @@ -23,9 +23,9 @@ namespace SPTAG // node type for storing KDT struct KDTNode { - int left; - int right; - short split_dim; + SizeType left; + SizeType right; + DimensionType split_dim; float split_value; }; @@ -39,18 +39,18 @@ namespace SPTAG m_iSamples(other.m_iSamples) {} ~KDTree() {} - inline const KDTNode& operator[](int index) const { return m_pTreeRoots[index]; } - inline KDTNode& operator[](int index) { return m_pTreeRoots[index]; } + inline const KDTNode& operator[](SizeType index) const { return m_pTreeRoots[index]; } + inline KDTNode& operator[](SizeType index) { return m_pTreeRoots[index]; } - inline int size() const { return (int)m_pTreeRoots.size(); } + inline SizeType size() const { return (SizeType)m_pTreeRoots.size(); } template - void BuildTrees(VectorIndex* p_index, std::vector* indices = nullptr) + void BuildTrees(VectorIndex* p_index, std::vector* indices = nullptr) { - std::vector localindices; + std::vector localindices; if (indices == nullptr) { localindices.resize(p_index->GetNumSamples()); - for (int i = 0; i < p_index->GetNumSamples(); i++) localindices[i] = i; + for (SizeType i = 0; i < p_index->GetNumSamples(); i++) localindices[i] = i; } else { localindices.assign(indices->begin(), indices->end()); @@ -63,58 +63,41 @@ namespace SPTAG { Sleep(i * 
100); std::srand(clock()); - std::vector pindices(localindices.begin(), localindices.end()); + std::vector pindices(localindices.begin(), localindices.end()); std::random_shuffle(pindices.begin(), pindices.end()); - m_pTreeStart[i] = i * (int)pindices.size(); + m_pTreeStart[i] = i * (SizeType)pindices.size(); std::cout << "Start to build KDTree " << i + 1 << std::endl; - int iTreeSize = m_pTreeStart[i]; - DivideTree(p_index, pindices, 0, (int)pindices.size() - 1, m_pTreeStart[i], iTreeSize); + SizeType iTreeSize = m_pTreeStart[i]; + DivideTree(p_index, pindices, 0, (SizeType)pindices.size() - 1, m_pTreeStart[i], iTreeSize); std::cout << i + 1 << " KDTree built, " << iTreeSize - m_pTreeStart[i] << " " << pindices.size() << std::endl; } } - bool SaveTrees(void **pKDTMemFile, int64_t &len) const - { - int treeNodeSize = (int)m_pTreeRoots.size(); - - size_t size = sizeof(int) + - sizeof(int) * m_iTreeNumber + - sizeof(int) + - sizeof(KDTNode) * treeNodeSize; - char *mem = (char*)malloc(size); - if (mem == NULL) return false; - - auto ptr = mem; - *(int*)ptr = m_iTreeNumber; - ptr += sizeof(int); - - memcpy(ptr, m_pTreeStart.data(), sizeof(int) * m_iTreeNumber); - ptr += sizeof(int) * m_iTreeNumber; - - *(int*)ptr = treeNodeSize; - ptr += sizeof(int); - - memcpy(ptr, m_pTreeRoots.data(), sizeof(KDTNode) * treeNodeSize); - *pKDTMemFile = mem; - len = size; + inline std::uint64_t BufferSize() const + { + return sizeof(int) + sizeof(SizeType) * m_iTreeNumber + + sizeof(SizeType) + sizeof(KDTNode) * m_pTreeRoots.size(); + } + bool SaveTrees(std::ostream& p_outstream) const + { + p_outstream.write((char*)&m_iTreeNumber, sizeof(int)); + p_outstream.write((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber); + SizeType treeNodeSize = (SizeType)m_pTreeRoots.size(); + p_outstream.write((char*)&treeNodeSize, sizeof(SizeType)); + p_outstream.write((char*)m_pTreeRoots.data(), sizeof(KDTNode) * treeNodeSize); + std::cout << "Save KDT (" << m_iTreeNumber << "," << 
treeNodeSize << ") Finish!" << std::endl; return true; } bool SaveTrees(std::string sTreeFileName) const { std::cout << "Save KDT to " << sTreeFileName << std::endl; - FILE *fp = fopen(sTreeFileName.c_str(), "wb"); - if (fp == NULL) return false; - - fwrite(&m_iTreeNumber, sizeof(int), 1, fp); - fwrite(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp); - int treeNodeSize = (int)m_pTreeRoots.size(); - fwrite(&treeNodeSize, sizeof(int), 1, fp); - fwrite(m_pTreeRoots.data(), sizeof(KDTNode), treeNodeSize, fp); - fclose(fp); - std::cout << "Save KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl; + std::ofstream output(sTreeFileName, std::ios::binary); + if (!output.is_open()) return false; + SaveTrees(output); + output.close(); return true; } @@ -123,31 +106,32 @@ namespace SPTAG m_iTreeNumber = *((int*)pKDTMemFile); pKDTMemFile += sizeof(int); m_pTreeStart.resize(m_iTreeNumber); - memcpy(m_pTreeStart.data(), pKDTMemFile, sizeof(int) * m_iTreeNumber); - pKDTMemFile += sizeof(int)*m_iTreeNumber; + memcpy(m_pTreeStart.data(), pKDTMemFile, sizeof(SizeType) * m_iTreeNumber); + pKDTMemFile += sizeof(SizeType)*m_iTreeNumber; - int treeNodeSize = *((int*)pKDTMemFile); - pKDTMemFile += sizeof(int); + SizeType treeNodeSize = *((SizeType*)pKDTMemFile); + pKDTMemFile += sizeof(SizeType); m_pTreeRoots.resize(treeNodeSize); memcpy(m_pTreeRoots.data(), pKDTMemFile, sizeof(KDTNode) * treeNodeSize); + std::cout << "Load KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" 
<< std::endl; return true; } bool LoadTrees(std::string sTreeFileName) { std::cout << "Load KDT From " << sTreeFileName << std::endl; - FILE *fp = fopen(sTreeFileName.c_str(), "rb"); - if (fp == NULL) return false; + std::ifstream input(sTreeFileName, std::ios::binary); + if (!input.is_open()) return false; - fread(&m_iTreeNumber, sizeof(int), 1, fp); + input.read((char*)&m_iTreeNumber, sizeof(int)); m_pTreeStart.resize(m_iTreeNumber); - fread(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp); + input.read((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber); - int treeNodeSize; - fread(&treeNodeSize, sizeof(int), 1, fp); + SizeType treeNodeSize; + input.read((char*)&treeNodeSize, sizeof(SizeType)); m_pTreeRoots.resize(treeNodeSize); - fread(m_pTreeRoots.data(), sizeof(KDTNode), treeNodeSize, fp); - fclose(fp); + input.read((char*)m_pTreeRoots.data(), sizeof(KDTNode) * treeNodeSize); + input.close(); std::cout << "Load KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" 
<< std::endl; return true; } @@ -155,7 +139,7 @@ namespace SPTAG template void InitSearchTrees(const VectorIndex* p_index, const COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const int p_limits) const { - for (char i = 0; i < m_iTreeNumber; i++) { + for (int i = 0; i < m_iTreeNumber; i++) { KDTSearch(p_index, p_query, p_space, m_pTreeStart[i], true, 0); } @@ -181,10 +165,10 @@ namespace SPTAG template void KDTSearch(const VectorIndex* p_index, const COMMON::QueryResultSet &p_query, - COMMON::WorkSpace& p_space, const int node, const bool isInit, const float distBound) const { + COMMON::WorkSpace& p_space, const SizeType node, const bool isInit, const float distBound) const { if (node < 0) { - int index = -node - 1; + SizeType index = -node - 1; if (index >= p_index->GetNumSamples()) return; #ifdef PREFETCH const char* data = (const char *)(p_index->GetSample(index)); @@ -203,7 +187,7 @@ namespace SPTAG float diff = (p_query.GetTarget())[tnode.split_dim] - tnode.split_value; float distanceBound = distBound + diff * diff; - int otherChild, bestChild; + SizeType otherChild, bestChild; if (diff < 0) { bestChild = tnode.left; @@ -224,10 +208,10 @@ namespace SPTAG template - void DivideTree(VectorIndex* p_index, std::vector& indices, int first, int last, - int index, int &iTreeSize) { + void DivideTree(VectorIndex* p_index, std::vector& indices, SizeType first, SizeType last, + SizeType index, SizeType &iTreeSize) { ChooseDivision(p_index, m_pTreeRoots[index], indices, first, last); - int i = Subdivide(p_index, m_pTreeRoots[index], indices, first, last); + SizeType i = Subdivide(p_index, m_pTreeRoots[index], indices, first, last); if (i - 1 <= first) { m_pTreeRoots[index].left = -indices[first] - 1; @@ -251,30 +235,30 @@ namespace SPTAG } template - void ChooseDivision(VectorIndex* p_index, KDTNode& node, const std::vector& indices, const int first, const int last) + void ChooseDivision(VectorIndex* p_index, KDTNode& node, const std::vector& indices, const 
SizeType first, const SizeType last) { std::vector meanValues(p_index->GetFeatureDim(), 0); std::vector varianceValues(p_index->GetFeatureDim(), 0); - int end = min(first + m_iSamples, last); - int count = end - first + 1; + SizeType end = min(first + m_iSamples, last); + SizeType count = end - first + 1; // calculate the mean of each dimension - for (int j = first; j <= end; j++) + for (SizeType j = first; j <= end; j++) { const T* v = (const T*)p_index->GetSample(indices[j]); - for (int k = 0; k < p_index->GetFeatureDim(); k++) + for (DimensionType k = 0; k < p_index->GetFeatureDim(); k++) { meanValues[k] += v[k]; } } - for (int k = 0; k < p_index->GetFeatureDim(); k++) + for (DimensionType k = 0; k < p_index->GetFeatureDim(); k++) { meanValues[k] /= count; } // calculate the variance of each dimension - for (int j = first; j <= end; j++) + for (SizeType j = first; j <= end; j++) { const T* v = (const T*)p_index->GetSample(indices[j]); - for (int k = 0; k < p_index->GetFeatureDim(); k++) + for (DimensionType k = 0; k < p_index->GetFeatureDim(); k++) { float dist = v[k] - meanValues[k]; varianceValues[k] += dist*dist; @@ -286,13 +270,13 @@ namespace SPTAG node.split_value = meanValues[node.split_dim]; } - int SelectDivisionDimension(const std::vector& varianceValues) const + DimensionType SelectDivisionDimension(const std::vector& varianceValues) const { // Record the top maximum variances - std::vector topind(m_numTopDimensionKDTSplit); + std::vector topind(m_numTopDimensionKDTSplit); int num = 0; // order the variances - for (int i = 0; i < varianceValues.size(); i++) + for (DimensionType i = 0; i < (DimensionType)varianceValues.size(); i++) { if (num < m_numTopDimensionKDTSplit || varianceValues[i] > varianceValues[topind[num - 1]]) { @@ -314,18 +298,18 @@ namespace SPTAG } } // randomly choose a dimension from TOP_DIM - return topind[COMMON::Utils::rand_int(num)]; + return topind[COMMON::Utils::rand(num)]; } template - int Subdivide(VectorIndex* p_index, const 
KDTNode& node, std::vector& indices, const int first, const int last) const + SizeType Subdivide(VectorIndex* p_index, const KDTNode& node, std::vector& indices, const SizeType first, const SizeType last) const { - int i = first; - int j = last; + SizeType i = first; + SizeType j = last; // decide which child one point belongs while (i <= j) { - int ind = indices[i]; + SizeType ind = indices[i]; const T* v = (const T*)p_index->GetSample(ind); float val = v[node.split_dim]; if (val < node.split_value) @@ -347,7 +331,7 @@ namespace SPTAG } private: - std::vector m_pTreeStart; + std::vector m_pTreeStart; std::vector m_pTreeRoots; public: diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/NeighborhoodGraph.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/NeighborhoodGraph.h index 02d2541754a0390e10f68081e7477680e172bb4e..ea47125c36cfa8a669b2c0c5fce3a615dd02bfd1 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/NeighborhoodGraph.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/NeighborhoodGraph.h @@ -27,18 +27,21 @@ namespace SPTAG m_iCEFScale(2), m_iRefineIter(0), m_iCEF(1000), - m_iMaxCheckForRefineGraph(10000) {} + m_iMaxCheckForRefineGraph(10000) + { + m_pNeighborhoodGraph.SetName("Graph"); + } ~NeighborhoodGraph() {} - virtual void InsertNeighbors(VectorIndex* index, const int node, int insertNode, float insertDist) = 0; + virtual void InsertNeighbors(VectorIndex* index, const SizeType node, SizeType insertNode, float insertDist) = 0; - virtual void RebuildNeighbors(VectorIndex* index, const int node, int* nodes, const BasicResult* queryResults, const int numResults) = 0; + virtual void RebuildNeighbors(VectorIndex* index, const SizeType node, SizeType* nodes, const BasicResult* queryResults, const int numResults) = 0; - virtual float GraphAccuracyEstimation(VectorIndex* index, const int samples, const std::unordered_map* idmap = nullptr) = 0; + virtual float 
GraphAccuracyEstimation(VectorIndex* index, const SizeType samples, const std::unordered_map* idmap = nullptr) = 0; template - void BuildGraph(VectorIndex* index, const std::unordered_map* idmap = nullptr) + void BuildGraph(VectorIndex* index, const std::unordered_map* idmap = nullptr) { std::cout << "build RNG graph!" << std::endl; @@ -55,11 +58,11 @@ namespace SPTAG { COMMON::Dataset NeighborhoodDists(m_iGraphSize, m_iNeighborhoodSize); - std::vector> TptreeDataIndices(m_iTPTNumber, std::vector(m_iGraphSize)); - std::vector>> TptreeLeafNodes(m_iTPTNumber, std::vector>()); + std::vector> TptreeDataIndices(m_iTPTNumber, std::vector(m_iGraphSize)); + std::vector>> TptreeLeafNodes(m_iTPTNumber, std::vector>()); - for (int i = 0; i < m_iGraphSize; i++) - for (int j = 0; j < m_iNeighborhoodSize; j++) + for (SizeType i = 0; i < m_iGraphSize; i++) + for (DimensionType j = 0; j < m_iNeighborhoodSize; j++) (NeighborhoodDists)[i][j] = MaxDist; std::cout << "Parallel TpTree Partition begin " << std::endl; @@ -67,7 +70,7 @@ namespace SPTAG for (int i = 0; i < m_iTPTNumber; i++) { Sleep(i * 100); std::srand(clock()); - for (int j = 0; j < m_iGraphSize; j++) TptreeDataIndices[i][j] = j; + for (SizeType j = 0; j < m_iGraphSize; j++) TptreeDataIndices[i][j] = j; std::random_shuffle(TptreeDataIndices[i].begin(), TptreeDataIndices[i].end()); PartitionByTptree(index, TptreeDataIndices[i], 0, m_iGraphSize - 1, TptreeLeafNodes[i]); std::cout << "Finish Getting Leaves for Tree " << i << std::endl; @@ -77,17 +80,17 @@ namespace SPTAG for (int i = 0; i < m_iTPTNumber; i++) { #pragma omp parallel for schedule(dynamic) - for (int j = 0; j < TptreeLeafNodes[i].size(); j++) + for (SizeType j = 0; j < (SizeType)TptreeLeafNodes[i].size(); j++) { - int start_index = TptreeLeafNodes[i][j].first; - int end_index = TptreeLeafNodes[i][j].second; + SizeType start_index = TptreeLeafNodes[i][j].first; + SizeType end_index = TptreeLeafNodes[i][j].second; if (omp_get_thread_num() == 0) std::cout << 
"\rProcessing Tree " << i << ' ' << j * 100 / TptreeLeafNodes[i].size() << '%'; - for (int x = start_index; x < end_index; x++) + for (SizeType x = start_index; x < end_index; x++) { - for (int y = x + 1; y <= end_index; y++) + for (SizeType y = x + 1; y <= end_index; y++) { - int p1 = TptreeDataIndices[i][x]; - int p2 = TptreeDataIndices[i][y]; + SizeType p1 = TptreeDataIndices[i][x]; + SizeType p2 = TptreeDataIndices[i][y]; float dist = index->ComputeDistance(index->GetSample(p1), index->GetSample(p2)); if (idmap != nullptr) { p1 = (idmap->find(p1) == idmap->end()) ? p1 : idmap->at(p1); @@ -112,13 +115,13 @@ namespace SPTAG } template - void RefineGraph(VectorIndex* index, const std::unordered_map* idmap = nullptr) + void RefineGraph(VectorIndex* index, const std::unordered_map* idmap = nullptr) { m_iCEF *= m_iCEFScale; m_iMaxCheckForRefineGraph *= m_iCEFScale; #pragma omp parallel for schedule(dynamic) - for (int i = 0; i < m_iGraphSize; i++) + for (SizeType i = 0; i < m_iGraphSize; i++) { RefineNode(index, i, false); if (i % 1000 == 0) std::cout << "\rRefine 1 " << (i * 100 / m_iGraphSize) << "%"; @@ -130,7 +133,7 @@ namespace SPTAG m_iNeighborhoodSize /= m_iNeighborhoodScale; #pragma omp parallel for schedule(dynamic) - for (int i = 0; i < m_iGraphSize; i++) + for (SizeType i = 0; i < m_iGraphSize; i++) { RefineNode(index, i, false); if (i % 1000 == 0) std::cout << "\rRefine 2 " << (i * 100 / m_iGraphSize) << "%"; @@ -147,17 +150,17 @@ namespace SPTAG } template - ErrorCode RefineGraph(VectorIndex* index, std::vector& indices, std::vector& reverseIndices, - std::string graphFileName, const std::unordered_map* idmap = nullptr) + ErrorCode RefineGraph(VectorIndex* index, std::vector& indices, std::vector& reverseIndices, + std::ostream& output, const std::unordered_map* idmap = nullptr) { - int R = (int)indices.size(); + SizeType R = (SizeType)indices.size(); #pragma omp parallel for schedule(dynamic) - for (int i = 0; i < R; i++) + for (SizeType i = 0; i < R; 
i++) { RefineNode(index, indices[i], false); - int* nodes = m_pNeighborhoodGraph[indices[i]]; - for (int j = 0; j < m_iNeighborhoodSize; j++) + SizeType* nodes = m_pNeighborhoodGraph[indices[i]]; + for (DimensionType j = 0; j < m_iNeighborhoodSize; j++) { if (nodes[j] < 0) nodes[j] = -1; else nodes[j] = reverseIndices[nodes[j]]; @@ -166,20 +169,13 @@ namespace SPTAG nodes[m_iNeighborhoodSize - 1] = -2 - idmap->at(-1 - indices[i]); } - std::ofstream graphOut(graphFileName, std::ios::binary); - if (!graphOut.is_open()) return ErrorCode::FailedCreateFile; - graphOut.write((char*)&R, sizeof(int)); - graphOut.write((char*)&m_iNeighborhoodSize, sizeof(int)); - for (int i = 0; i < R; i++) { - graphOut.write((char*)m_pNeighborhoodGraph[indices[i]], sizeof(int) * m_iNeighborhoodSize); - } - graphOut.close(); + m_pNeighborhoodGraph.Refine(indices, output); return ErrorCode::Success; } template - void RefineNode(VectorIndex* index, const int node, bool updateNeighbors) + void RefineNode(VectorIndex* index, const SizeType node, bool updateNeighbors) { COMMON::QueryResultSet query((const T*)index->GetSample(node), m_iCEF + 1); index->SearchIndex(query); @@ -200,8 +196,8 @@ namespace SPTAG } template - void PartitionByTptree(VectorIndex* index, std::vector& indices, const int first, const int last, - std::vector> & leaves) + void PartitionByTptree(VectorIndex* index, std::vector& indices, const SizeType first, const SizeType last, + std::vector> & leaves) { if (last - first <= m_iTPTLeafSize) { @@ -212,39 +208,39 @@ namespace SPTAG std::vector Mean(index->GetFeatureDim(), 0); int iIteration = 100; - int end = min(first + m_iSamples, last); - int count = end - first + 1; + SizeType end = min(first + m_iSamples, last); + SizeType count = end - first + 1; // calculate the mean of each dimension - for (int j = first; j <= end; j++) + for (SizeType j = first; j <= end; j++) { const T* v = (const T*)index->GetSample(indices[j]); - for (int k = 0; k < index->GetFeatureDim(); k++) + for 
(DimensionType k = 0; k < index->GetFeatureDim(); k++) { Mean[k] += v[k]; } } - for (int k = 0; k < index->GetFeatureDim(); k++) + for (DimensionType k = 0; k < index->GetFeatureDim(); k++) { Mean[k] /= count; } std::vector Variance; Variance.reserve(index->GetFeatureDim()); - for (int j = 0; j < index->GetFeatureDim(); j++) + for (DimensionType j = 0; j < index->GetFeatureDim(); j++) { Variance.push_back(BasicResult(j, 0)); } // calculate the variance of each dimension - for (int j = first; j <= end; j++) + for (SizeType j = first; j <= end; j++) { const T* v = (const T*)index->GetSample(indices[j]); - for (int k = 0; k < index->GetFeatureDim(); k++) + for (DimensionType k = 0; k < index->GetFeatureDim(); k++) { float dist = v[k] - Mean[k]; Variance[k].Dist += dist*dist; } } std::sort(Variance.begin(), Variance.end(), COMMON::Compare); - std::vector indexs(m_numTopDimensionTPTSplit); + std::vector indexs(m_numTopDimensionTPTSplit); std::vector weight(m_numTopDimensionTPTSplit), bestweight(m_numTopDimensionTPTSplit); float bestvariance = Variance[index->GetFeatureDim() - 1].Dist; for (int i = 0; i < m_numTopDimensionTPTSplit; i++) @@ -270,7 +266,7 @@ namespace SPTAG weight[j] /= sumweight; } float mean = 0; - for (int j = 0; j < count; j++) + for (SizeType j = 0; j < count; j++) { Val[j] = 0; const T* v = (const T*)index->GetSample(indices[first + j]); @@ -282,7 +278,7 @@ namespace SPTAG } mean /= count; float var = 0; - for (int j = 0; j < count; j++) + for (SizeType j = 0; j < count; j++) { float dist = Val[j] - mean; var += dist * dist; @@ -297,8 +293,8 @@ namespace SPTAG } } } - int i = first; - int j = last; + SizeType i = first; + SizeType j = last; // decide which child one point belongs while (i <= j) { @@ -336,100 +332,71 @@ namespace SPTAG } } + inline std::uint64_t BufferSize() const + { + return m_pNeighborhoodGraph.BufferSize(); + } + bool LoadGraph(std::string sGraphFilename) { - std::cout << "Load Graph From " << sGraphFilename << std::endl; - FILE * 
fp = fopen(sGraphFilename.c_str(), "rb"); - if (fp == NULL) return false; + if (!m_pNeighborhoodGraph.Load(sGraphFilename)) return false; - fread(&m_iGraphSize, sizeof(int), 1, fp); - fread(&m_iNeighborhoodSize, sizeof(int), 1, fp); - m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize); + m_iGraphSize = m_pNeighborhoodGraph.R(); + m_iNeighborhoodSize = m_pNeighborhoodGraph.C(); m_dataUpdateLock.resize(m_iGraphSize); - - for (int i = 0; i < m_iGraphSize; i++) - { - fread((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp); - } - fclose(fp); - std::cout << "Load Graph (" << m_iGraphSize << "," << m_iNeighborhoodSize << ") Finish!" << std::endl; return true; } - bool LoadGraphFromMemory(char* pGraphMemFile) + bool LoadGraph(char* pGraphMemFile) { - m_iGraphSize = *((int*)pGraphMemFile); - pGraphMemFile += sizeof(int); + m_pNeighborhoodGraph.Load(pGraphMemFile); - m_iNeighborhoodSize = *((int*)pGraphMemFile); - pGraphMemFile += sizeof(int); - - m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize, (int*)pGraphMemFile); + m_iGraphSize = m_pNeighborhoodGraph.R(); + m_iNeighborhoodSize = m_pNeighborhoodGraph.C(); m_dataUpdateLock.resize(m_iGraphSize); return true; } bool SaveGraph(std::string sGraphFilename) const { - std::cout << "Save Graph To " << sGraphFilename << std::endl; - FILE *fp = fopen(sGraphFilename.c_str(), "wb"); - if (fp == NULL) return false; - - fwrite(&m_iGraphSize, sizeof(int), 1, fp); - fwrite(&m_iNeighborhoodSize, sizeof(int), 1, fp); - for (int i = 0; i < m_iGraphSize; i++) - { - fwrite((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp); - } - fclose(fp); - std::cout << "Save Graph (" << m_iGraphSize << "," << m_iNeighborhoodSize << ") Finish!" 
<< std::endl; - return true; + return m_pNeighborhoodGraph.Save(sGraphFilename); } - bool SaveGraphToMemory(void **pGraphMemFile, int64_t &len) { - size_t size = sizeof(int) + sizeof(int) + sizeof(int) * m_iNeighborhoodSize * m_iGraphSize; - char *mem = (char*)malloc(size); - if (mem == NULL) return false; - - auto ptr = mem; - *(int*)ptr = m_iGraphSize; - ptr += sizeof(int); - - *(int*)ptr = m_iNeighborhoodSize; - ptr += sizeof(int); + bool SaveGraph(std::ostream& output) const + { + return m_pNeighborhoodGraph.Save(output); + } - for (int i = 0; i < m_iGraphSize; i++) - { - memcpy(ptr, (m_pNeighborhoodGraph)[i], sizeof(int) * m_iNeighborhoodSize); - ptr += sizeof(int) * m_iNeighborhoodSize; - } - *pGraphMemFile = mem; - len = size; + inline ErrorCode AddBatch(SizeType num) + { + ErrorCode ret = m_pNeighborhoodGraph.AddBatch(num); + if (ret != ErrorCode::Success) return ret; - return true; + m_iGraphSize += num; + m_dataUpdateLock.resize(m_iGraphSize); + return ErrorCode::Success; } - inline void AddBatch(int num) { m_pNeighborhoodGraph.AddBatch(num); m_iGraphSize += num; m_dataUpdateLock.resize(m_iGraphSize); } - - inline int* operator[](int index) { return m_pNeighborhoodGraph[index]; } + inline SizeType* operator[](SizeType index) { return m_pNeighborhoodGraph[index]; } - inline const int* operator[](int index) const { return m_pNeighborhoodGraph[index]; } + inline const SizeType* operator[](SizeType index) const { return m_pNeighborhoodGraph[index]; } - inline void SetR(int rows) { m_pNeighborhoodGraph.SetR(rows); m_iGraphSize = rows; m_dataUpdateLock.resize(m_iGraphSize); } + inline void SetR(SizeType rows) { m_pNeighborhoodGraph.SetR(rows); m_iGraphSize = rows; m_dataUpdateLock.resize(m_iGraphSize); } - inline int R() const { return m_iGraphSize; } + inline SizeType R() const { return m_iGraphSize; } static std::shared_ptr CreateInstance(std::string type); protected: // Graph structure - int m_iGraphSize; - COMMON::Dataset m_pNeighborhoodGraph; + SizeType 
m_iGraphSize; + COMMON::Dataset m_pNeighborhoodGraph; COMMON::FineGrainedLock m_dataUpdateLock; // protect one row of the graph public: int m_iTPTNumber, m_iTPTLeafSize, m_iSamples, m_numTopDimensionTPTSplit; - int m_iNeighborhoodSize, m_iNeighborhoodScale, m_iCEFScale, m_iRefineIter, m_iCEF, m_iMaxCheckForRefineGraph; + DimensionType m_iNeighborhoodSize; + int m_iNeighborhoodScale, m_iCEFScale, m_iRefineIter, m_iCEF, m_iMaxCheckForRefineGraph; }; } } diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/QueryResultSet.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/QueryResultSet.h index 1e6628789286478c4163ae9805df76d9ed6b17b6..ff8fa14dfd1119b189748ef46ff60704d2859df7 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/QueryResultSet.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/QueryResultSet.h @@ -51,7 +51,7 @@ public: return m_results[0].Dist; } - bool AddPoint(const int index, float dist) + bool AddPoint(const SizeType index, float dist) { if (dist < m_results[0].Dist || (dist == m_results[0].Dist && index < m_results[0].VID)) { diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h index 499250864448649bd0f85f3507f98ccf6d59caa8..33ab01927ba3d6b8ab9d266be6eeed2f6cbb9fb4 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h @@ -13,15 +13,15 @@ namespace SPTAG class RelativeNeighborhoodGraph: public NeighborhoodGraph { public: - void RebuildNeighbors(VectorIndex* index, const int node, int* nodes, const BasicResult* queryResults, const int numResults) { - int count = 0; + void RebuildNeighbors(VectorIndex* index, const SizeType node, SizeType* nodes, const BasicResult* queryResults, const int numResults) { + 
DimensionType count = 0; for (int j = 0; j < numResults && count < m_iNeighborhoodSize; j++) { const BasicResult& item = queryResults[j]; if (item.VID < 0) break; if (item.VID == node) continue; bool good = true; - for (int k = 0; k < count; k++) { + for (DimensionType k = 0; k < count; k++) { if (index->ComputeDistance(index->GetSample(nodes[k]), index->GetSample(item.VID)) <= item.Dist) { good = false; break; @@ -29,21 +29,21 @@ namespace SPTAG } if (good) nodes[count++] = item.VID; } - for (int j = count; j < m_iNeighborhoodSize; j++) nodes[j] = -1; + for (DimensionType j = count; j < m_iNeighborhoodSize; j++) nodes[j] = -1; } - void InsertNeighbors(VectorIndex* index, const int node, int insertNode, float insertDist) + void InsertNeighbors(VectorIndex* index, const SizeType node, SizeType insertNode, float insertDist) { - int* nodes = m_pNeighborhoodGraph[node]; - for (int k = 0; k < m_iNeighborhoodSize; k++) + SizeType* nodes = m_pNeighborhoodGraph[node]; + for (DimensionType k = 0; k < m_iNeighborhoodSize; k++) { - int tmpNode = nodes[k]; + SizeType tmpNode = nodes[k]; if (tmpNode < -1) continue; if (tmpNode < 0) { bool good = true; - for (int t = 0; t < k; t++) { + for (DimensionType t = 0; t < k; t++) { if (index->ComputeDistance(index->GetSample(insertNode), index->GetSample(nodes[t])) < insertDist) { good = false; break; @@ -58,7 +58,7 @@ namespace SPTAG if (insertDist < tmpDist || (insertDist == tmpDist && insertNode < tmpNode)) { bool good = true; - for (int t = 0; t < k; t++) { + for (DimensionType t = 0; t < k; t++) { if (index->ComputeDistance(index->GetSample(insertNode), index->GetSample(nodes[t])) < insertDist) { good = false; break; @@ -76,33 +76,33 @@ namespace SPTAG } } - float GraphAccuracyEstimation(VectorIndex* index, const int samples, const std::unordered_map* idmap = nullptr) + float GraphAccuracyEstimation(VectorIndex* index, const SizeType samples, const std::unordered_map* idmap = nullptr) { - int* correct = new int[samples]; + 
DimensionType* correct = new DimensionType[samples]; #pragma omp parallel for schedule(dynamic) - for (int i = 0; i < samples; i++) + for (SizeType i = 0; i < samples; i++) { - int x = COMMON::Utils::rand_int(m_iGraphSize); + SizeType x = COMMON::Utils::rand(m_iGraphSize); //int x = i; COMMON::QueryResultSet query(nullptr, m_iCEF); - for (int y = 0; y < m_iGraphSize; y++) + for (SizeType y = 0; y < m_iGraphSize; y++) { if ((idmap != nullptr && idmap->find(y) != idmap->end())) continue; float dist = index->ComputeDistance(index->GetSample(x), index->GetSample(y)); query.AddPoint(y, dist); } query.SortResult(); - int * exact_rng = new int[m_iNeighborhoodSize]; + SizeType * exact_rng = new SizeType[m_iNeighborhoodSize]; RebuildNeighbors(index, x, exact_rng, query.GetResults(), m_iCEF); correct[i] = 0; - for (int j = 0; j < m_iNeighborhoodSize; j++) { + for (DimensionType j = 0; j < m_iNeighborhoodSize; j++) { if (exact_rng[j] == -1) { correct[i] += m_iNeighborhoodSize - j; break; } - for (int k = 0; k < m_iNeighborhoodSize; k++) + for (DimensionType k = 0; k < m_iNeighborhoodSize; k++) if ((m_pNeighborhoodGraph)[x][k] == exact_rng[j]) { correct[i]++; break; @@ -111,7 +111,7 @@ namespace SPTAG delete[] exact_rng; } float acc = 0; - for (int i = 0; i < samples; i++) acc += float(correct[i]); + for (SizeType i = 0; i < samples; i++) acc += float(correct[i]); acc = acc / samples / m_iNeighborhoodSize; delete[] correct; return acc; diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/WorkSpace.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/WorkSpace.h index 74d0a5039b456c9df4c69bac95a6f5222adeeaf7..c236d45a1c8628b7e6064cb14c5fdfd8d749eb95 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/WorkSpace.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/WorkSpace.h @@ -14,10 +14,10 @@ namespace SPTAG // node type in the priority queue struct HeapCell { - int node; + SizeType node; float distance; - 
HeapCell(int _node = -1, float _distance = MaxDist) : node(_node), distance(_distance) {} + HeapCell(SizeType _node = -1, float _distance = MaxDist) : node(_node), distance(_distance) {} inline bool operator < (const HeapCell& rhs) { @@ -45,12 +45,12 @@ namespace SPTAG // Record 2 hash tables. // [0~m_poolSize + 1) is the first block. // [m_poolSize + 1, 2*(m_poolSize + 1)) is the second block; - int m_hashTable[(m_poolSize + 1) * 2]; + SizeType m_hashTable[(m_poolSize + 1) * 2]; - inline unsigned hash_func2(int idx, int loop) + inline unsigned hash_func2(unsigned idx, int loop) { - return ((unsigned)idx + loop) & m_poolSize; + return (idx + loop) & m_poolSize; } @@ -65,7 +65,7 @@ namespace SPTAG ~OptHashPosVector() {} - void Init(int size) + void Init(SizeType size) { m_secondHash = true; clear(); @@ -76,31 +76,31 @@ namespace SPTAG if (!m_secondHash) { // Clear first block. - memset(&m_hashTable[0], 0, sizeof(int)*(m_poolSize + 1)); + memset(&m_hashTable[0], 0, sizeof(SizeType)*(m_poolSize + 1)); } else { // Clear all blocks. - memset(&m_hashTable[0], 0, 2 * sizeof(int) * (m_poolSize + 1)); + memset(&m_hashTable[0], 0, 2 * sizeof(SizeType) * (m_poolSize + 1)); m_secondHash = false; } } - inline bool CheckAndSet(int idx) + inline bool CheckAndSet(SizeType idx) { // Inner Index is begin from 1 return _CheckAndSet(&m_hashTable[0], idx + 1) == 0; } - inline int _CheckAndSet(int* hashTable, int idx) + inline int _CheckAndSet(SizeType* hashTable, SizeType idx) { - unsigned index, loop; + unsigned index; // Get first hash position. 
- index = hash_func(idx); - for (loop = 0; loop < m_maxLoop; ++loop) + index = hash_func((unsigned)idx); + for (int loop = 0; loop < m_maxLoop; ++loop) { if (!hashTable[index]) { @@ -132,7 +132,7 @@ namespace SPTAG // Variables for each single NN search struct WorkSpace { - void Initialize(int maxCheck, int dataSize) + void Initialize(int maxCheck, SizeType dataSize) { nodeCheckStatus.Init(dataSize); m_SPTQueue.Resize(maxCheck * 10); @@ -158,7 +158,7 @@ namespace SPTAG m_iNumOfContinuousNoBetterPropagation = 0; } - inline bool CheckAndSet(int idx) + inline bool CheckAndSet(SizeType idx) { return nodeCheckStatus.CheckAndSet(idx); } diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/WorkSpacePool.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/WorkSpacePool.h index 9a7154e693e459837802d2d79494862c800dcc1c..a322f42af468b29b186c9c903047c71514b599f3 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/WorkSpacePool.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/WorkSpacePool.h @@ -17,7 +17,7 @@ namespace COMMON class WorkSpacePool { public: - WorkSpacePool(int p_maxCheck, int p_vectorCount); + WorkSpacePool(int p_maxCheck, SizeType p_vectorCount); virtual ~WorkSpacePool(); @@ -34,7 +34,7 @@ private: int m_maxCheck; - int m_vectorCount; + SizeType m_vectorCount; }; } diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/CommonDataStructure.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/CommonDataStructure.h index 74d7980f4b292bd5628cd273f2be7251c1671063..c158fc880205b8ee626233ec46c2b6bed2e2faed 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/CommonDataStructure.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/CommonDataStructure.h @@ -4,53 +4,223 @@ #ifndef _SPTAG_COMMONDATASTRUCTURE_H_ #define _SPTAG_COMMONDATASTRUCTURE_H_ -#include "Common.h" +#include "inc/Core/Common.h" namespace SPTAG { -class ByteArray +template +class Array { public: - 
ByteArray(); + Array(); - ByteArray(ByteArray&& p_right); + Array(T* p_array, std::size_t p_length, bool p_transferOwnership); + + Array(T* p_array, std::size_t p_length, std::shared_ptr p_dataHolder); - ByteArray(std::uint8_t* p_array, std::size_t p_length, bool p_transferOnwership); + Array(Array&& p_right); - ByteArray(std::uint8_t* p_array, std::size_t p_length, std::shared_ptr p_dataHolder); + Array(const Array& p_right); - ByteArray(const ByteArray& p_right); + Array& operator= (Array&& p_right); - ByteArray& operator= (const ByteArray& p_right); + Array& operator= (const Array& p_right); - ByteArray& operator= (ByteArray&& p_right); + T& operator[] (std::size_t p_index); - ~ByteArray(); + const T& operator[] (std::size_t p_index) const; - static ByteArray Alloc(std::size_t p_length); + ~Array(); - std::uint8_t* Data() const; + T* Data() const; - std::size_t Length() const; - - void SetData(std::uint8_t* p_array, std::size_t p_length); + std::size_t Length() const; - std::shared_ptr DataHolder() const; + std::shared_ptr DataHolder() const; + + void Set(T* p_array, std::size_t p_length, bool p_transferOwnership); void Clear(); - const static ByteArray c_empty; + static Array Alloc(std::size_t p_length); + + const static Array c_empty; private: - std::uint8_t* m_data; + T* m_data; std::size_t m_length; // Notice this is holding an array. Set correct deleter for this. 
- std::shared_ptr m_dataHolder; + std::shared_ptr m_dataHolder; }; +template +const Array Array::c_empty; + + +template +Array::Array() + : m_data(nullptr), + m_length(0) +{ +} + +template +Array::Array(T* p_array, std::size_t p_length, bool p_transferOnwership) + + : m_data(p_array), + m_length(p_length) +{ + if (p_transferOnwership) + { + m_dataHolder.reset(m_data, std::default_delete()); + } +} + + +template +Array::Array(T* p_array, std::size_t p_length, std::shared_ptr p_dataHolder) + : m_data(p_array), + m_length(p_length), + m_dataHolder(std::move(p_dataHolder)) +{ +} + + +template +Array::Array(Array&& p_right) + : m_data(p_right.m_data), + m_length(p_right.m_length), + m_dataHolder(std::move(p_right.m_dataHolder)) +{ +} + + +template +Array::Array(const Array& p_right) + : m_data(p_right.m_data), + m_length(p_right.m_length), + m_dataHolder(p_right.m_dataHolder) +{ +} + + +template +Array& +Array::operator= (Array&& p_right) +{ + m_data = p_right.m_data; + m_length = p_right.m_length; + m_dataHolder = std::move(p_right.m_dataHolder); + + return *this; +} + + +template +Array& +Array::operator= (const Array& p_right) +{ + m_data = p_right.m_data; + m_length = p_right.m_length; + m_dataHolder = p_right.m_dataHolder; + + return *this; +} + + +template +T& +Array::operator[] (std::size_t p_index) +{ + return m_data[p_index]; +} + + +template +const T& +Array::operator[] (std::size_t p_index) const +{ + return m_data[p_index]; +} + + +template +Array::~Array() +{ +} + + +template +T* +Array::Data() const +{ + return m_data; +} + + +template +std::size_t +Array::Length() const +{ + return m_length; +} + + +template +std::shared_ptr +Array::DataHolder() const +{ + return m_dataHolder; +} + + +template +void +Array::Set(T* p_array, std::size_t p_length, bool p_transferOwnership) +{ + m_data = p_array; + m_length = p_length; + + if (p_transferOwnership) + { + m_dataHolder.reset(m_data, std::default_delete()); + } +} + + +template +void +Array::Clear() +{ + m_data = 
nullptr; + m_length = 0; + m_dataHolder.reset(); +} + + +template +Array +Array::Alloc(std::size_t p_length) +{ + Array arr; + if (0 == p_length) + { + return arr; + } + + arr.m_dataHolder.reset(new T[p_length], std::default_delete()); + + arr.m_length = p_length; + arr.m_data = arr.m_dataHolder.get(); + return arr; +} + + +typedef Array ByteArray; + } // namespace SPTAG #endif // _SPTAG_COMMONDATASTRUCTURE_H_ diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/DefinitionList.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/DefinitionList.h index f5b66422d79b8627bf9308b92e6787af062012ad..91014963c63ad4222ee63e83668b538430baf30a 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/DefinitionList.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/DefinitionList.h @@ -28,6 +28,8 @@ DefineErrorCode(FailedOpenFile, 0x0002) DefineErrorCode(FailedCreateFile, 0x0003) DefineErrorCode(ParamNotFound, 0x0010) DefineErrorCode(FailedParseValue, 0x0011) +DefineErrorCode(MemoryOverFlow, 0x0012) +DefineErrorCode(LackOfInputs, 0x0013) // 0x1000 ~ 0x1FFF Index Build Status diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/Index.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/Index.h index 7ab4b8daec45d7764e0d676bdf0c3a92af96ba28..668d423b524012ecc1f3ce5e671b5284e223edcb 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/Index.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/Index.h @@ -15,12 +15,12 @@ #include "../Common/WorkSpacePool.h" #include "../Common/RelativeNeighborhoodGraph.h" #include "../Common/KDTree.h" +#include "inc/Helper/ConcurrentSet.h" #include "inc/Helper/StringConvert.h" #include "inc/Helper/SimpleIniReader.h" #include #include -#include namespace SPTAG { @@ -48,14 +48,16 @@ namespace SPTAG std::string m_sKDTFilename; std::string m_sGraphFilename; std::string m_sDataPointsFilename; + std::string m_sDeleteDataPointsFilename; - std::mutex m_dataLock; // protect 
data and graph - tbb::concurrent_unordered_set m_deletedID; + std::mutex m_dataAddLock; // protect data and graph + Helper::Concurrent::ConcurrentSet m_deletedID; + float m_fDeletePercentageForRefine; std::unique_ptr m_workSpacePool; int m_iNumberOfThreads; DistCalcMethod m_iDistCalcMethod; - float(*m_fComputeDistance)(const T* pX, const T* pY, int length); + float(*m_fComputeDistance)(const T* pX, const T* pY, DimensionType length); int m_iMaxCheck; int m_iThresholdOfNumberOfContinuousNoBetterPropagation; @@ -63,20 +65,21 @@ namespace SPTAG int m_iNumberOfOtherDynamicPivots; public: Index() - { + { #define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \ VarName = DefaultValue; \ #include "inc/Core/KDT/ParameterDefinitionList.h" #undef DefineKDTParameter - - m_fComputeDistance = COMMON::DistanceCalcSelector(m_iDistCalcMethod); - } + + m_pSamples.SetName("Vector"); + m_fComputeDistance = COMMON::DistanceCalcSelector(m_iDistCalcMethod); + } ~Index() {} - inline int GetNumSamples() const { return m_pSamples.R(); } - inline int GetFeatureDim() const { return m_pSamples.C(); } + inline SizeType GetNumSamples() const { return m_pSamples.R(); } + inline DimensionType GetFeatureDim() const { return m_pSamples.C(); } inline int GetCurrMaxCheck() const { return m_iMaxCheck; } inline int GetNumThreads() const { return m_iNumberOfThreads; } @@ -85,25 +88,41 @@ namespace SPTAG inline VectorValueType GetVectorValueType() const { return GetEnumValueType(); } inline float ComputeDistance(const void* pX, const void* pY) const { return m_fComputeDistance((const T*)pX, (const T*)pY, m_pSamples.C()); } - inline const void* GetSample(const int idx) const { return (void*)m_pSamples[idx]; } - - ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension); - - ErrorCode SaveIndexToMemory(std::vector& p_indexBlobs, std::vector& p_indexBlobsLen); - ErrorCode LoadIndexFromMemory(const std::vector& p_indexBlobs); - - ErrorCode SaveIndex(const std::string& 
p_folderPath, std::ofstream& p_configout); - ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader); + inline const void* GetSample(const SizeType idx) const { return (void*)m_pSamples[idx]; } + inline bool ContainSample(const SizeType idx) const { return !m_deletedID.contains(idx); } + inline bool NeedRefine() const { return m_deletedID.size() >= (size_t)(GetNumSamples() * m_fDeletePercentageForRefine); } + std::shared_ptr> BufferSize() const + { + std::shared_ptr> buffersize(new std::vector); + buffersize->push_back(m_pSamples.BufferSize()); + buffersize->push_back(m_pTrees.BufferSize()); + buffersize->push_back(m_pGraph.BufferSize()); + buffersize->push_back(m_deletedID.bufferSize()); + return std::move(buffersize); + } + + ErrorCode SaveConfig(std::ostream& p_configout) const; + ErrorCode SaveIndexData(const std::string& p_folderPath); + ErrorCode SaveIndexData(const std::vector& p_indexStreams); + + ErrorCode LoadConfig(Helper::IniReader& p_reader); + ErrorCode LoadIndexData(const std::string& p_folderPath); + ErrorCode LoadIndexDataFromMemory(const std::vector& p_indexBlobs); + + ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension); ErrorCode SearchIndex(QueryResult &p_query) const; - ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension); - ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum); + ErrorCode AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start = nullptr); + ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum); + ErrorCode DeleteIndex(const SizeType& p_id); ErrorCode SetParameter(const char* p_param, const char* p_value); std::string GetParameter(const char* p_param) const; - private: ErrorCode RefineIndex(const std::string& p_folderPath); - void SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const; + ErrorCode 
RefineIndex(const std::vector& p_indexStreams); + + private: + void SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet &p_deleted) const; void SearchIndexWithoutDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space) const; }; } // namespace KDT diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/ParameterDefinitionList.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/ParameterDefinitionList.h index 70d70ccbe7c6aa14cfd2f15ccf58edc7ba1846d1..c36cb178c1c420481c1f0b633ad5284c9c509021 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/ParameterDefinitionList.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/ParameterDefinitionList.h @@ -7,16 +7,17 @@ DefineKDTParameter(m_sKDTFilename, std::string, std::string("tree.bin"), "TreeFilePath") DefineKDTParameter(m_sGraphFilename, std::string, std::string("graph.bin"), "GraphFilePath") DefineKDTParameter(m_sDataPointsFilename, std::string, std::string("vectors.bin"), "VectorFilePath") +DefineKDTParameter(m_sDeleteDataPointsFilename, std::string, std::string("deletes.bin"), "DeleteVectorFilePath") DefineKDTParameter(m_pTrees.m_iTreeNumber, int, 1L, "KDTNumber") DefineKDTParameter(m_pTrees.m_numTopDimensionKDTSplit, int, 5L, "NumTopDimensionKDTSplit") -DefineKDTParameter(m_pTrees.m_iSamples, int, 100L, "NumSamplesKDTSplitConsideration") +DefineKDTParameter(m_pTrees.m_iSamples, int, 100L, "Samples") DefineKDTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TPTNumber") DefineKDTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L, "TPTLeafSize") DefineKDTParameter(m_pGraph.m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTPTSplit") -DefineKDTParameter(m_pGraph.m_iNeighborhoodSize, int, 32L, "NeighborhoodSize") +DefineKDTParameter(m_pGraph.m_iNeighborhoodSize, DimensionType, 32L, "NeighborhoodSize") DefineKDTParameter(m_pGraph.m_iNeighborhoodScale, int, 2L, "GraphNeighborhoodScale") 
DefineKDTParameter(m_pGraph.m_iCEFScale, int, 2L, "GraphCEFScale") DefineKDTParameter(m_pGraph.m_iRefineIter, int, 0L, "RefineIterations") @@ -26,6 +27,7 @@ DefineKDTParameter(m_pGraph.m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckFor DefineKDTParameter(m_iNumberOfThreads, int, 1L, "NumberOfThreads") DefineKDTParameter(m_iDistCalcMethod, SPTAG::DistCalcMethod, SPTAG::DistCalcMethod::Cosine, "DistCalcMethod") +DefineKDTParameter(m_fDeletePercentageForRefine, float, 0.4F, "DeletePercentageForRefine") DefineKDTParameter(m_iMaxCheck, int, 8192L, "MaxCheck") DefineKDTParameter(m_iThresholdOfNumberOfContinuousNoBetterPropagation, int, 3L, "ThresholdOfNumberOfContinuousNoBetterPropagation") DefineKDTParameter(m_iNumberOfInitialDynamicPivots, int, 50L, "NumberOfInitialDynamicPivots") diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/MetadataSet.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/MetadataSet.h index 7b7baed50d73139433fe35fb11f873010392e3d4..37eba14491e17d996c9637c74045e287e5031e3f 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/MetadataSet.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/MetadataSet.h @@ -19,23 +19,23 @@ public: virtual ~MetadataSet(); - virtual ByteArray GetMetadata(IndexType p_vectorID) const = 0; + virtual ByteArray GetMetadata(SizeType p_vectorID) const = 0; virtual SizeType Count() const = 0; virtual bool Available() const = 0; - virtual void AddBatch(MetadataSet& data) = 0; + virtual std::pair BufferSize() const = 0; - virtual ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile) = 0; + virtual void AddBatch(MetadataSet& data) = 0; - virtual ErrorCode SaveMetadataToMemory(void **pGraphMemFile, int64_t &len) = 0; + virtual ErrorCode SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut) = 0; - virtual ErrorCode LoadMetadataFromMemory(void *pGraphMemFile) = 0; + virtual ErrorCode SaveMetadata(const std::string& p_metaFile, const 
std::string& p_metaindexFile) = 0; - virtual ErrorCode RefineMetadata(std::vector& indices, const std::string& p_folderPath); + virtual ErrorCode RefineMetadata(std::vector& indices, std::ostream& p_metaOut, std::ostream& p_metaIndexOut); - static ErrorCode MetaCopy(const std::string& p_src, const std::string& p_dst); + virtual ErrorCode RefineMetadata(std::vector& indices, const std::string& p_metaFile, const std::string& p_metaindexFile); }; @@ -46,19 +46,20 @@ public: ~FileMetadataSet(); - ByteArray GetMetadata(IndexType p_vectorID) const; + ByteArray GetMetadata(SizeType p_vectorID) const; SizeType Count() const; bool Available() const; + std::pair BufferSize() const; + void AddBatch(MetadataSet& data); - ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile); + ErrorCode SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut); - ErrorCode SaveMetadataToMemory(void **pGraphMemFile, int64_t &len); + ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile); - ErrorCode LoadMetadataFromMemory(void *pGraphMemFile); private: std::ifstream* m_fp = nullptr; @@ -77,25 +78,24 @@ private: class MemMetadataSet : public MetadataSet { public: - MemMetadataSet() = default; - MemMetadataSet(ByteArray p_metadata, ByteArray p_offsets, SizeType p_count); ~MemMetadataSet(); - ByteArray GetMetadata(IndexType p_vectorID) const; + ByteArray GetMetadata(SizeType p_vectorID) const; SizeType Count() const; bool Available() const; + std::pair BufferSize() const; + void AddBatch(MetadataSet& data); - ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile); + ErrorCode SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut); - ErrorCode SaveMetadataToMemory(void **pGraphMemFile, int64_t &len); + ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile); - ErrorCode LoadMetadataFromMemory(void *pGraphMemFile); private: std::vector 
m_offsets; diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/SearchQuery.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/SearchQuery.h index ede82cc938e2d7c48d77d6fac18fb05f1659e444..017b1e2e01392345b9c83a666ae26d6dee2de2d5 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/SearchQuery.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/SearchQuery.h @@ -4,24 +4,13 @@ #ifndef _SPTAG_SEARCHQUERY_H_ #define _SPTAG_SEARCHQUERY_H_ -#include "CommonDataStructure.h" +#include "SearchResult.h" #include namespace SPTAG { - struct BasicResult - { - int VID; - float Dist; - - BasicResult() : VID(-1), Dist(MaxDist) {} - - BasicResult(int p_vid, float p_dist) : VID(p_vid), Dist(p_dist) {} - }; - - // Space to save temporary answer, similar with TopKCache class QueryResult { @@ -38,39 +27,26 @@ public: QueryResult(const void* p_target, int p_resultNum, bool p_withMeta) - : m_target(nullptr), - m_resultNum(0), - m_withMeta(false) { Init(p_target, p_resultNum, p_withMeta); } - QueryResult(const void* p_target, int p_resultNum, std::vector& p_results) + QueryResult(const void* p_target, int p_resultNum, bool p_withMeta, BasicResult* p_results) : m_target(p_target), m_resultNum(p_resultNum), - m_withMeta(false) + m_withMeta(p_withMeta) { - p_results.resize(p_resultNum); - m_results.reset(p_results.data()); + m_results.Set(p_results, p_resultNum, false); } QueryResult(const QueryResult& p_other) - : m_target(p_other.m_target), - m_resultNum(p_other.m_resultNum), - m_withMeta(p_other.m_withMeta) { + Init(p_other.m_target, p_other.m_resultNum, p_other.m_withMeta); if (m_resultNum > 0) { - m_results.reset(new BasicResult[m_resultNum]); - std::memcpy(m_results.get(), p_other.m_results.get(), sizeof(BasicResult) * m_resultNum); - - if (m_withMeta) - { - m_metadatas.reset(new ByteArray[m_resultNum]); - std::copy(p_other.m_metadatas.get(), p_other.m_metadatas.get() + m_resultNum, m_metadatas.get()); - } + std::copy(p_other.m_results.Data(), 
p_other.m_results.Data() + m_resultNum, m_results.Data()); } } @@ -78,14 +54,9 @@ public: QueryResult& operator=(const QueryResult& p_other) { Init(p_other.m_target, p_other.m_resultNum, p_other.m_withMeta); - if (m_resultNum > 0) { - std::memcpy(m_results.get(), p_other.m_results.get(), sizeof(BasicResult) * m_resultNum); - if (m_withMeta) - { - std::copy(p_other.m_metadatas.get(), p_other.m_metadatas.get() + m_resultNum, m_metadatas.get()); - } + std::copy(p_other.m_results.Data(), p_other.m_results.Data() + m_resultNum, m_results.Data()); } return *this; @@ -100,18 +71,10 @@ public: inline void Init(const void* p_target, int p_resultNum, bool p_withMeta) { m_target = p_target; - if (p_resultNum > m_resultNum) - { - m_results.reset(new BasicResult[p_resultNum]); - } - - if (p_withMeta && (!m_withMeta || p_resultNum > m_resultNum)) - { - m_metadatas.reset(new ByteArray[p_resultNum]); - } - m_resultNum = p_resultNum; m_withMeta = p_withMeta; + + m_results = Array::Alloc(p_resultNum); } @@ -135,11 +98,11 @@ public: inline BasicResult* GetResult(int i) const { - return i < m_resultNum ? m_results.get() + i : nullptr; + return i < m_resultNum ? 
m_results.Data() + i : nullptr; } - inline void SetResult(int p_index, int p_VID, float p_dist) + inline void SetResult(int p_index, SizeType p_VID, float p_dist) { if (p_index < m_resultNum) { @@ -151,7 +114,7 @@ public: inline BasicResult* GetResults() const { - return m_results.get(); + return m_results.Data(); } @@ -165,7 +128,7 @@ public: { if (p_index < m_resultNum && m_withMeta) { - return m_metadatas[p_index]; + return m_results[p_index].Meta; } return ByteArray::c_empty; @@ -176,7 +139,7 @@ public: { if (p_index < m_resultNum && m_withMeta) { - m_metadatas[p_index] = std::move(p_metadata); + m_results[p_index].Meta = std::move(p_metadata); } } @@ -187,39 +150,32 @@ public: { m_results[i].VID = -1; m_results[i].Dist = MaxDist; - } - - if (m_withMeta) - { - for (int i = 0; i < m_resultNum; i++) - { - m_metadatas[i].Clear(); - } + m_results[i].Meta.Clear(); } } iterator begin() { - return m_results.get(); + return m_results.Data(); } iterator end() { - return m_results.get() + m_resultNum; + return m_results.Data() + m_resultNum; } const_iterator begin() const { - return m_results.get(); + return m_results.Data(); } const_iterator end() const { - return m_results.get() + m_resultNum; + return m_results.Data() + m_resultNum; } @@ -230,9 +186,7 @@ protected: bool m_withMeta; - std::unique_ptr m_results; - - std::unique_ptr m_metadatas; + Array m_results; }; } // namespace SPTAG diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/SearchResult.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/SearchResult.h new file mode 100644 index 0000000000000000000000000000000000000000..64e173030b14f42b048d197eced225f52422cf8d --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/SearchResult.h @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#ifndef _SPTAG_SEARCHRESULT_H_ +#define _SPTAG_SEARCHRESULT_H_ + +#include "CommonDataStructure.h" + +namespace SPTAG +{ + struct BasicResult + { + SizeType VID; + float Dist; + ByteArray Meta; + + BasicResult() : VID(-1), Dist(MaxDist) {} + + BasicResult(SizeType p_vid, float p_dist) : VID(p_vid), Dist(p_dist) {} + + BasicResult(SizeType p_vid, float p_dist, ByteArray p_meta) : VID(p_vid), Dist(p_dist), Meta(p_meta) {} + }; + +} // namespace SPTAG + +#endif // _SPTAG_SEARCHRESULT_H_ diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorIndex.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorIndex.h index 5fc508e3ad99b187d74e6ece78197beb4ec217b9..49475794d5f215f32a35987dfa712a1c7f773196 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorIndex.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorIndex.h @@ -10,6 +10,8 @@ #include "MetadataSet.h" #include "inc/Helper/SimpleIniReader.h" +#include + namespace SPTAG { @@ -20,59 +22,58 @@ public: virtual ~VectorIndex(); - virtual ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout) = 0; - - virtual ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader) = 0; + virtual ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension) = 0; - virtual ErrorCode SaveIndexToMemory(std::vector& p_indexBlobs, std::vector& p_indexBlobsLen) = 0; + virtual ErrorCode AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start = nullptr) = 0; - virtual ErrorCode LoadIndexFromMemory(const std::vector& p_indexBlobs) = 0; - - virtual ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension) = 0; + virtual ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum) = 0; virtual ErrorCode SearchIndex(QueryResult& p_results) const = 0; - - virtual ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension) = 0; - - 
virtual ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum) = 0; - - //virtual ErrorCode AddIndexWithID(const void* p_vector, const int& p_id) = 0; - - //virtual ErrorCode DeleteIndexWithID(const void* p_vector, const int& p_id) = 0; virtual float ComputeDistance(const void* pX, const void* pY) const = 0; - virtual const void* GetSample(const int idx) const = 0; - virtual int GetFeatureDim() const = 0; - virtual int GetNumSamples() const = 0; + virtual const void* GetSample(const SizeType idx) const = 0; + virtual bool ContainSample(const SizeType idx) const = 0; + virtual bool NeedRefine() const = 0; + + virtual DimensionType GetFeatureDim() const = 0; + virtual SizeType GetNumSamples() const = 0; virtual DistCalcMethod GetDistCalcMethod() const = 0; virtual IndexAlgoType GetIndexAlgoType() const = 0; virtual VectorValueType GetVectorValueType() const = 0; - virtual int GetNumThreads() const = 0; virtual std::string GetParameter(const char* p_param) const = 0; virtual ErrorCode SetParameter(const char* p_param, const char* p_value) = 0; + virtual std::shared_ptr> CalculateBufferSize() const; + + virtual ErrorCode LoadIndex(const std::string& p_config, const std::vector& p_indexBlobs); + virtual ErrorCode LoadIndex(const std::string& p_folderPath); + virtual ErrorCode SaveIndex(std::string& p_config, const std::vector& p_indexBlobs); + virtual ErrorCode SaveIndex(const std::string& p_folderPath); - virtual ErrorCode BuildIndex(std::shared_ptr p_vectorSet, std::shared_ptr p_metadataSet); - - virtual ErrorCode SearchIndex(const void* p_vector, int p_neighborCount, std::vector& p_results) const; + virtual ErrorCode BuildIndex(std::shared_ptr p_vectorSet, std::shared_ptr p_metadataSet, bool p_withMetaIndex = false); virtual ErrorCode AddIndex(std::shared_ptr p_vectorSet, std::shared_ptr p_metadataSet); + virtual ErrorCode DeleteIndex(ByteArray p_meta); + + virtual const void* GetSample(ByteArray p_meta); + + virtual ErrorCode SearchIndex(const void* p_vector, 
int p_neighborCount, bool p_withMeta, BasicResult* p_results) const; + virtual std::string GetParameter(const std::string& p_param) const; virtual ErrorCode SetParameter(const std::string& p_param, const std::string& p_value); - virtual ByteArray GetMetadata(IndexType p_vectorID) const; + virtual ByteArray GetMetadata(SizeType p_vectorID) const; virtual void SetMetadata(const std::string& p_metadataFilePath, const std::string& p_metadataIndexPath); virtual std::string GetIndexName() const { - if (m_sIndexName == "") - return Helper::Convert::ConvertToString(GetIndexAlgoType()); + if (m_sIndexName == "") return Helper::Convert::ConvertToString(GetIndexAlgoType()); return m_sIndexName; } virtual void SetIndexName(std::string p_name) { m_sIndexName = p_name; } @@ -83,9 +84,42 @@ public: static ErrorCode LoadIndex(const std::string& p_loaderFilePath, std::shared_ptr& p_vectorIndex); + static ErrorCode LoadIndex(const std::string& p_config, const std::vector& p_indexBlobs, std::shared_ptr& p_vectorIndex); + +protected: + virtual std::shared_ptr> BufferSize() const = 0; + + virtual ErrorCode SaveConfig(std::ostream& p_configout) const = 0; + + virtual ErrorCode SaveIndexData(const std::string& p_folderPath) = 0; + + virtual ErrorCode SaveIndexData(const std::vector& p_indexStreams) = 0; + + virtual ErrorCode LoadConfig(Helper::IniReader& p_reader) = 0; + + virtual ErrorCode LoadIndexData(const std::string& p_folderPath) = 0; + + virtual ErrorCode LoadIndexDataFromMemory(const std::vector& p_indexBlobs) = 0; + + virtual ErrorCode DeleteIndex(const SizeType& p_id) = 0; + + virtual ErrorCode RefineIndex(const std::string& p_folderPath) = 0; + + virtual ErrorCode RefineIndex(const std::vector& p_indexStreams) = 0; + +private: + void BuildMetaMapping(); + + ErrorCode LoadIndexConfig(Helper::IniReader& p_reader); + + ErrorCode SaveIndexConfig(std::ostream& p_configOut); + protected: std::string m_sIndexName; + std::string m_sMetadataFile = "metadata.bin"; + std::string 
m_sMetadataIndexFile = "metadataIndex.bin"; std::shared_ptr m_pMetadata; + std::unique_ptr> m_pMetaToVec; }; diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorSet.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorSet.h index 4d0727aa81961ca5d23a8ff9749135cd5f4fd082..c394c701ffc5619ab03e00ee0319000f72382e47 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorSet.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorSet.h @@ -18,11 +18,11 @@ public: virtual VectorValueType GetValueType() const = 0; - virtual void* GetVector(IndexType p_vectorID) const = 0; + virtual void* GetVector(SizeType p_vectorID) const = 0; virtual void* GetData() const = 0; - virtual SizeType Dimension() const = 0; + virtual DimensionType Dimension() const = 0; virtual SizeType Count() const = 0; @@ -37,18 +37,18 @@ class BasicVectorSet : public VectorSet public: BasicVectorSet(const ByteArray& p_bytesArray, VectorValueType p_valueType, - SizeType p_dimension, + DimensionType p_dimension, SizeType p_vectorCount); virtual ~BasicVectorSet(); virtual VectorValueType GetValueType() const; - virtual void* GetVector(IndexType p_vectorID) const; + virtual void* GetVector(SizeType p_vectorID) const; virtual void* GetData() const; - virtual SizeType Dimension() const; + virtual DimensionType Dimension() const; virtual SizeType Count() const; @@ -61,7 +61,7 @@ private: VectorValueType m_valueType; - SizeType m_dimension; + DimensionType m_dimension; SizeType m_vectorCount; diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/BufferStream.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/BufferStream.h new file mode 100644 index 0000000000000000000000000000000000000000..c97be04f12397d7ccf4dc8ec4b5ca22e9f2ac8db --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/BufferStream.h @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#ifndef _SPTAG_HELPER_BUFFERSTREAM_H_ +#define _SPTAG_HELPER_BUFFERSTREAM_H_ + +#include +#include +#include + +namespace SPTAG +{ + namespace Helper + { + struct streambuf : public std::basic_streambuf + { + streambuf(char* buffer, size_t size) + { + setp(buffer, buffer + size); + } + }; + + class obufferstream : public std::ostream + { + public: + obufferstream(streambuf* buf, bool transferOwnership) : std::ostream(buf) + { + if (transferOwnership) + m_bufHolder.reset(buf, std::default_delete()); + } + + private: + std::shared_ptr m_bufHolder; + }; + } // namespace Helper +} // namespace SPTAG + +#endif // _SPTAG_HELPER_BUFFERSTREAM_H_ + diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/ConcurrentSet.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/ConcurrentSet.h new file mode 100644 index 0000000000000000000000000000000000000000..61254dc2ebf1b34389c6ff5dfbbd81c8672bdb55 --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/ConcurrentSet.h @@ -0,0 +1,148 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#ifndef _SPTAG_HELPER_CONCURRENTSET_H_ +#define _SPTAG_HELPER_CONCURRENTSET_H_ + +#include +#include + +namespace SPTAG +{ + namespace Helper + { + namespace Concurrent + { + template + class ConcurrentSet + { + public: + ConcurrentSet(); + + ~ConcurrentSet(); + + size_t size() const; + + bool contains(const T& key) const; + + void insert(const T& key); + + std::shared_timed_mutex& getLock(); + + bool save(std::ostream& output); + + bool save(std::string filename); + + bool load(std::string filename); + + bool load(char* pmemoryFile); + + std::uint64_t bufferSize() const; + + private: + std::unique_ptr m_lock; + std::unordered_set m_data; + }; + + template + ConcurrentSet::ConcurrentSet() + { + m_lock.reset(new std::shared_timed_mutex); + } + + template + ConcurrentSet::~ConcurrentSet() + { + } + + template + size_t ConcurrentSet::size() const + { + std::shared_lock lock(*m_lock); + return m_data.size(); + } + + template + bool ConcurrentSet::contains(const T& key) const + { + std::shared_lock lock(*m_lock); + return (m_data.find(key) != m_data.end()); + } + + template + void ConcurrentSet::insert(const T& key) + { + std::unique_lock lock(*m_lock); + m_data.insert(key); + } + + template + std::shared_timed_mutex& ConcurrentSet::getLock() + { + return *m_lock; + } + + template + std::uint64_t ConcurrentSet::bufferSize() const + { + return sizeof(SizeType) + sizeof(T) * m_data.size(); + } + + template + bool ConcurrentSet::save(std::ostream& output) + { + SizeType count = (SizeType)m_data.size(); + output.write((char*)&count, sizeof(SizeType)); + for (auto iter = m_data.begin(); iter != m_data.end(); iter++) + output.write((char*)&(*iter), sizeof(T)); + std::cout << "Save DeleteID (" << count << ") Finish!" 
<< std::endl; + return true; + } + + template + bool ConcurrentSet::save(std::string filename) + { + std::cout << "Save DeleteID To " << filename << std::endl; + std::ofstream output(filename, std::ios::binary); + if (!output.is_open()) return false; + save(output); + output.close(); + return true; + } + + template + bool ConcurrentSet::load(std::string filename) + { + std::cout << "Load DeleteID From " << filename << std::endl; + std::ifstream input(filename, std::ios::binary); + if (!input.is_open()) return false; + + SizeType count; + T ID; + input.read((char*)&count, sizeof(SizeType)); + for (SizeType i = 0; i < count; i++) + { + input.read((char*)&ID, sizeof(T)); + m_data.insert(ID); + } + input.close(); + std::cout << "Load DeleteID (" << count << ") Finish!" << std::endl; + return true; + } + + template + bool ConcurrentSet::load(char* pmemoryFile) + { + SizeType count; + count = *((SizeType*)pmemoryFile); + pmemoryFile += sizeof(SizeType); + + m_data.insert((T*)pmemoryFile, ((T*)pmemoryFile) + count); + pmemoryFile += sizeof(T) * count; + std::cout << "Load DeleteID (" << count << ") Finish!" 
<< std::endl; + return true; + } + } + } +} +#endif // _SPTAG_HELPER_CONCURRENTSET_H_ \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/SimpleIniReader.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/SimpleIniReader.h index 31b797aba9977e454adcb1bb6ffcb113ff1a7fc6..ad8d58f6f75057907671ff16aef3875bfb022c5b 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/SimpleIniReader.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/SimpleIniReader.h @@ -31,6 +31,8 @@ public: ErrorCode LoadIniFile(const std::string& p_iniFilePath); + ErrorCode LoadIni(std::istream& p_input); + bool DoesSectionExist(const std::string& p_section) const; bool DoesParameterExist(const std::string& p_section, const std::string& p_param) const; diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/VectorSetReader.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/VectorSetReader.h new file mode 100644 index 0000000000000000000000000000000000000000..cd148c1d04567e206a2a355d74612a2ce0ba108c --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/VectorSetReader.h @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#ifndef _SPTAG_HELPER_VECTORSETREADER_H_ +#define _SPTAG_HELPER_VECTORSETREADER_H_ + +#include "inc/Core/Common.h" +#include "inc/Core/VectorSet.h" +#include "inc/Core/MetadataSet.h" +#include "inc/Helper/ArgumentsParser.h" + +#include + +namespace SPTAG +{ +namespace Helper +{ + +class ReaderOptions : public ArgumentsParser +{ +public: + ReaderOptions(VectorValueType p_valueType, DimensionType p_dimension, std::string p_vectorDelimiter = "|", std::uint32_t p_threadNum = 32); + + ~ReaderOptions(); + + std::uint32_t m_threadNum; + + DimensionType m_dimension; + + std::string m_vectorDelimiter; + + SPTAG::VectorValueType m_inputValueType; +}; + +class VectorSetReader +{ +public: + VectorSetReader(std::shared_ptr p_options); + + virtual ~VectorSetReader(); + + virtual ErrorCode LoadFile(const std::string& p_filePath) = 0; + + virtual std::shared_ptr GetVectorSet() const = 0; + + virtual std::shared_ptr GetMetadataSet() const = 0; + + static std::shared_ptr CreateInstance(std::shared_ptr p_options); + +protected: + std::shared_ptr m_options; +}; + + + +} // namespace Helper +} // namespace SPTAG + +#endif // _SPTAG_HELPER_VECTORSETREADER_H_ diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/IndexBuilder/VectorSetReaders/DefaultReader.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/VectorSetReaders/DefaultReader.h similarity index 83% rename from core/src/index/thirdparty/SPTAG/AnnService/inc/IndexBuilder/VectorSetReaders/DefaultReader.h rename to core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/VectorSetReaders/DefaultReader.h index e3e1911a9d6509d5138f6be308d9918c2ae12c80..52c8404caf687d444ba1760167fc2d8ebfe90893 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/IndexBuilder/VectorSetReaders/DefaultReader.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/VectorSetReaders/DefaultReader.h @@ -1,8 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#ifndef _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULTREADER_H_ -#define _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULTREADER_H_ +#ifndef _SPTAG_HELPER_VECTORSETREADERS_DEFAULTREADER_H_ +#define _SPTAG_HELPER_VECTORSETREADERS_DEFAULTREADER_H_ #include "../VectorSetReader.h" #include "inc/Helper/Concurrent.h" @@ -13,13 +13,13 @@ namespace SPTAG { -namespace IndexBuilder +namespace Helper { class DefaultReader : public VectorSetReader { public: - DefaultReader(std::shared_ptr p_options); + DefaultReader(std::shared_ptr p_options); virtual ~DefaultReader(); @@ -44,7 +44,7 @@ private: template bool TranslateVector(char* p_str, DataType* p_vector) { - std::uint32_t eleCount = 0; + DimensionType eleCount = 0; char* next = p_str; while ((*next) != '\0') { @@ -85,11 +85,11 @@ private: std::size_t m_subTaskBlocksize; - std::atomic m_totalRecordCount; + std::atomic m_totalRecordCount; std::atomic m_totalRecordVectorBytes; - std::vector m_subTaskRecordCount; + std::vector m_subTaskRecordCount; std::string m_vectorOutput; @@ -102,7 +102,7 @@ private: -} // namespace IndexBuilder +} // namespace Helper } // namespace SPTAG -#endif // _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULT_H_ +#endif // _SPTAG_HELPER_VECTORSETREADERS_DEFAULT_H_ diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/IndexBuilder/Options.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/IndexBuilder/Options.h index 7c939efae51e690253f0328e57844fe6300013df..b3b3e21e58a8bb556e9054868696c2555e9f561c 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/IndexBuilder/Options.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/IndexBuilder/Options.h @@ -5,7 +5,7 @@ #define _SPTAG_INDEXBUILDER_OPTIONS_H_ #include "inc/Core/Common.h" -#include "inc/Helper/ArgumentsParser.h" +#include "inc/Helper/VectorSetReader.h" #include #include @@ -16,21 +16,13 @@ namespace SPTAG namespace IndexBuilder { -class BuilderOptions : public Helper::ArgumentsParser +class BuilderOptions : public Helper::ReaderOptions { 
public: BuilderOptions(); ~BuilderOptions(); - std::uint32_t m_threadNum; - - std::uint32_t m_dimension; - - std::string m_vectorDelimiter; - - SPTAG::VectorValueType m_inputValueType; - std::string m_inputFiles; std::string m_outputFolder; diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/IndexBuilder/VectorSetReader.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/IndexBuilder/VectorSetReader.h deleted file mode 100644 index 6bb3026aa05ca87ea418f2cdd990c73d2f725028..0000000000000000000000000000000000000000 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/IndexBuilder/VectorSetReader.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#ifndef _SPTAG_INDEXBUILDER_VECTORSETREADER_H_ -#define _SPTAG_INDEXBUILDER_VECTORSETREADER_H_ - -#include "inc/Core/Common.h" -#include "inc/Core/VectorSet.h" -#include "inc/Core/MetadataSet.h" -#include "Options.h" - -#include - -namespace SPTAG -{ -namespace IndexBuilder -{ - -class VectorSetReader -{ -public: - VectorSetReader(std::shared_ptr p_options); - - virtual ~VectorSetReader(); - - virtual ErrorCode LoadFile(const std::string& p_filePath) = 0; - - virtual std::shared_ptr GetVectorSet() const = 0; - - virtual std::shared_ptr GetMetadataSet() const = 0; - - static std::shared_ptr CreateInstance(std::shared_ptr p_options); - -protected: - std::shared_ptr m_options; -}; - - - -} // namespace IndexBuilder -} // namespace SPTAG - -#endif // _SPTAG_INDEXBUILDER_VECTORSETREADER_H_ diff --git a/core/src/index/thirdparty/SPTAG/AnnService/packages.config b/core/src/index/thirdparty/SPTAG/AnnService/packages.config index 424245f6dc5ad5c9c9e0174155e28a93d8a2b738..2dbed9b530657d0847a23e00224b1d5248f6601c 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/packages.config +++ b/core/src/index/thirdparty/SPTAG/AnnService/packages.config @@ -7,6 +7,4 @@ - - \ No newline at end of file diff --git 
a/core/src/index/thirdparty/SPTAG/AnnService/src/Client/main.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Client/main.cpp index b15121dc75af8d4d217f4efacb7ddda9b0e355bf..52888e3374c094254cabb0ec26c87b5c9949ceb4 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/Client/main.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Client/main.cpp @@ -53,19 +53,19 @@ int main(int argc, char** argv) for (const auto& indexRes : result.m_allIndexResults) { - fprintf(stdout, "Index: %s\n", indexRes.m_indexName.c_str()); + std::cout << "Index: " << indexRes.m_indexName << std::endl; int idx = 0; for (const auto& res : indexRes.m_results) { - fprintf(stdout, "------------------\n"); - fprintf(stdout, "DocIndex: %d Distance: %f\n", res.VID, res.Dist); + std::cout << "------------------" << std::endl; + std::cout << "DocIndex: " << res.VID << " Distance: " << res.Dist; if (indexRes.m_results.WithMeta()) { const auto& metadata = indexRes.m_results.GetMetadata(idx); - fprintf(stdout, " MetaData: %.*s\n", static_cast(metadata.Length()), metadata.Data()); + std::cout << " MetaData: " << std::string((char*)metadata.Data(), metadata.Length()); } - + std::cout << std::endl; ++idx; } } diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/BKT/BKTIndex.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/BKT/BKTIndex.cpp index db64cd507d6b3301bc59ab803d41b2b12bbec46f..e8928726f4fce9391cc1f7021ad95c7f4d21b5a7 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/BKT/BKTIndex.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/BKT/BKTIndex.cpp @@ -13,22 +13,7 @@ namespace SPTAG namespace BKT { template - ErrorCode Index::LoadIndexFromMemory(const std::vector& p_indexBlobs) - { - if (!m_pSamples.Load((char*)p_indexBlobs[0])) return ErrorCode::FailedParseValue; - if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1])) return ErrorCode::FailedParseValue; - if (!m_pGraph.LoadGraphFromMemory((char*)p_indexBlobs[2])) return 
ErrorCode::FailedParseValue; - m_pMetadata = std::make_shared(); - if (ErrorCode::Success != m_pMetadata->LoadMetadataFromMemory((char*)p_indexBlobs[3])) - return ErrorCode::FailedParseValue; - - m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); - m_workSpacePool->Init(m_iNumberOfThreads); - return ErrorCode::Success; - } - - template - ErrorCode Index::LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader) + ErrorCode Index::LoadConfig(Helper::IniReader& p_reader) { #define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \ SetParameter(RepresentStr, \ @@ -38,34 +23,96 @@ namespace SPTAG #include "inc/Core/BKT/ParameterDefinitionList.h" #undef DefineBKTParameter + return ErrorCode::Success; + } + + template + ErrorCode Index::LoadIndexDataFromMemory(const std::vector& p_indexBlobs) + { + if (p_indexBlobs.size() < 3) return ErrorCode::LackOfInputs; + + if (!m_pSamples.Load((char*)p_indexBlobs[0].Data())) return ErrorCode::FailedParseValue; + if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1].Data())) return ErrorCode::FailedParseValue; + if (!m_pGraph.LoadGraph((char*)p_indexBlobs[2].Data())) return ErrorCode::FailedParseValue; + if (p_indexBlobs.size() > 3 && !m_deletedID.load((char*)p_indexBlobs[3].Data())) return ErrorCode::FailedParseValue; + m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); + m_workSpacePool->Init(m_iNumberOfThreads); + return ErrorCode::Success; + } + + template + ErrorCode Index::LoadIndexData(const std::string& p_folderPath) + { if (!m_pSamples.Load(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; if (!m_pTrees.LoadTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail; if (!m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail; + if (!m_deletedID.load(p_folderPath + m_sDeleteDataPointsFilename)) return ErrorCode::Fail; m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); 
m_workSpacePool->Init(m_iNumberOfThreads); return ErrorCode::Success; } + template + ErrorCode Index::SaveConfig(std::ostream& p_configOut) const + { +#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \ + p_configOut << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl; + +#include "inc/Core/BKT/ParameterDefinitionList.h" +#undef DefineBKTParameter + p_configOut << std::endl; + return ErrorCode::Success; + } + + template + ErrorCode + Index::SaveIndexData(const std::string& p_folderPath) + { + std::lock_guard lock(m_dataAddLock); + std::shared_lock sharedlock(m_deletedID.getLock()); + + if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; + if (!m_pTrees.SaveTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail; + if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail; + if (!m_deletedID.save(p_folderPath + m_sDeleteDataPointsFilename)) return ErrorCode::Fail; + return ErrorCode::Success; + } + + template + ErrorCode Index::SaveIndexData(const std::vector& p_indexStreams) + { + if (p_indexStreams.size() < 4) return ErrorCode::LackOfInputs; + + std::lock_guard lock(m_dataAddLock); + std::shared_lock sharedlock(m_deletedID.getLock()); + + if (!m_pSamples.Save(*p_indexStreams[0])) return ErrorCode::Fail; + if (!m_pTrees.SaveTrees(*p_indexStreams[1])) return ErrorCode::Fail; + if (!m_pGraph.SaveGraph(*p_indexStreams[2])) return ErrorCode::Fail; + if (!m_deletedID.save(*p_indexStreams[3])) return ErrorCode::Fail; + return ErrorCode::Success; + } + #pragma region K-NN search #define Search(CheckDeleted1) \ m_pTrees.InitSearchTrees(this, p_query, p_space); \ - const int checkPos = m_pGraph.m_iNeighborhoodSize - 1; \ + const DimensionType checkPos = m_pGraph.m_iNeighborhoodSize - 1; \ while (!p_space.m_SPTQueue.empty()) { \ m_pTrees.SearchTrees(this, p_query, p_space, m_iNumberOfOtherDynamicPivots + p_space.m_iNumberOfCheckedLeaves); \ while (!p_space.m_NGQueue.empty()) { \ 
COMMON::HeapCell gnode = p_space.m_NGQueue.pop(); \ - const int *node = m_pGraph[gnode.node]; \ + const SizeType *node = m_pGraph[gnode.node]; \ _mm_prefetch((const char *)node, _MM_HINT_T0); \ CheckDeleted1 { \ if (p_query.AddPoint(gnode.node, gnode.distance)) { \ p_space.m_iNumOfContinuousNoBetterPropagation = 0; \ - int checkNode = node[checkPos]; \ + SizeType checkNode = node[checkPos]; \ if (checkNode < -1) { \ const COMMON::BKTNode& tnode = m_pTrees[-2 - checkNode]; \ - for (int i = -tnode.childStart; i < tnode.childEnd; i++) { \ + for (SizeType i = -tnode.childStart; i < tnode.childEnd; i++) { \ if (!p_query.AddPoint(m_pTrees[i].centerid, gnode.distance)) break; \ } \ } \ @@ -77,11 +124,11 @@ namespace SPTAG } \ } \ } \ - for (int i = 0; i <= checkPos; i++) { \ + for (DimensionType i = 0; i <= checkPos; i++) { \ _mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); \ } \ - for (int i = 0; i <= checkPos; i++) { \ - int nn_index = node[i]; \ + for (DimensionType i = 0; i <= checkPos; i++) { \ + SizeType nn_index = node[i]; \ if (nn_index < 0) break; \ if (p_space.CheckAndSet(nn_index)) continue; \ float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], GetFeatureDim()); \ @@ -96,9 +143,9 @@ namespace SPTAG p_query.SortResult(); \ template - void Index::SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const + void Index::SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet &p_deleted) const { - Search(if (p_deleted.find(gnode.node) == p_deleted.end())) + Search(if (!p_deleted.contains(gnode.node))) } template @@ -125,7 +172,7 @@ namespace SPTAG { for (int i = 0; i < p_query.GetResultNum(); ++i) { - int result = p_query.GetResult(i)->VID; + SizeType result = p_query.GetResult(i)->VID; p_query.SetMetadata(i, (result < 0) ? 
ByteArray::c_empty : m_pMetadata->GetMetadata(result)); } } @@ -134,7 +181,7 @@ namespace SPTAG #pragma endregion template - ErrorCode Index::BuildIndex(const void* p_data, int p_vectorNum, int p_dimension) + ErrorCode Index::BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension) { omp_set_num_threads(m_iNumberOfThreads); @@ -144,14 +191,14 @@ namespace SPTAG { int base = COMMON::Utils::GetBase(); #pragma omp parallel for - for (int i = 0; i < GetNumSamples(); i++) { + for (SizeType i = 0; i < GetNumSamples(); i++) { COMMON::Utils::Normalize(m_pSamples[i], GetFeatureDim(), base); } } m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); m_workSpacePool->Init(m_iNumberOfThreads); - + m_pTrees.BuildTrees(this); m_pGraph.BuildGraph(this, &(m_pTrees.GetSampleMap())); @@ -159,31 +206,22 @@ namespace SPTAG } template - ErrorCode Index::RefineIndex(const std::string& p_folderPath) + ErrorCode Index::RefineIndex(const std::vector& p_indexStreams) { - std::string folderPath(p_folderPath); - if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) - { - folderPath += FolderSep; - } - - if (!direxists(folderPath.c_str())) - { - mkdir(folderPath.c_str()); - } + std::lock_guard lock(m_dataAddLock); + std::shared_lock sharedlock(m_deletedID.getLock()); - std::lock_guard lock(m_dataLock); - int newR = GetNumSamples(); + SizeType newR = GetNumSamples(); - std::vector indices; - std::vector reverseIndices(newR); - for (int i = 0; i < newR; i++) { - if (m_deletedID.find(i) == m_deletedID.end()) { + std::vector indices; + std::vector reverseIndices(newR); + for (SizeType i = 0; i < newR; i++) { + if (!m_deletedID.contains(i)) { indices.push_back(i); reverseIndices[i] = i; } else { - while (m_deletedID.find(newR - 1) != m_deletedID.end() && newR > i) newR--; + while (m_deletedID.contains(newR - 1) && newR > i) newR--; if (newR == i) break; indices.push_back(newR - 1); reverseIndices[newR - 1] = i; @@ -193,33 +231,72 @@ 
namespace SPTAG std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl; - if (false == m_pSamples.Refine(indices, folderPath + m_sDataPointsFilename)) return ErrorCode::FailedCreateFile; - if (nullptr != m_pMetadata && ErrorCode::Success != m_pMetadata->RefineMetadata(indices, folderPath)) return ErrorCode::FailedCreateFile; + if (false == m_pSamples.Refine(indices, *p_indexStreams[0])) return ErrorCode::Fail; + if (nullptr != m_pMetadata && (p_indexStreams.size() < 6 || ErrorCode::Success != m_pMetadata->RefineMetadata(indices, *p_indexStreams[4], *p_indexStreams[5]))) return ErrorCode::Fail; COMMON::BKTree newTrees(m_pTrees); newTrees.BuildTrees(this, &indices); #pragma omp parallel for - for (int i = 0; i < newTrees.size(); i++) { + for (SizeType i = 0; i < newTrees.size(); i++) { newTrees[i].centerid = reverseIndices[newTrees[i].centerid]; } - newTrees.SaveTrees(folderPath + m_sBKTFilename); + newTrees.SaveTrees(*p_indexStreams[1]); + + m_pGraph.RefineGraph(this, indices, reverseIndices, *p_indexStreams[2], &(newTrees.GetSampleMap())); - m_pGraph.RefineGraph(this, indices, reverseIndices, folderPath + m_sGraphFilename, - &(newTrees.GetSampleMap())); + Helper::Concurrent::ConcurrentSet newDeletedID; + newDeletedID.save(*p_indexStreams[3]); return ErrorCode::Success; } template - ErrorCode Index::DeleteIndex(const void* p_vectors, int p_vectorNum) { + ErrorCode Index::RefineIndex(const std::string& p_folderPath) + { + std::string folderPath(p_folderPath); + if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) + { + folderPath += FolderSep; + } + + if (!direxists(folderPath.c_str())) + { + mkdir(folderPath.c_str()); + } + + std::vector streams; + streams.push_back(new std::ofstream(folderPath + m_sDataPointsFilename, std::ios::binary)); + streams.push_back(new std::ofstream(folderPath + m_sBKTFilename, std::ios::binary)); + streams.push_back(new std::ofstream(folderPath + m_sGraphFilename, std::ios::binary)); + 
streams.push_back(new std::ofstream(folderPath + m_sDeleteDataPointsFilename, std::ios::binary)); + if (nullptr != m_pMetadata) + { + streams.push_back(new std::ofstream(folderPath + m_sMetadataFile, std::ios::binary)); + streams.push_back(new std::ofstream(folderPath + m_sMetadataIndexFile, std::ios::binary)); + } + + for (size_t i = 0; i < streams.size(); i++) + if (!(((std::ofstream*)streams[i])->is_open())) return ErrorCode::FailedCreateFile; + + ErrorCode ret = RefineIndex(streams); + + for (size_t i = 0; i < streams.size(); i++) + { + ((std::ofstream*)streams[i])->close(); + delete streams[i]; + } + return ret; + } + + template + ErrorCode Index::DeleteIndex(const void* p_vectors, SizeType p_vectorNum) { const T* ptr_v = (const T*)p_vectors; #pragma omp parallel for schedule(dynamic) - for (int i = 0; i < p_vectorNum; i++) { + for (SizeType i = 0; i < p_vectorNum; i++) { COMMON::QueryResultSet query(ptr_v + i * GetFeatureDim(), m_pGraph.m_iCEF); SearchIndex(query); for (int i = 0; i < m_pGraph.m_iCEF; i++) { if (query.GetResult(i)->Dist < 1e-6) { - std::lock_guard lock(m_dataLock); m_deletedID.insert(query.GetResult(i)->VID); } } @@ -228,40 +305,43 @@ namespace SPTAG } template - ErrorCode Index::AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension) + ErrorCode Index::DeleteIndex(const SizeType& p_id) { + m_deletedID.insert(p_id); + return ErrorCode::Success; + } + + template + ErrorCode Index::AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start) { - int begin, end; + SizeType begin, end; { - std::lock_guard lock(m_dataLock); - - if (GetNumSamples() == 0) - return BuildIndex(p_vectors, p_vectorNum, p_dimension); - - if (p_dimension != GetFeatureDim()) - return ErrorCode::FailedParseValue; + std::lock_guard lock(m_dataAddLock); begin = GetNumSamples(); end = GetNumSamples() + p_vectorNum; - m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum); - m_pGraph.AddBatch(p_vectorNum); + if (p_start != 
nullptr) *p_start = begin; + + if (begin == 0) return BuildIndex(p_vectors, p_vectorNum, p_dimension); - if (m_pSamples.R() != end || m_pGraph.R() != end) { + if (p_dimension != GetFeatureDim()) return ErrorCode::FailedParseValue; + + if (m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum) != ErrorCode::Success || m_pGraph.AddBatch(p_vectorNum) != ErrorCode::Success) { std::cout << "Memory Error: Cannot alloc space for vectors" << std::endl; m_pSamples.SetR(begin); m_pGraph.SetR(begin); - return ErrorCode::Fail; + return ErrorCode::MemoryOverFlow; } if (DistCalcMethod::Cosine == m_iDistCalcMethod) { int base = COMMON::Utils::GetBase(); - for (int i = begin; i < end; i++) { + for (SizeType i = begin; i < end; i++) { COMMON::Utils::Normalize((T*)m_pSamples[i], GetFeatureDim(), base); } } } - for (int node = begin; node < end; node++) + for (SizeType node = begin; node < end; node++) { m_pGraph.RefineNode(this, node, true); } @@ -269,47 +349,6 @@ namespace SPTAG return ErrorCode::Success; } - template - ErrorCode - Index::SaveIndexToMemory(std::vector& p_indexBlobs, std::vector &p_indexBlobsLen) - { - p_indexBlobs.resize(4); - p_indexBlobsLen.resize(4); - if (!m_pSamples.Save(&p_indexBlobs[0], p_indexBlobsLen[0])) return ErrorCode::Fail; - if (!m_pTrees.SaveTrees(&p_indexBlobs[1], p_indexBlobsLen[1])) return ErrorCode::Fail; - if (!m_pGraph.SaveGraphToMemory(&p_indexBlobs[2], p_indexBlobsLen[2])) return ErrorCode::Fail; - if (ErrorCode::Success != m_pMetadata->SaveMetadataToMemory(&p_indexBlobs[3], p_indexBlobsLen[3])) - return ErrorCode::Fail; - return ErrorCode::Success; - } - - template - ErrorCode - Index::SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout) - { - m_sDataPointsFilename = "vectors.bin"; - m_sBKTFilename = "tree.bin"; - m_sGraphFilename = "graph.bin"; - -#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \ - p_configout << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl; - -#include 
"inc/Core/BKT/ParameterDefinitionList.h" -#undef DefineBKTParameter - - p_configout << std::endl; - - if (m_deletedID.size() > 0) { - RefineIndex(p_folderPath); - } - else { - if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; - if (!m_pTrees.SaveTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail; - if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail; - } - return ErrorCode::Success; - } - template ErrorCode Index::SetParameter(const char* p_param, const char* p_value) diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/Common/WorkSpacePool.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/Common/WorkSpacePool.cpp index 036e281ec8588f323b36cfdeb2a5b55862e61812..a88dbdb2d5354dd2cea06719b1f251c1186a7c48 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/Common/WorkSpacePool.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/Common/WorkSpacePool.cpp @@ -7,7 +7,7 @@ using namespace SPTAG; using namespace SPTAG::COMMON; -WorkSpacePool::WorkSpacePool(int p_maxCheck, int p_vectorCount) +WorkSpacePool::WorkSpacePool(int p_maxCheck, SizeType p_vectorCount) : m_maxCheck(p_maxCheck), m_vectorCount(p_vectorCount) { diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/CommonDataStructure.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/CommonDataStructure.cpp deleted file mode 100644 index 4a91554dac49c539a63b13642dd02335bd67a1f7..0000000000000000000000000000000000000000 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/CommonDataStructure.cpp +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -#include "inc/Core/CommonDataStructure.h" - -using namespace SPTAG; - -const ByteArray ByteArray::c_empty; - -ByteArray::ByteArray() - : m_data(nullptr), - m_length(0) -{ -} - - -ByteArray::ByteArray(ByteArray&& p_right) - : m_data(p_right.m_data), - m_length(p_right.m_length), - m_dataHolder(std::move(p_right.m_dataHolder)) -{ -} - - -ByteArray::ByteArray(std::uint8_t* p_array, std::size_t p_length, bool p_transferOnwership) - : m_data(p_array), - m_length(p_length) -{ - if (p_transferOnwership) - { - m_dataHolder.reset(m_data, std::default_delete()); - } -} - - -ByteArray::ByteArray(std::uint8_t* p_array, std::size_t p_length, std::shared_ptr p_dataHolder) - : m_data(p_array), - m_length(p_length), - m_dataHolder(std::move(p_dataHolder)) -{ -} - - -ByteArray::ByteArray(const ByteArray& p_right) - : m_data(p_right.m_data), - m_length(p_right.m_length), - m_dataHolder(p_right.m_dataHolder) -{ -} - - -ByteArray& -ByteArray::operator= (const ByteArray& p_right) -{ - m_data = p_right.m_data; - m_length = p_right.m_length; - m_dataHolder = p_right.m_dataHolder; - - return *this; -} - - -ByteArray& -ByteArray::operator= (ByteArray&& p_right) -{ - m_data = p_right.m_data; - m_length = p_right.m_length; - m_dataHolder = std::move(p_right.m_dataHolder); - - return *this; -} - - -ByteArray::~ByteArray() -{ -} - - -ByteArray -ByteArray::Alloc(std::size_t p_length) -{ - ByteArray byteArray; - if (0 == p_length) - { - return byteArray; - } - - byteArray.m_dataHolder.reset(new std::uint8_t[p_length], - std::default_delete()); - - byteArray.m_length = p_length; - byteArray.m_data = byteArray.m_dataHolder.get(); - return byteArray; -} - - -std::uint8_t* -ByteArray::Data() const -{ - return m_data; -} - - -std::size_t -ByteArray::Length() const -{ - return m_length; -} - - -void -ByteArray::SetData(std::uint8_t* p_array, std::size_t p_length) -{ - m_data = p_array; - m_length = p_length; -} - - -std::shared_ptr -ByteArray::DataHolder() const -{ - return m_dataHolder; -} - - 
-void -ByteArray::Clear() -{ - m_data = nullptr; - m_dataHolder.reset(); - m_length = 0; -} \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/KDT/KDTIndex.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/KDT/KDTIndex.cpp index 5fce713a565bd8f705a1a821972d7f402fc02341..da3c10e095d15fb6fa559361155b9a9d07af0efb 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/KDT/KDTIndex.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/KDT/KDTIndex.cpp @@ -13,22 +13,7 @@ namespace SPTAG namespace KDT { template - ErrorCode Index::LoadIndexFromMemory(const std::vector& p_indexBlobs) - { - if (!m_pSamples.Load((char*)p_indexBlobs[0])) return ErrorCode::FailedParseValue; - if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1])) return ErrorCode::FailedParseValue; - if (!m_pGraph.LoadGraphFromMemory((char*)p_indexBlobs[2])) return ErrorCode::FailedParseValue; - m_pMetadata = std::make_shared(); - if (ErrorCode::Success != m_pMetadata->LoadMetadataFromMemory((char*)p_indexBlobs[3])) - return ErrorCode::FailedParseValue; - - m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); - m_workSpacePool->Init(m_iNumberOfThreads); - return ErrorCode::Success; - } - - template - ErrorCode Index::LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader) + ErrorCode Index::LoadConfig(Helper::IniReader& p_reader) { #define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \ SetParameter(RepresentStr, \ @@ -38,35 +23,96 @@ namespace SPTAG #include "inc/Core/KDT/ParameterDefinitionList.h" #undef DefineKDTParameter + return ErrorCode::Success; + } + + template + ErrorCode Index::LoadIndexDataFromMemory(const std::vector& p_indexBlobs) + { + if (p_indexBlobs.size() < 3) return ErrorCode::LackOfInputs; + + if (!m_pSamples.Load((char*)p_indexBlobs[0].Data())) return ErrorCode::FailedParseValue; + if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1].Data())) return 
ErrorCode::FailedParseValue; + if (!m_pGraph.LoadGraph((char*)p_indexBlobs[2].Data())) return ErrorCode::FailedParseValue; + if (p_indexBlobs.size() > 3 && !m_deletedID.load((char*)p_indexBlobs[3].Data())) return ErrorCode::FailedParseValue; + m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); + m_workSpacePool->Init(m_iNumberOfThreads); + return ErrorCode::Success; + } + + template + ErrorCode Index::LoadIndexData(const std::string& p_folderPath) + { if (!m_pSamples.Load(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; if (!m_pTrees.LoadTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail; if (!m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail; + if (!m_deletedID.load(p_folderPath + m_sDeleteDataPointsFilename)) return ErrorCode::Fail; m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); m_workSpacePool->Init(m_iNumberOfThreads); return ErrorCode::Success; } + template + ErrorCode Index::SaveConfig(std::ostream& p_configOut) const + { +#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \ + p_configOut << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl; + +#include "inc/Core/KDT/ParameterDefinitionList.h" +#undef DefineKDTParameter + p_configOut << std::endl; + return ErrorCode::Success; + } + + template + ErrorCode Index::SaveIndexData(const std::string& p_folderPath) + { + std::lock_guard lock(m_dataAddLock); + std::shared_lock sharedlock(m_deletedID.getLock()); + + if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; + if (!m_pTrees.SaveTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail; + if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail; + if (!m_deletedID.save(p_folderPath + m_sDeleteDataPointsFilename)) return ErrorCode::Fail; + return ErrorCode::Success; + } + + template + ErrorCode Index::SaveIndexData(const std::vector& p_indexStreams) + { + if 
(p_indexStreams.size() < 4) return ErrorCode::LackOfInputs; + + std::lock_guard lock(m_dataAddLock); + std::shared_lock sharedlock(m_deletedID.getLock()); + + if (!m_pSamples.Save(*p_indexStreams[0])) return ErrorCode::Fail; + if (!m_pTrees.SaveTrees(*p_indexStreams[1])) return ErrorCode::Fail; + if (!m_pGraph.SaveGraph(*p_indexStreams[2])) return ErrorCode::Fail; + if (!m_deletedID.save(*p_indexStreams[3])) return ErrorCode::Fail; + return ErrorCode::Success; + } + #pragma region K-NN search #define Search(CheckDeleted1) \ m_pTrees.InitSearchTrees(this, p_query, p_space, m_iNumberOfInitialDynamicPivots); \ while (!p_space.m_NGQueue.empty()) { \ COMMON::HeapCell gnode = p_space.m_NGQueue.pop(); \ - const int *node = m_pGraph[gnode.node]; \ + const SizeType *node = m_pGraph[gnode.node]; \ _mm_prefetch((const char *)node, _MM_HINT_T0); \ CheckDeleted1 { \ if (!p_query.AddPoint(gnode.node, gnode.distance) && p_space.m_iNumberOfCheckedLeaves > p_space.m_iMaxCheck) { \ p_query.SortResult(); return; \ } \ } \ - for (int i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) \ + for (DimensionType i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) \ _mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); \ float upperBound = max(p_query.worstDist(), gnode.distance); \ bool bLocalOpt = true; \ - for (int i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) { \ - int nn_index = node[i]; \ + for (DimensionType i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) { \ + SizeType nn_index = node[i]; \ if (nn_index < 0) break; \ if (p_space.CheckAndSet(nn_index)) continue; \ float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], GetFeatureDim()); \ @@ -87,9 +133,9 @@ namespace SPTAG p_query.SortResult(); \ template - void Index::SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const + void Index::SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const 
Helper::Concurrent::ConcurrentSet &p_deleted) const { - Search(if (p_deleted.find(gnode.node) == p_deleted.end())) + Search(if (!p_deleted.contains(gnode.node))) } template @@ -116,7 +162,7 @@ namespace SPTAG { for (int i = 0; i < p_query.GetResultNum(); ++i) { - int result = p_query.GetResult(i)->VID; + SizeType result = p_query.GetResult(i)->VID; p_query.SetMetadata(i, (result < 0) ? ByteArray::c_empty : m_pMetadata->GetMetadata(result)); } } @@ -125,7 +171,7 @@ namespace SPTAG #pragma endregion template - ErrorCode Index::BuildIndex(const void* p_data, int p_vectorNum, int p_dimension) + ErrorCode Index::BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension) { omp_set_num_threads(m_iNumberOfThreads); @@ -135,7 +181,7 @@ namespace SPTAG { int base = COMMON::Utils::GetBase(); #pragma omp parallel for - for (int i = 0; i < GetNumSamples(); i++) { + for (SizeType i = 0; i < GetNumSamples(); i++) { COMMON::Utils::Normalize(m_pSamples[i], GetFeatureDim(), base); } } @@ -145,36 +191,27 @@ namespace SPTAG m_pTrees.BuildTrees(this); m_pGraph.BuildGraph(this); - + return ErrorCode::Success; } template - ErrorCode Index::RefineIndex(const std::string& p_folderPath) + ErrorCode Index::RefineIndex(const std::vector& p_indexStreams) { - std::string folderPath(p_folderPath); - if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) - { - folderPath += FolderSep; - } + std::lock_guard lock(m_dataAddLock); + std::shared_lock sharedlock(m_deletedID.getLock()); - if (!direxists(folderPath.c_str())) - { - mkdir(folderPath.c_str()); - } + SizeType newR = GetNumSamples(); - std::lock_guard lock(m_dataLock); - int newR = GetNumSamples(); - - std::vector indices; - std::vector reverseIndices(newR); - for (int i = 0; i < newR; i++) { - if (m_deletedID.find(i) == m_deletedID.end()) { + std::vector indices; + std::vector reverseIndices(newR); + for (SizeType i = 0; i < newR; i++) { + if (!m_deletedID.contains(i)) { indices.push_back(i); reverseIndices[i] = 
i; } else { - while (m_deletedID.find(newR - 1) != m_deletedID.end() && newR > i) newR--; + while (m_deletedID.contains(newR - 1) && newR > i) newR--; if (newR == i) break; indices.push_back(newR - 1); reverseIndices[newR - 1] = i; @@ -184,35 +221,75 @@ namespace SPTAG std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl; - if (false == m_pSamples.Refine(indices, folderPath + m_sDataPointsFilename)) return ErrorCode::FailedCreateFile; - if (nullptr != m_pMetadata && ErrorCode::Success != m_pMetadata->RefineMetadata(indices, folderPath)) return ErrorCode::FailedCreateFile; + if (false == m_pSamples.Refine(indices, *p_indexStreams[0])) return ErrorCode::Fail; + if (nullptr != m_pMetadata && (p_indexStreams.size() < 6 || ErrorCode::Success != m_pMetadata->RefineMetadata(indices, *p_indexStreams[4], *p_indexStreams[5]))) return ErrorCode::Fail; - m_pGraph.RefineGraph(this, indices, reverseIndices, folderPath + m_sGraphFilename); + m_pGraph.RefineGraph(this, indices, reverseIndices, *p_indexStreams[2]); COMMON::KDTree newTrees(m_pTrees); newTrees.BuildTrees(this, &indices); #pragma omp parallel for - for (int i = 0; i < newTrees.size(); i++) { + for (SizeType i = 0; i < newTrees.size(); i++) { if (newTrees[i].left < 0) newTrees[i].left = -reverseIndices[-newTrees[i].left - 1] - 1; if (newTrees[i].right < 0) newTrees[i].right = -reverseIndices[-newTrees[i].right - 1] - 1; } - newTrees.SaveTrees(folderPath + m_sKDTFilename); + newTrees.SaveTrees(*p_indexStreams[1]); + + Helper::Concurrent::ConcurrentSet newDeletedID; + newDeletedID.save(*p_indexStreams[3]); return ErrorCode::Success; } template - ErrorCode Index::DeleteIndex(const void* p_vectors, int p_vectorNum) { + ErrorCode Index::RefineIndex(const std::string& p_folderPath) + { + std::string folderPath(p_folderPath); + if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) + { + folderPath += FolderSep; + } + + if (!direxists(folderPath.c_str())) + { + mkdir(folderPath.c_str()); + } 
+ + std::vector streams; + streams.push_back(new std::ofstream(folderPath + m_sDataPointsFilename, std::ios::binary)); + streams.push_back(new std::ofstream(folderPath + m_sKDTFilename, std::ios::binary)); + streams.push_back(new std::ofstream(folderPath + m_sGraphFilename, std::ios::binary)); + streams.push_back(new std::ofstream(folderPath + m_sDeleteDataPointsFilename, std::ios::binary)); + if (nullptr != m_pMetadata) + { + streams.push_back(new std::ofstream(folderPath + m_sMetadataFile, std::ios::binary)); + streams.push_back(new std::ofstream(folderPath + m_sMetadataIndexFile, std::ios::binary)); + } + + for (size_t i = 0; i < streams.size(); i++) + if (!(((std::ofstream*)streams[i])->is_open())) return ErrorCode::FailedCreateFile; + + ErrorCode ret = RefineIndex(streams); + + for (size_t i = 0; i < streams.size(); i++) + { + ((std::ofstream*)streams[i])->close(); + delete streams[i]; + } + return ret; + } + + template + ErrorCode Index::DeleteIndex(const void* p_vectors, SizeType p_vectorNum) { const T* ptr_v = (const T*)p_vectors; #pragma omp parallel for schedule(dynamic) - for (int i = 0; i < p_vectorNum; i++) { + for (SizeType i = 0; i < p_vectorNum; i++) { COMMON::QueryResultSet query(ptr_v + i * GetFeatureDim(), m_pGraph.m_iCEF); SearchIndex(query); for (int i = 0; i < m_pGraph.m_iCEF; i++) { if (query.GetResult(i)->Dist < 1e-6) { - std::lock_guard lock(m_dataLock); m_deletedID.insert(query.GetResult(i)->VID); } } @@ -221,40 +298,43 @@ namespace SPTAG } template - ErrorCode Index::AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension) + ErrorCode Index::DeleteIndex(const SizeType& p_id) { + m_deletedID.insert(p_id); + return ErrorCode::Success; + } + + template + ErrorCode Index::AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start) { - int begin, end; + SizeType begin, end; { - std::lock_guard lock(m_dataLock); - - if (GetNumSamples() == 0) - return BuildIndex(p_vectors, p_vectorNum, 
p_dimension); - - if (p_dimension != GetFeatureDim()) - return ErrorCode::FailedParseValue; + std::lock_guard lock(m_dataAddLock); begin = GetNumSamples(); end = GetNumSamples() + p_vectorNum; - m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum); - m_pGraph.AddBatch(p_vectorNum); + if (p_start != nullptr) *p_start = begin; + + if (begin == 0) return BuildIndex(p_vectors, p_vectorNum, p_dimension); - if (m_pSamples.R() != end || m_pGraph.R() != end) { + if (p_dimension != GetFeatureDim()) return ErrorCode::FailedParseValue; + + if (m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum) != ErrorCode::Success || m_pGraph.AddBatch(p_vectorNum) != ErrorCode::Success) { std::cout << "Memory Error: Cannot alloc space for vectors" << std::endl; m_pSamples.SetR(begin); m_pGraph.SetR(begin); - return ErrorCode::Fail; + return ErrorCode::MemoryOverFlow; } if (DistCalcMethod::Cosine == m_iDistCalcMethod) { int base = COMMON::Utils::GetBase(); - for (int i = begin; i < end; i++) { + for (SizeType i = begin; i < end; i++) { COMMON::Utils::Normalize((T*)m_pSamples[i], GetFeatureDim(), base); } } } - for (int node = begin; node < end; node++) + for (SizeType node = begin; node < end; node++) { m_pGraph.RefineNode(this, node, true); } @@ -262,47 +342,6 @@ namespace SPTAG return ErrorCode::Success; } - template - ErrorCode - Index::SaveIndexToMemory(std::vector& p_indexBlobs, std::vector &p_indexBlobsLen) - { - p_indexBlobs.resize(4); - p_indexBlobsLen.resize(4); - if (!m_pSamples.Save(&p_indexBlobs[0], p_indexBlobsLen[0])) return ErrorCode::Fail; - if (!m_pTrees.SaveTrees(&p_indexBlobs[1], p_indexBlobsLen[1])) return ErrorCode::Fail; - if (!m_pGraph.SaveGraphToMemory(&p_indexBlobs[2], p_indexBlobsLen[2])) return ErrorCode::Fail; - if (ErrorCode::Success != m_pMetadata->SaveMetadataToMemory(&p_indexBlobs[3], p_indexBlobsLen[3])) - return ErrorCode::Fail; - return ErrorCode::Success; - } - - template - ErrorCode - Index::SaveIndex(const std::string& p_folderPath, std::ofstream& 
p_configout) - { - m_sDataPointsFilename = "vectors.bin"; - m_sKDTFilename = "tree.bin"; - m_sGraphFilename = "graph.bin"; - -#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \ - p_configout << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl; - -#include "inc/Core/KDT/ParameterDefinitionList.h" -#undef DefineKDTParameter - - p_configout << std::endl; - - if (m_deletedID.size() > 0) { - RefineIndex(p_folderPath); - } - else { - if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; - if (!m_pTrees.SaveTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail; - if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail; - } - return ErrorCode::Success; - } - template ErrorCode Index::SetParameter(const char* p_param, const char* p_value) diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/MetadataSet.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/MetadataSet.cpp index a5d410ce5e9490eedc99c817cce9886e4b132222..137eb5d13ab15c49ce40092c94e5c3190117bd53 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/MetadataSet.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/MetadataSet.cpp @@ -5,68 +5,43 @@ #include #include -#include using namespace SPTAG; ErrorCode -MetadataSet::RefineMetadata(std::vector& indices, const std::string& p_folderPath) +MetadataSet::RefineMetadata(std::vector& indices, std::ostream& p_metaOut, std::ostream& p_metaIndexOut) { - std::ofstream metaOut(p_folderPath + "metadata.bin_tmp", std::ios::binary); - std::ofstream metaIndexOut(p_folderPath + "metadataIndex.bin", std::ios::binary); - if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile; - - int R = (int)indices.size(); - metaIndexOut.write((char*)&R, sizeof(int)); + SizeType R = (SizeType)indices.size(); + p_metaIndexOut.write((char*)&R, sizeof(SizeType)); std::uint64_t offset = 0; - for (int i = 0; i < R; i++) { - 
metaIndexOut.write((char*)&offset, sizeof(std::uint64_t)); + for (SizeType i = 0; i < R; i++) { + p_metaIndexOut.write((char*)&offset, sizeof(std::uint64_t)); ByteArray meta = GetMetadata(indices[i]); - metaOut.write((char*)meta.Data(), sizeof(uint8_t)*meta.Length()); + p_metaOut.write((char*)meta.Data(), sizeof(uint8_t)*meta.Length()); offset += meta.Length(); } - metaOut.close(); - metaIndexOut.write((char*)&offset, sizeof(std::uint64_t)); - metaIndexOut.close(); - - SPTAG::MetadataSet::MetaCopy(p_folderPath + "metadata.bin_tmp", p_folderPath + "metadata.bin"); + p_metaIndexOut.write((char*)&offset, sizeof(std::uint64_t)); return ErrorCode::Success; } -ErrorCode -MetadataSet::MetaCopy(const std::string& p_src, const std::string& p_dst) +ErrorCode +MetadataSet::RefineMetadata(std::vector& indices, const std::string& p_metaFile, const std::string& p_metaindexFile) { - if (p_src == p_dst) return ErrorCode::Success; - - std::ifstream src(p_src, std::ios::binary); - if (!src.is_open()) - { - std::cerr << "ERROR: Can't open " << p_src << std::endl; - return ErrorCode::FailedOpenFile; - } - - std::ofstream dst(p_dst, std::ios::binary); - if (!dst.is_open()) - { - std::cerr << "ERROR: Can't create " << p_dst << std::endl; - src.close(); - return ErrorCode::FailedCreateFile; - } + std::ofstream metaOut(p_metaFile + "_tmp", std::ios::binary); + std::ofstream metaIndexOut(p_metaindexFile, std::ios::binary); + if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile; - int bufsize = 1000000; - char* buf = new char[bufsize]; - while (!src.eof()) { - src.read(buf, bufsize); - dst.write(buf, src.gcount()); - } - delete[] buf; - src.close(); - dst.close(); + RefineMetadata(indices, metaOut, metaIndexOut); + metaOut.close(); + metaIndexOut.close(); + if (fileexists(p_metaFile.c_str())) std::remove(p_metaFile.c_str()); + std::rename((p_metaFile + "_tmp").c_str(), p_metaFile.c_str()); return ErrorCode::Success; } + MetadataSet::MetadataSet() { } @@ 
-107,19 +82,19 @@ FileMetadataSet::~FileMetadataSet() ByteArray -FileMetadataSet::GetMetadata(IndexType p_vectorID) const +FileMetadataSet::GetMetadata(SizeType p_vectorID) const { std::uint64_t startoff = m_pOffsets[p_vectorID]; std::uint64_t bytes = m_pOffsets[p_vectorID + 1] - startoff; - if (p_vectorID < (IndexType)m_count) { + if (p_vectorID < m_count) { m_fp->seekg(startoff, std::ios_base::beg); - ByteArray b = ByteArray::Alloc((SizeType)bytes); + ByteArray b = ByteArray::Alloc(bytes); m_fp->read((char*)b.Data(), bytes); return b; } else { startoff -= m_pOffsets[m_count]; - return ByteArray((std::uint8_t*)m_newdata.data() + startoff, static_cast(bytes), false); + return ByteArray((std::uint8_t*)m_newdata.data() + startoff, bytes, false); } } @@ -138,10 +113,18 @@ FileMetadataSet::Available() const } +std::pair +FileMetadataSet::BufferSize() const +{ + return std::make_pair(m_pOffsets[m_pOffsets.size() - 1], + sizeof(SizeType) + sizeof(std::uint64_t) * m_pOffsets.size()); +} + + void FileMetadataSet::AddBatch(MetadataSet& data) { - for (int i = 0; i < static_cast(data.Count()); i++) + for (SizeType i = 0; i < data.Count(); i++) { ByteArray newdata = data.GetMetadata(i); m_newdata.insert(m_newdata.end(), newdata.Data(), newdata.Data() + newdata.Length()); @@ -150,45 +133,52 @@ FileMetadataSet::AddBatch(MetadataSet& data) } + ErrorCode -FileMetadataSet::SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile) +FileMetadataSet::SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut) { - ErrorCode ret = ErrorCode::Success; - m_fp->close(); - ret = MetaCopy(m_metaFile, p_metaFile); - if (ErrorCode::Success != ret) - { - return ret; + m_fp->seekg(0, std::ios_base::beg); + + int bufsize = 1000000; + char* buf = new char[bufsize]; + while (!m_fp->eof()) { + m_fp->read(buf, bufsize); + p_metaOut.write(buf, m_fp->gcount()); } + delete[] buf; + if (m_newdata.size() > 0) { - std::ofstream tmpout(p_metaFile, 
std::ofstream::app|std::ios::binary); - if (!tmpout.is_open()) return ErrorCode::FailedOpenFile; - tmpout.write((char*)m_newdata.data(), m_newdata.size()); - tmpout.close(); + p_metaOut.write((char*)m_newdata.data(), m_newdata.size()); } - m_fp->open(p_metaFile, std::ifstream::binary); - std::ofstream dst(p_metaindexFile, std::ios::binary); - m_count = static_cast(m_pOffsets.size()) - 1; - m_newdata.clear(); - dst.write((char*)&m_count, sizeof(m_count)); - dst.write((char*)m_pOffsets.data(), sizeof(std::uint64_t) * m_pOffsets.size()); - return ret; + SizeType count = Count(); + p_metaIndexOut.write((char*)&count, sizeof(SizeType)); + p_metaIndexOut.write((char*)m_pOffsets.data(), sizeof(std::uint64_t) * m_pOffsets.size()); + return ErrorCode::Success; } ErrorCode -FileMetadataSet::SaveMetadataToMemory(void **pGraphMemFile, int64_t &len) { - // TODO(lxj): serialize file to mem? - return ErrorCode::Fail; -} +FileMetadataSet::SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile) +{ + std::ofstream metaOut(p_metaFile + "_tmp", std::ios::binary); + std::ofstream metaIndexOut(p_metaindexFile, std::ios::binary); + if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile; -ErrorCode -FileMetadataSet::LoadMetadataFromMemory(void *pGraphMemFile) { - // TODO(lxj): not support yet - return ErrorCode::Fail; + SaveMetadata(metaOut, metaIndexOut); + metaOut.close(); + metaIndexOut.close(); + + m_fp->close(); + if (fileexists(p_metaFile.c_str())) std::remove(p_metaFile.c_str()); + std::rename((p_metaFile + "_tmp").c_str(), p_metaFile.c_str()); + m_fp->open(p_metaFile, std::ifstream::binary); + m_count = Count(); + m_newdata.clear(); + return ErrorCode::Success; } + MemMetadataSet::MemMetadataSet(ByteArray p_metadata, ByteArray p_offsets, SizeType p_count) : m_metadataHolder(std::move(p_metadata)), m_offsetHolder(std::move(p_offsets)), @@ -205,17 +195,17 @@ MemMetadataSet::~MemMetadataSet() ByteArray 
-MemMetadataSet::GetMetadata(IndexType p_vectorID) const +MemMetadataSet::GetMetadata(SizeType p_vectorID) const { - if (static_cast(p_vectorID) < m_count) + if (p_vectorID < m_count) { return ByteArray(m_metadataHolder.Data() + m_offsets[p_vectorID], - static_cast(m_offsets[p_vectorID + 1] - m_offsets[p_vectorID]), - m_metadataHolder.DataHolder()); + m_offsets[p_vectorID + 1] - m_offsets[p_vectorID], + false); } - else if (p_vectorID < m_offsets.size() - 1) { + else if (p_vectorID < (SizeType)(m_offsets.size() - 1)) { return ByteArray((std::uint8_t*)m_newdata.data() + m_offsets[p_vectorID] - m_offsets[m_count], - static_cast(m_offsets[p_vectorID + 1] - m_offsets[p_vectorID]), + m_offsets[p_vectorID + 1] - m_offsets[p_vectorID], false); } @@ -226,7 +216,7 @@ MemMetadataSet::GetMetadata(IndexType p_vectorID) const SizeType MemMetadataSet::Count() const { - return m_count; + return static_cast(m_offsets.size() - 1); } @@ -236,10 +226,18 @@ MemMetadataSet::Available() const return m_metadataHolder.Length() > 0 && m_offsetHolder.Length() > 0; } + +std::pair +MemMetadataSet::BufferSize() const +{ + return std::make_pair(m_offsets[m_offsets.size() - 1], + sizeof(SizeType) + sizeof(std::uint64_t) * m_offsets.size()); +} + void MemMetadataSet::AddBatch(MetadataSet& data) { - for (int i = 0; i < static_cast(data.Count()); i++) + for (SizeType i = 0; i < data.Count(); i++) { ByteArray newdata = data.GetMetadata(i); m_newdata.insert(m_newdata.end(), newdata.Data(), newdata.Data() + newdata.Length()); @@ -247,83 +245,36 @@ MemMetadataSet::AddBatch(MetadataSet& data) } } + ErrorCode -MemMetadataSet::SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile) +MemMetadataSet::SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut) { - std::ofstream outputStream; - outputStream.open(p_metaFile, std::ios::binary); - if (!outputStream.is_open()) - { - std::cerr << "Error: Failed to create file " << p_metaFile << "." 
<< std::endl; - return ErrorCode::FailedCreateFile; - } - - outputStream.write(reinterpret_cast(m_metadataHolder.Data()), m_metadataHolder.Length()); - outputStream.write((const char*)m_newdata.data(), sizeof(std::uint8_t)*m_newdata.size()); - outputStream.close(); - - outputStream.open(p_metaindexFile, std::ios::binary); - if (!outputStream.is_open()) - { - std::cerr << "Error: Failed to create file " << p_metaindexFile << "." << std::endl; - return ErrorCode::FailedCreateFile; + p_metaOut.write(reinterpret_cast(m_metadataHolder.Data()), m_metadataHolder.Length()); + if (m_newdata.size() > 0) { + p_metaOut.write((char*)m_newdata.data(), m_newdata.size()); } - m_count = static_cast(m_offsets.size()) - 1; - outputStream.write(reinterpret_cast(&m_count), sizeof(m_count)); - outputStream.write(reinterpret_cast(m_offsets.data()), sizeof(std::uint64_t)*m_offsets.size()); - outputStream.close(); - + SizeType count = Count(); + p_metaIndexOut.write((char*)&count, sizeof(SizeType)); + p_metaIndexOut.write((char*)m_offsets.data(), sizeof(std::uint64_t) * m_offsets.size()); return ErrorCode::Success; } -ErrorCode -MemMetadataSet::SaveMetadataToMemory(void **pGraphMemFile, int64_t &len) { - auto size = sizeof(int64_t) + sizeof(int64_t) + m_metadataHolder.Length() + sizeof(std::uint64_t) * m_offsets.size(); - char* mem = (char*)malloc(size); - if (mem == NULL) return ErrorCode::Fail; - - auto ptr = mem; - *(int64_t*)ptr = m_metadataHolder.Length(); - ptr += sizeof(int64_t); - - m_count = static_cast(m_offsets.size()) - 1; - *(int64_t*)ptr = m_count; - ptr += sizeof(int64_t); - - memcpy(ptr, m_metadataHolder.Data(), m_metadataHolder.Length()); - ptr += m_metadataHolder.Length(); - memcpy(ptr, m_offsets.data(), sizeof(std::uint64_t)*m_offsets.size()); - - *pGraphMemFile = mem; - len = size; - - return ErrorCode::Success; -} ErrorCode -MemMetadataSet::LoadMetadataFromMemory(void *pGraphMemFile) { - m_metadataHolder.Clear(); - m_offsetHolder.Clear(); - m_offsets.clear(); - - char* 
ptr = (char *)pGraphMemFile; - auto metadataHolderLength = *(int64_t *)ptr; - ptr += sizeof(int64_t); - - m_count = *(int64_t *)ptr; - ptr += sizeof(int64_t); - - m_metadataHolder = ByteArray::Alloc(metadataHolderLength); - memcpy(m_metadataHolder.Data(), ptr, metadataHolderLength); - ptr += metadataHolderLength; - - m_offsetHolder = ByteArray::Alloc(sizeof(std::uint64_t ) * (m_count + 1)); - memcpy(m_offsetHolder.Data(), ptr, sizeof(std::uint64_t ) * (m_count + 1)); +MemMetadataSet::SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile) +{ + std::ofstream metaOut(p_metaFile + "_tmp", std::ios::binary); + std::ofstream metaIndexOut(p_metaindexFile, std::ios::binary); + if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile; - const std::uint64_t* newdata = reinterpret_cast(m_offsetHolder.Data()); - m_offsets.insert(m_offsets.end(), newdata, newdata + m_count + 1); + SaveMetadata(metaOut, metaIndexOut); + metaOut.close(); + metaIndexOut.close(); + if (fileexists(p_metaFile.c_str())) std::remove(p_metaFile.c_str()); + std::rename((p_metaFile + "_tmp").c_str(), p_metaFile.c_str()); return ErrorCode::Success; } + diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/VectorIndex.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/VectorIndex.cpp index 657978b74994b07ee6ef7393839c0ae854c91def..9c7ccf5492c9145d8b2ee7fb5a84d46176642890 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/VectorIndex.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/VectorIndex.cpp @@ -6,6 +6,7 @@ #include "inc/Helper/CommonHelper.h" #include "inc/Helper/StringConvert.h" #include "inc/Helper/SimpleIniReader.h" +#include "inc/Helper/BufferStream.h" #include "inc/Core/BKT/Index.h" #include "inc/Core/KDT/Index.h" @@ -46,7 +47,7 @@ VectorIndex::SetMetadata(const std::string& p_metadataFilePath, const std::strin ByteArray -VectorIndex::GetMetadata(IndexType p_vectorID) const { 
+VectorIndex::GetMetadata(SizeType p_vectorID) const { if (nullptr != m_pMetadata) { return m_pMetadata->GetMetadata(p_vectorID); @@ -55,6 +56,100 @@ VectorIndex::GetMetadata(IndexType p_vectorID) const { } +std::shared_ptr> VectorIndex::CalculateBufferSize() const +{ + std::shared_ptr> ret = BufferSize(); + if (m_pMetadata != nullptr) + { + auto metasize = m_pMetadata->BufferSize(); + ret->push_back(metasize.first); + ret->push_back(metasize.second); + } + return std::move(ret); +} + + +ErrorCode +VectorIndex::LoadIndexConfig(Helper::IniReader& p_reader) +{ + std::string metadataSection("MetaData"); + if (p_reader.DoesSectionExist(metadataSection)) + { + m_sMetadataFile = p_reader.GetParameter(metadataSection, "MetaDataFilePath", std::string()); + m_sMetadataIndexFile = p_reader.GetParameter(metadataSection, "MetaDataIndexPath", std::string()); + } + + if (DistCalcMethod::Undefined == p_reader.GetParameter("Index", "DistCalcMethod", DistCalcMethod::Undefined)) + { + std::cerr << "Error: Failed to load parameter DistCalcMethod." 
<< std::endl; + return ErrorCode::Fail; + } + return LoadConfig(p_reader); +} + + +ErrorCode +VectorIndex::SaveIndexConfig(std::ostream& p_configOut) +{ + if (nullptr != m_pMetadata) + { + p_configOut << "[MetaData]" << std::endl; + p_configOut << "MetaDataFilePath=" << m_sMetadataFile << std::endl; + p_configOut << "MetaDataIndexPath=" << m_sMetadataIndexFile << std::endl; + if (nullptr != m_pMetaToVec) p_configOut << "MetaDataToVectorIndex=true" << std::endl; + p_configOut << std::endl; + } + + p_configOut << "[Index]" << std::endl; + p_configOut << "IndexAlgoType=" << Helper::Convert::ConvertToString(GetIndexAlgoType()) << std::endl; + p_configOut << "ValueType=" << Helper::Convert::ConvertToString(GetVectorValueType()) << std::endl; + p_configOut << std::endl; + + return SaveConfig(p_configOut); +} + + +void +VectorIndex::BuildMetaMapping() +{ + m_pMetaToVec.reset(new std::unordered_map); + for (SizeType i = 0; i < m_pMetadata->Count(); i++) { + ByteArray meta = m_pMetadata->GetMetadata(i); + m_pMetaToVec->emplace(std::string((char*)meta.Data(), meta.Length()), i); + } +} + + +ErrorCode +VectorIndex::LoadIndex(const std::string& p_config, const std::vector& p_indexBlobs) +{ + SPTAG::Helper::IniReader p_reader; + std::istringstream p_configin(p_config); + if (SPTAG::ErrorCode::Success != p_reader.LoadIni(p_configin)) return ErrorCode::FailedParseValue; + LoadIndexConfig(p_reader); + + if (p_reader.DoesSectionExist("MetaData") && p_indexBlobs.size() > 4) + { + ByteArray pMetaIndex = p_indexBlobs[p_indexBlobs.size() - 1]; + m_pMetadata.reset(new MemMetadataSet(p_indexBlobs[p_indexBlobs.size() - 2], + ByteArray(pMetaIndex.Data() + sizeof(SizeType), pMetaIndex.Length() - sizeof(SizeType), false), + *((SizeType*)pMetaIndex.Data()))); + + if (!m_pMetadata->Available()) + { + std::cerr << "Error: Failed to load metadata." 
<< std::endl; + return ErrorCode::Fail; + } + + if (p_reader.GetParameter("MetaData", "MetaDataToVectorIndex", std::string()) == "true") + { + BuildMetaMapping(); + } + } + return LoadIndexDataFromMemory(p_indexBlobs); +} + + ErrorCode VectorIndex::LoadIndex(const std::string& p_folderPath) { @@ -65,40 +160,64 @@ VectorIndex::LoadIndex(const std::string& p_folderPath) } Helper::IniReader p_configReader; - if (ErrorCode::Success != p_configReader.LoadIniFile(folderPath + "/indexloader.ini")) + if (ErrorCode::Success != p_configReader.LoadIniFile(folderPath + "/indexloader.ini")) return ErrorCode::FailedOpenFile; + LoadIndexConfig(p_configReader); + + if (p_configReader.DoesSectionExist("MetaData")) { - return ErrorCode::FailedOpenFile; - } - - std::string metadataSection("MetaData"); - if (p_configReader.DoesSectionExist(metadataSection)) - { - std::string metadataFilePath = p_configReader.GetParameter(metadataSection, - "MetaDataFilePath", - std::string()); - std::string metadataIndexFilePath = p_configReader.GetParameter(metadataSection, - "MetaDataIndexPath", - std::string()); - - m_pMetadata.reset(new FileMetadataSet(folderPath + metadataFilePath, folderPath + metadataIndexFilePath)); + m_pMetadata.reset(new FileMetadataSet(folderPath + m_sMetadataFile, folderPath + m_sMetadataIndexFile)); if (!m_pMetadata->Available()) { std::cerr << "Error: Failed to load metadata." 
<< std::endl; return ErrorCode::Fail; } + + if (p_configReader.GetParameter("MetaData", "MetaDataToVectorIndex", std::string()) == "true") + { + BuildMetaMapping(); + } } - if (DistCalcMethod::Undefined == p_configReader.GetParameter("Index", "DistCalcMethod", DistCalcMethod::Undefined)) + return LoadIndexData(folderPath); +} + + +ErrorCode +VectorIndex::SaveIndex(std::string& p_config, const std::vector& p_indexBlobs) +{ + std::ostringstream p_configStream; + SaveIndexConfig(p_configStream); + p_config = p_configStream.str(); + + std::vector p_indexStreams; + for (size_t i = 0; i < p_indexBlobs.size(); i++) { - std::cerr << "Error: Failed to load parameter DistCalcMethod." << std::endl; - return ErrorCode::Fail; + p_indexStreams.push_back(new Helper::obufferstream(new Helper::streambuf((char*)p_indexBlobs[i].Data(), p_indexBlobs[i].Length()), true)); } - return LoadIndex(folderPath, p_configReader); + ErrorCode ret = ErrorCode::Success; + if (NeedRefine()) + { + ret = RefineIndex(p_indexStreams); + } + else + { + if (m_pMetadata != nullptr && p_indexStreams.size() > 5) + { + ret = m_pMetadata->SaveMetadata(*p_indexStreams[p_indexStreams.size() - 2], *p_indexStreams[p_indexStreams.size() - 1]); + } + if (ErrorCode::Success == ret) ret = SaveIndexData(p_indexStreams); + } + for (size_t i = 0; i < p_indexStreams.size(); i++) + { + delete p_indexStreams[i]; + } + return ret; } -ErrorCode VectorIndex::SaveIndex(const std::string& p_folderPath) +ErrorCode +VectorIndex::SaveIndex(const std::string& p_folderPath) { std::string folderPath(p_folderPath); if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) @@ -111,39 +230,24 @@ ErrorCode VectorIndex::SaveIndex(const std::string& p_folderPath) mkdir(folderPath.c_str()); } - std::string loaderFilePath = folderPath + "indexloader.ini"; + std::ofstream configFile(folderPath + "indexloader.ini"); + if (!configFile.is_open()) return ErrorCode::FailedCreateFile; + SaveIndexConfig(configFile); + configFile.close(); + + 
if (NeedRefine()) return RefineIndex(p_folderPath); - std::ofstream loaderFile(loaderFilePath); - if (!loaderFile.is_open()) - { - return ErrorCode::FailedCreateFile; - } - - if (nullptr != m_pMetadata) + if (m_pMetadata != nullptr) { - std::string metadataFile = "metadata.bin"; - std::string metadataIndexFile = "metadataIndex.bin"; - loaderFile << "[MetaData]" << std::endl; - loaderFile << "MetaDataFilePath=" << metadataFile << std::endl; - loaderFile << "MetaDataIndexPath=" << metadataIndexFile << std::endl; - loaderFile << std::endl; - - m_pMetadata->SaveMetadata(folderPath + metadataFile, folderPath + metadataIndexFile); + ErrorCode ret = m_pMetadata->SaveMetadata(folderPath + m_sMetadataFile, folderPath + m_sMetadataIndexFile); + if (ErrorCode::Success != ret) return ret; } - - loaderFile << "[Index]" << std::endl; - loaderFile << "IndexAlgoType=" << Helper::Convert::ConvertToString(GetIndexAlgoType()) << std::endl; - loaderFile << "ValueType=" << Helper::Convert::ConvertToString(GetVectorValueType()) << std::endl; - loaderFile << std::endl; - - ErrorCode ret = SaveIndex(folderPath, loaderFile); - loaderFile.close(); - return ret; + return SaveIndexData(folderPath); } ErrorCode VectorIndex::BuildIndex(std::shared_ptr p_vectorSet, - std::shared_ptr p_metadataSet) + std::shared_ptr p_metadataSet, bool p_withMetaIndex) { if (nullptr == p_vectorSet || p_vectorSet->Count() == 0 || p_vectorSet->Dimension() == 0 || p_vectorSet->GetValueType() != GetVectorValueType()) { @@ -152,13 +256,17 @@ VectorIndex::BuildIndex(std::shared_ptr p_vectorSet, BuildIndex(p_vectorSet->GetData(), p_vectorSet->Count(), p_vectorSet->Dimension()); m_pMetadata = std::move(p_metadataSet); + if (p_withMetaIndex && m_pMetadata != nullptr) + { + BuildMetaMapping(); + } return ErrorCode::Success; } ErrorCode -VectorIndex::SearchIndex(const void* p_vector, int p_neighborCount, std::vector& p_results) const { - QueryResult res(p_vector, p_neighborCount, p_results); +VectorIndex::SearchIndex(const 
void* p_vector, int p_neighborCount, bool p_withMeta, BasicResult* p_results) const { + QueryResult res(p_vector, p_neighborCount, p_withMeta, p_results); SearchIndex(res); return ErrorCode::Success; } @@ -170,17 +278,54 @@ VectorIndex::AddIndex(std::shared_ptr p_vectorSet, std::shared_ptrGetData(), p_vectorSet->Count(), p_vectorSet->Dimension()); + + SizeType currStart; + ErrorCode ret = AddIndex(p_vectorSet->GetData(), p_vectorSet->Count(), p_vectorSet->Dimension(), &currStart); + if (ret != ErrorCode::Success) return ret; + if (m_pMetadata == nullptr) { - m_pMetadata = std::move(p_metadataSet); + if (currStart == 0) + m_pMetadata = std::move(p_metadataSet); + else + return ErrorCode::Success; } else { m_pMetadata->AddBatch(*p_metadataSet); } + + if (m_pMetaToVec != nullptr) { + for (SizeType i = 0; i < p_vectorSet->Count(); i++) { + ByteArray meta = m_pMetadata->GetMetadata(currStart + i); + DeleteIndex(meta); + m_pMetaToVec->emplace(std::string((char*)meta.Data(), meta.Length()), currStart + i); + } + } return ErrorCode::Success; } +ErrorCode +VectorIndex::DeleteIndex(ByteArray p_meta) { + if (m_pMetaToVec == nullptr) return ErrorCode::Fail; + + std::string meta((char*)p_meta.Data(), p_meta.Length()); + auto iter = m_pMetaToVec->find(meta); + if (iter != m_pMetaToVec->end()) DeleteIndex(iter->second); + return ErrorCode::Success; +} + + +const void* VectorIndex::GetSample(ByteArray p_meta) +{ + if (m_pMetaToVec == nullptr) return nullptr; + + std::string meta((char*)p_meta.Data(), p_meta.Length()); + auto iter = m_pMetaToVec->find(meta); + if (iter != m_pMetaToVec->end()) return GetSample(iter->second); + return nullptr; +} + + std::shared_ptr VectorIndex::CreateInstance(IndexAlgoType p_algo, VectorValueType p_valuetype) { @@ -223,100 +368,61 @@ ErrorCode VectorIndex::LoadIndex(const std::string& p_loaderFilePath, std::shared_ptr& p_vectorIndex) { Helper::IniReader iniReader; - - if (ErrorCode::Success != iniReader.LoadIniFile(p_loaderFilePath + 
"/indexloader.ini")) - { - return ErrorCode::FailedOpenFile; - } + if (ErrorCode::Success != iniReader.LoadIniFile(p_loaderFilePath + "/indexloader.ini")) return ErrorCode::FailedOpenFile; IndexAlgoType algoType = iniReader.GetParameter("Index", "IndexAlgoType", IndexAlgoType::Undefined); VectorValueType valueType = iniReader.GetParameter("Index", "ValueType", VectorValueType::Undefined); - if (IndexAlgoType::Undefined == algoType || VectorValueType::Undefined == valueType) - { - return ErrorCode::Fail; - } - if (algoType == IndexAlgoType::BKT) { - switch (valueType) - { -#define DefineVectorValueType(Name, Type) \ - case VectorValueType::Name: \ - p_vectorIndex.reset(new BKT::Index); \ - p_vectorIndex->LoadIndex(p_loaderFilePath); \ - break; \ + p_vectorIndex = CreateInstance(algoType, valueType); + if (p_vectorIndex == nullptr) return ErrorCode::FailedParseValue; -#include "inc/Core/DefinitionList.h" -#undef DefineVectorValueType + return p_vectorIndex->LoadIndex(p_loaderFilePath); +} - default: break; - } - } - else if (algoType == IndexAlgoType::KDT) { - switch (valueType) - { -#define DefineVectorValueType(Name, Type) \ - case VectorValueType::Name: \ - p_vectorIndex.reset(new KDT::Index); \ - p_vectorIndex->LoadIndex(p_loaderFilePath); \ - break; \ -#include "inc/Core/DefinitionList.h" -#undef DefineVectorValueType - default: break; - } - } - return ErrorCode::Success; +ErrorCode +VectorIndex::LoadIndex(const std::string& p_config, const std::vector& p_indexBlobs, std::shared_ptr& p_vectorIndex) +{ + SPTAG::Helper::IniReader iniReader; + std::istringstream p_configin(p_config); + if (SPTAG::ErrorCode::Success != iniReader.LoadIni(p_configin)) return ErrorCode::FailedParseValue; + + IndexAlgoType algoType = iniReader.GetParameter("Index", "IndexAlgoType", IndexAlgoType::Undefined); + VectorValueType valueType = iniReader.GetParameter("Index", "ValueType", VectorValueType::Undefined); + + p_vectorIndex = CreateInstance(algoType, valueType); + if (p_vectorIndex 
== nullptr) return ErrorCode::FailedParseValue; + + return p_vectorIndex->LoadIndex(p_config, p_indexBlobs); } -ErrorCode VectorIndex::MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2) +ErrorCode +VectorIndex::MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2) { std::string folderPath1(p_indexFilePath1), folderPath2(p_indexFilePath2); - if (!folderPath1.empty() && *(folderPath1.rbegin()) != FolderSep) folderPath1 += FolderSep; - if (!folderPath2.empty() && *(folderPath2.rbegin()) != FolderSep) folderPath2 += FolderSep; - - Helper::IniReader p_configReader1, p_configReader2; - if (ErrorCode::Success != p_configReader1.LoadIniFile(folderPath1 + "/indexloader.ini")) - return ErrorCode::FailedOpenFile; - - if (ErrorCode::Success != p_configReader2.LoadIniFile(folderPath2 + "/indexloader.ini")) - return ErrorCode::FailedOpenFile; - - std::shared_ptr index = CreateInstance( - p_configReader1.GetParameter("Index", "IndexAlgoType", IndexAlgoType::Undefined), - p_configReader1.GetParameter("Index", "ValueType", VectorValueType::Undefined)); - if (index == nullptr) return ErrorCode::FailedParseValue; - - std::string empty(""); - if (!COMMON::DataUtils::MergeIndex(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), - folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty), - folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty), - folderPath2 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), - folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty), - folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty))) - return ErrorCode::Fail; - for (const auto& iter : p_configReader1.GetParameters("Index")) - index->SetParameter(iter.first.c_str(), iter.second.c_str()); - - if (p_configReader1.DoesSectionExist("MetaData")) - { - for (const auto& iter : p_configReader1.GetParameters("MetaData")) - 
index->SetParameter(iter.first.c_str(), iter.second.c_str()); - index->SetMetadata(folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty), - folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty)); - } + std::shared_ptr index1, index2; + LoadIndex(folderPath1, index1); + LoadIndex(folderPath2, index2); + + std::shared_ptr p_vectorSet; + std::shared_ptr p_metaSet; + size_t vectorSize = GetValueTypeSize(index2->GetVectorValueType()) * index2->GetFeatureDim(); + std::uint64_t offsets[2] = { 0 }; + ByteArray metaoffset((std::uint8_t*)offsets, 2 * sizeof(std::uint64_t), false); + for (SizeType i = 0; i < index2->GetNumSamples(); i++) + if (index2->ContainSample(i)) + { + p_vectorSet.reset(new BasicVectorSet(ByteArray((std::uint8_t*)index2->GetSample(i), vectorSize, false), + index2->GetVectorValueType(), index2->GetFeatureDim(), 1)); + ByteArray meta = index2->GetMetadata(i); + offsets[1] = meta.Length(); + p_metaSet.reset(new MemMetadataSet(meta, metaoffset, 1)); + index1->AddIndex(p_vectorSet, p_metaSet); + } - std::ifstream vecIn(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), std::ios::binary); - int R, C; - vecIn.read((char*)&R, sizeof(int)); - vecIn.read((char*)&C, sizeof(int)); - size_t size = R * C * GetValueTypeSize(index->GetVectorValueType()); - char* data = new char[size]; - vecIn.read(data, size); - vecIn.close(); - index->BuildIndex((void*)data, R, C); - index->SaveIndex(folderPath1); + index1->SaveIndex(folderPath1); return ErrorCode::Success; -} \ No newline at end of file +} diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/VectorSet.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/VectorSet.cpp index 36178d62cb7cdb8f671238b78d3993240d73b990..45dd74dd7849255dd1d6db7832fe0e82ba87f18b 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/Core/VectorSet.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Core/VectorSet.cpp @@ -19,7 +19,7 
@@ VectorSet::~VectorSet() BasicVectorSet::BasicVectorSet(const ByteArray& p_bytesArray, VectorValueType p_valueType, - SizeType p_dimension, + DimensionType p_dimension, SizeType p_vectorCount) : m_data(p_bytesArray), m_valueType(p_valueType), @@ -43,15 +43,14 @@ BasicVectorSet::GetValueType() const void* -BasicVectorSet::GetVector(IndexType p_vectorID) const +BasicVectorSet::GetVector(SizeType p_vectorID) const { - if (p_vectorID < 0 || static_cast(p_vectorID) >= m_vectorCount) + if (p_vectorID < 0 || p_vectorID >= m_vectorCount) { return nullptr; } - SizeType offset = static_cast(p_vectorID) * m_perVectorDataSize; - return reinterpret_cast(m_data.Data() + offset); + return reinterpret_cast(m_data.Data() + ((size_t)p_vectorID) * m_perVectorDataSize); } @@ -61,7 +60,7 @@ BasicVectorSet::GetData() const return reinterpret_cast(m_data.Data()); } -SizeType +DimensionType BasicVectorSet::Dimension() const { return m_dimension; @@ -88,8 +87,8 @@ BasicVectorSet::Save(const std::string& p_vectorFile) const FILE * fp = fopen(p_vectorFile.c_str(), "wb"); if (fp == NULL) return ErrorCode::FailedOpenFile; - fwrite(&m_vectorCount, sizeof(int), 1, fp); - fwrite(&m_dimension, sizeof(int), 1, fp); + fwrite(&m_vectorCount, sizeof(SizeType), 1, fp); + fwrite(&m_dimension, sizeof(DimensionType), 1, fp); fwrite((const void*)(m_data.Data()), m_data.Length(), 1, fp); fclose(fp); diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/Helper/SimpleIniReader.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Helper/SimpleIniReader.cpp index 7456ad9df30b15baa7e13b71cb20cfc432c6480e..28610dbe19423c21f6b0c31c64c9f2f5dcd70542 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/Helper/SimpleIniReader.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Helper/SimpleIniReader.cpp @@ -25,15 +25,8 @@ IniReader::~IniReader() } -ErrorCode -IniReader::LoadIniFile(const std::string& p_iniFilePath) +ErrorCode IniReader::LoadIni(std::istream& p_input) { - std::ifstream 
input(p_iniFilePath); - if (!input.is_open()) - { - return ErrorCode::FailedOpenFile; - } - const std::size_t c_bufferSize = 1 << 16; std::unique_ptr line(new char[c_bufferSize]); @@ -51,9 +44,9 @@ IniReader::LoadIniFile(const std::string& p_iniFilePath) return std::isspace(p_ch) != 0; }; - while (!input.eof()) + while (!p_input.eof()) { - if (!input.getline(line.get(), c_bufferSize)) + if (!p_input.getline(line.get(), c_bufferSize)) { break; } @@ -141,11 +134,21 @@ IniReader::LoadIniFile(const std::string& p_iniFilePath) } } } - return ErrorCode::Success; } +ErrorCode +IniReader::LoadIniFile(const std::string& p_iniFilePath) +{ + std::ifstream input(p_iniFilePath); + if (!input.is_open()) return ErrorCode::FailedOpenFile; + ErrorCode ret = LoadIni(input); + input.close(); + return ret; +} + + bool IniReader::DoesSectionExist(const std::string& p_section) const { diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/Helper/VectorSetReader.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Helper/VectorSetReader.cpp new file mode 100644 index 0000000000000000000000000000000000000000..44371ae2428427ad715b140eb6bebfa5d8f226f7 --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Helper/VectorSetReader.cpp @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "inc/Helper/VectorSetReader.h" +#include "inc/Helper/VectorSetReaders/DefaultReader.h" + + +using namespace SPTAG; +using namespace SPTAG::Helper; + + +ReaderOptions::ReaderOptions(VectorValueType p_valueType, DimensionType p_dimension, std::string p_vectorDelimiter, std::uint32_t p_threadNum) + : m_threadNum(p_threadNum), m_dimension(p_dimension), m_vectorDelimiter(p_vectorDelimiter), m_inputValueType(p_valueType) +{ + AddOptionalOption(m_threadNum, "-t", "--thread", "Thread Number."); + AddOptionalOption(m_vectorDelimiter, "", "--delimiter", "Vector delimiter."); + AddRequiredOption(m_dimension, "-d", "--dimension", "Dimension of vector."); + AddRequiredOption(m_inputValueType, "-v", "--vectortype", "Input vector data type. Default is float."); +} + + +ReaderOptions::~ReaderOptions() +{ +} + + +VectorSetReader::VectorSetReader(std::shared_ptr p_options) + : m_options(p_options) +{ +} + + +VectorSetReader:: ~VectorSetReader() +{ +} + + +std::shared_ptr +VectorSetReader::CreateInstance(std::shared_ptr p_options) +{ + return std::shared_ptr(new DefaultReader(std::move(p_options))); +} + + diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/VectorSetReaders/DefaultReader.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Helper/VectorSetReaders/DefaultReader.cpp similarity index 92% rename from core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/VectorSetReaders/DefaultReader.cpp rename to core/src/index/thirdparty/SPTAG/AnnService/src/Helper/VectorSetReaders/DefaultReader.cpp index 7f7c4187b0113babbe8b8790ea92ceade8b70e58..4d775f4a509065b7ac1bd1b52a5e884ded350ee8 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/VectorSetReaders/DefaultReader.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Helper/VectorSetReaders/DefaultReader.cpp @@ -1,17 +1,17 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "inc/IndexBuilder/VectorSetReaders/DefaultReader.h" +#include "inc/Helper/VectorSetReaders/DefaultReader.h" #include "inc/Helper/StringConvert.h" #include "inc/Helper/CommonHelper.h" -#include "inc/IndexBuilder/ThreadPool.h" #include #include #include +#include using namespace SPTAG; -using namespace SPTAG::IndexBuilder; +using namespace SPTAG::Helper; namespace { @@ -139,10 +139,13 @@ private: } // namespace Local } // namespace -DefaultReader::DefaultReader(std::shared_ptr p_options) + +DefaultReader::DefaultReader(std::shared_ptr p_options) : VectorSetReader(std::move(p_options)), - m_subTaskBlocksize(0) + m_subTaskBlocksize(0) { + omp_set_num_threads(m_options->m_threadNum); + std::string tempFolder("tempfolder"); if (!direxists(tempFolder.c_str())) { @@ -180,7 +183,7 @@ DefaultReader::LoadFile(const std::string& p_filePaths) { const auto& files = GetFileSizes(p_filePaths); std::vector> subWorks; - subWorks.reserve(files.size() * ThreadPool::CurrentThreadNum()); + subWorks.reserve(files.size() * m_options->m_threadNum); m_subTaskCount = 0; for (const auto& fileInfo : files) @@ -197,7 +200,7 @@ DefaultReader::LoadFile(const std::string& p_filePaths) std::size_t blockSize = m_subTaskBlocksize; if (0 == blockSize) { - fileTaskCount = ThreadPool::CurrentThreadNum(); + fileTaskCount = m_options->m_threadNum; blockSize = (fileInfo.second + fileTaskCount - 1) / fileTaskCount; } else @@ -223,9 +226,10 @@ DefaultReader::LoadFile(const std::string& p_filePaths) m_waitSignal.Reset(m_subTaskCount); - for (auto& workItem : subWorks) +#pragma omp parallel for schedule(dynamic) + for (int64_t i = 0; i < (int64_t)subWorks.size(); i++) { - ThreadPool::Queue(std::move(workItem)); + subWorks[i](); } m_waitSignal.Wait(); @@ -244,7 +248,7 @@ DefaultReader::GetVectorSet() const std::ifstream inputStream; inputStream.open(m_vectorOutput, std::ifstream::binary); - inputStream.seekg(sizeof(uint32_t) + sizeof(uint32_t), std::ifstream::beg); + inputStream.seekg(sizeof(SizeType) 
+ sizeof(DimensionType), std::ifstream::beg); inputStream.read(vecBuf, m_totalRecordVectorBytes); inputStream.close(); @@ -276,7 +280,7 @@ DefaultReader::LoadFileInternal(const std::string& p_filePath, std::ofstream metaStreamContent; std::ofstream metaStreamIndex; - std::uint32_t recordCount = 0; + SizeType recordCount = 0; std::uint64_t metaOffset = 0; std::size_t totalRead = 0; std::streamoff startpos = p_fileBlockID * p_fileBlockSize; @@ -400,12 +404,12 @@ DefaultReader::MergeData() std::unique_ptr bufferHolder(new char[bufferSize]); char* buf = bufferHolder.get(); - std::uint32_t uint32Var = m_totalRecordCount; + SizeType totalRecordCount = m_totalRecordCount; outputStream.open(m_vectorOutput, std::ofstream::binary); - outputStream.write(reinterpret_cast(&uint32Var), sizeof(uint32Var)); - outputStream.write(reinterpret_cast(&(m_options->m_dimension)), sizeof(m_options->m_dimension)); + outputStream.write(reinterpret_cast(&totalRecordCount), sizeof(totalRecordCount)); + outputStream.write(reinterpret_cast(&(m_options->m_dimension)), sizeof(m_options->m_dimension)); for (std::uint32_t i = 0; i < m_subTaskCount; ++i) { @@ -442,7 +446,7 @@ DefaultReader::MergeData() outputStream.open(m_metadataIndexOutput, std::ofstream::binary); - outputStream.write(reinterpret_cast(&uint32Var), sizeof(uint32Var)); + outputStream.write(reinterpret_cast(&totalRecordCount), sizeof(totalRecordCount)); std::uint64_t totalOffset = 0; for (std::uint32_t i = 0; i < m_subTaskCount; ++i) @@ -453,18 +457,18 @@ DefaultReader::MergeData() file += ".tmp"; inputStream.open(file, std::ifstream::binary); - for (std::uint32_t remains = m_subTaskRecordCount[i]; remains > 0;) + for (SizeType remains = m_subTaskRecordCount[i]; remains > 0;) { std::size_t readBytesCount = min(remains * sizeof(std::uint64_t), bufferSizeTrim64); inputStream.read(buf, readBytesCount); std::uint64_t* offset = reinterpret_cast(buf); - for (std::uint32_t i = 0; i < readBytesCount / sizeof(std::uint64_t); ++i) + for 
(std::uint64_t i = 0; i < readBytesCount / sizeof(std::uint64_t); ++i) { offset[i] += totalOffset; } outputStream.write(buf, readBytesCount); - remains -= static_cast(readBytesCount / sizeof(std::uint64_t)); + remains -= static_cast(readBytesCount / sizeof(std::uint64_t)); } inputStream.read(buf, sizeof(std::uint64_t)); diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/Options.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/Options.cpp index d0fcd0fd8ea964dc0ee073c5e2c1052a0336e34f..6360b73c2a72331d3ea8f68883ef1355dad595a9 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/Options.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/Options.cpp @@ -11,14 +11,8 @@ using namespace SPTAG::IndexBuilder; BuilderOptions::BuilderOptions() - : m_threadNum(32), - m_inputValueType(VectorValueType::Float), - m_vectorDelimiter("|") + : Helper::ReaderOptions(VectorValueType::Float, 0, "|", 32) { - AddOptionalOption(m_threadNum, "-t", "--thread", "Thread Number."); - AddOptionalOption(m_vectorDelimiter, "", "--delimiter", "Vector delimiter."); - AddRequiredOption(m_dimension, "-d", "--dimension", "Dimension of vector."); - AddRequiredOption(m_inputValueType, "-v", "--vectortype", "Input vector data type. Default is float."); AddRequiredOption(m_inputFiles, "-i", "--input", "Input raw data."); AddRequiredOption(m_outputFolder, "-o", "--outputfolder", "Output folder."); AddRequiredOption(m_indexAlgoType, "-a", "--algo", "Index Algorithm type."); diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/VectorSetReader.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/VectorSetReader.cpp deleted file mode 100644 index e50f6f5eb070c22fe6ca1d0cc61b11618b72e1e8..0000000000000000000000000000000000000000 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/VectorSetReader.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) Microsoft Corporation. 
All rights reserved. -// Licensed under the MIT License. - -#include "inc/IndexBuilder/VectorSetReader.h" -#include "inc/IndexBuilder/VectorSetReaders/DefaultReader.h" - - -using namespace SPTAG; -using namespace SPTAG::IndexBuilder; - -VectorSetReader::VectorSetReader(std::shared_ptr p_options) - : m_options(p_options) -{ -} - - -VectorSetReader:: ~VectorSetReader() -{ -} - - -std::shared_ptr -VectorSetReader::CreateInstance(std::shared_ptr p_options) -{ - return std::shared_ptr(new DefaultReader(std::move(p_options))); -} - diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/main.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/main.cpp index ba4de4612851a5bb5a341fd62355c3699b5ebc93..040703c3cab42927b8906d02ba8b67a9f7d8cdff 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/main.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/IndexBuilder/main.cpp @@ -1,9 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "inc/IndexBuilder/ThreadPool.h" #include "inc/IndexBuilder/Options.h" -#include "inc/IndexBuilder/VectorSetReader.h" +#include "inc/Helper/VectorSetReader.h" #include "inc/Core/VectorIndex.h" #include "inc/Core/Common.h" #include "inc/Helper/SimpleIniReader.h" @@ -20,7 +19,7 @@ int main(int argc, char* argv[]) { exit(1); } - IndexBuilder::ThreadPool::Init(options->m_threadNum); + auto indexBuilder = VectorIndex::CreateInstance(options->m_indexAlgoType, options->m_inputValueType); Helper::IniReader iniReader; @@ -32,14 +31,14 @@ int main(int argc, char* argv[]) for (int i = 1; i < argc; i++) { std::string param(argv[i]); - int idx = (int)param.find("="); - if (idx < 0) continue; + size_t idx = param.find("="); + if (idx == std::string::npos) continue; std::string paramName = param.substr(0, idx); std::string paramVal = param.substr(idx + 1); std::string sectionName; - idx = (int)paramName.find("."); - if (idx >= 0) { + idx = paramName.find("."); + if (idx != std::string::npos) { sectionName = paramName.substr(0, idx); paramName = paramName.substr(idx + 1); } @@ -63,9 +62,10 @@ int main(int argc, char* argv[]) fprintf(stderr, "Failed to read input file.\n"); exit(1); } - int row, col; - inputStream.read((char*)&row, sizeof(int)); - inputStream.read((char*)&col, sizeof(int)); + SizeType row; + DimensionType col; + inputStream.read((char*)&row, sizeof(SizeType)); + inputStream.read((char*)&col, sizeof(DimensionType)); std::uint64_t totalRecordVectorBytes = ((std::uint64_t)GetValueTypeSize(options->m_inputValueType)) * row * col; ByteArray vectorSet = ByteArray::Alloc(totalRecordVectorBytes); char* vecBuf = reinterpret_cast(vectorSet.Data()); @@ -81,7 +81,7 @@ int main(int argc, char* argv[]) indexBuilder->SaveIndex(options->m_outputFolder); } else { - auto vectorReader = IndexBuilder::VectorSetReader::CreateInstance(options); + auto vectorReader = Helper::VectorSetReader::CreateInstance(options); if (ErrorCode::Success != 
vectorReader->LoadFile(options->m_inputFiles)) { fprintf(stderr, "Failed to read input file.\n"); diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/IndexSearcher/main.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/IndexSearcher/main.cpp index 316516d92e81cc5bd83d77058089be70ae0bdfe3..0a8c84c2e38c14f15d13dc43cd26937adc31860a 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/IndexSearcher/main.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/IndexSearcher/main.cpp @@ -15,13 +15,13 @@ using namespace SPTAG; template -float CalcRecall(std::vector &results, const std::vector> &truth, int NumQuerys, int K, std::ofstream& log) +float CalcRecall(std::vector &results, const std::vector> &truth, SizeType NumQuerys, int K, std::ofstream& log) { float meanrecall = 0, minrecall = MaxDist, maxrecall = 0, stdrecall = 0; std::vector thisrecall(NumQuerys, 0); - for (int i = 0; i < NumQuerys; i++) + for (SizeType i = 0; i < NumQuerys; i++) { - for (int id : truth[i]) + for (SizeType id : truth[i]) { for (int j = 0; j < K; j++) { @@ -38,7 +38,7 @@ float CalcRecall(std::vector &results, const std::vector maxrecall) maxrecall = thisrecall[i]; } meanrecall /= NumQuerys; - for (int i = 0; i < NumQuerys; i++) + for (SizeType i = 0; i < NumQuerys; i++) { stdrecall += (thisrecall[i] - meanrecall) * (thisrecall[i] - meanrecall); } @@ -47,11 +47,11 @@ float CalcRecall(std::vector &results, const std::vector>& truth, int NumQuerys, int K) +void LoadTruth(std::ifstream& fp, std::vector>& truth, SizeType NumQuerys, int K) { - int get; + SizeType get; std::string line; - for (int i = 0; i < NumQuerys; ++i) + for (SizeType i = 0; i < NumQuerys; ++i) { truth[i].clear(); for (int j = 0; j < K; ++j) @@ -70,8 +70,8 @@ int Process(Helper::IniReader& reader, VectorIndex& index) std::string truthFile = reader.GetParameter("Index", "TruthFile", std::string("truth.txt")); std::string outputFile = reader.GetParameter("Index", "ResultFile", std::string("")); - int 
numBatchQuerys = reader.GetParameter("Index", "NumBatchQuerys", 10000); - int numDebugQuerys = reader.GetParameter("Index", "NumDebugQuerys", -1); + SizeType numBatchQuerys = reader.GetParameter("Index", "NumBatchQuerys", (SizeType)10000); + SizeType numDebugQuerys = reader.GetParameter("Index", "NumDebugQuerys", (SizeType)-1); int K = reader.GetParameter("Index", "K", 32); std::vector maxCheck = Helper::StrUtils::SplitString(reader.GetParameter("Index", "MaxCheck", std::string("2048")), "#"); @@ -100,13 +100,13 @@ int Process(Helper::IniReader& reader, VectorIndex& index) return -1; } - int numQuerys = (numDebugQuerys >= 0) ? numDebugQuerys : numBatchQuerys; + SizeType numQuerys = (numDebugQuerys >= 0) ? numDebugQuerys : numBatchQuerys; std::vector> Query(numQuerys, std::vector(index.GetFeatureDim(), 0)); - std::vector> truth(numQuerys); + std::vector> truth(numQuerys); std::vector results(numQuerys, QueryResult(NULL, K, 0)); - int * latencies = new int[numQuerys + 1]; + clock_t * latencies = new clock_t[numQuerys + 1]; int base = 1; if (index.GetDistCalcMethod() == DistCalcMethod::Cosine) { @@ -114,7 +114,7 @@ int Process(Helper::IniReader& reader, VectorIndex& index) } int basesquare = base * base; - int dims = index.GetFeatureDim(); + DimensionType dims = index.GetFeatureDim(); std::vector QStrings; while (!inStream.eof()) { @@ -122,43 +122,33 @@ int Process(Helper::IniReader& reader, VectorIndex& index) COMMON::Utils::PrepareQuerys(inStream, QStrings, Query, numQuerys, dims, index.GetDistCalcMethod(), base); if (numQuerys == 0) break; - for (int i = 0; i < numQuerys; i++) results[i].SetTarget(Query[i].data()); + for (SizeType i = 0; i < numQuerys; i++) results[i].SetTarget(Query[i].data()); if (ftruth.is_open()) LoadTruth(ftruth, truth, numQuerys, K); std::cout << " \t[avg] \t[99%] \t[95%] \t[recall] \t[mem]" << std::endl; - int subSize = (numQuerys - 1) / index.GetNumThreads() + 1; + SizeType subSize = (numQuerys - 1) / omp_get_num_threads() + 1; for 
(std::string& mc : maxCheck) { index.SetParameter("MaxCheck", mc.c_str()); - for (int i = 0; i < numQuerys; i++) results[i].Reset(); + for (SizeType i = 0; i < numQuerys; i++) results[i].Reset(); - if (index.GetNumThreads() == 1) +#pragma omp parallel for + for (int tid = 0; tid < omp_get_num_threads(); tid++) { - for (int i = 0; i < numQuerys; i++) + SizeType start = tid * subSize; + SizeType end = min((tid + 1) * subSize, numQuerys); + for (SizeType i = start; i < end; i++) { latencies[i] = clock(); index.SearchIndex(results[i]); } } - else - { -#pragma omp parallel for - for (int tid = 0; tid < index.GetNumThreads(); tid++) - { - int start = tid * subSize; - int end = min((tid + 1) * subSize, numQuerys); - for (int i = start; i < end; i++) - { - latencies[i] = clock(); - index.SearchIndex(results[i]); - } - } - } + latencies[numQuerys] = clock(); float timeMean = 0, timeMin = MaxDist, timeMax = 0, timeStd = 0; - for (int i = 0; i < numQuerys; i++) + for (SizeType i = 0; i < numQuerys; i++) { if (latencies[i + 1] >= latencies[i]) latencies[i] = latencies[i + 1] - latencies[i]; @@ -169,16 +159,16 @@ int Process(Helper::IniReader& reader, VectorIndex& index) if (latencies[i] < timeMin) timeMin = (float)latencies[i]; } timeMean /= numQuerys; - for (int i = 0; i < numQuerys; i++) timeStd += ((float)latencies[i] - timeMean) * ((float)latencies[i] - timeMean); + for (SizeType i = 0; i < numQuerys; i++) timeStd += ((float)latencies[i] - timeMean) * ((float)latencies[i] - timeMean); timeStd = std::sqrt(timeStd / numQuerys); log << timeMean << " " << timeStd << " " << timeMin << " " << timeMax << " "; - std::sort(latencies, latencies + numQuerys, [](int x, int y) + std::sort(latencies, latencies + numQuerys, [](clock_t x, clock_t y) { return x < y; }); - float l99 = float(latencies[int(numQuerys * 0.99)]) / CLOCKS_PER_SEC; - float l95 = float(latencies[int(numQuerys * 0.95)]) / CLOCKS_PER_SEC; + float l99 = float(latencies[SizeType(numQuerys * 0.99)]) / CLOCKS_PER_SEC; + 
float l95 = float(latencies[SizeType(numQuerys * 0.95)]) / CLOCKS_PER_SEC; float recall = 0; if (ftruth.is_open()) @@ -202,7 +192,7 @@ int Process(Helper::IniReader& reader, VectorIndex& index) if (fp.is_open()) { fp << std::setprecision(3) << std::fixed; - for (int i = 0; i < numQuerys; i++) + for (SizeType i = 0; i < numQuerys; i++) { fp << QStrings[i] << ":"; for (int j = 0; j < K; j++) @@ -258,13 +248,13 @@ int main(int argc, char** argv) { std::string param(argv[i]); size_t idx = param.find("="); - if (idx < 0) continue; + if (idx == std::string::npos) continue; std::string paramName = param.substr(0, idx); std::string paramVal = param.substr(idx + 1); std::string sectionName; idx = paramName.find("."); - if (idx >= 0) { + if (idx != std::string::npos) { sectionName = paramName.substr(0, idx); paramName = paramName.substr(idx + 1); } diff --git a/core/src/index/thirdparty/SPTAG/AnnService/src/Server/SearchService.cpp b/core/src/index/thirdparty/SPTAG/AnnService/src/Server/SearchService.cpp index a85fdcdb392123994e0815eedb2f64ac90f1384b..83096fbcde59c84cdf88eaf5827224d93653da07 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/src/Server/SearchService.cpp +++ b/core/src/index/thirdparty/SPTAG/AnnService/src/Server/SearchService.cpp @@ -114,7 +114,7 @@ SearchService::Run() void SearchService::RunSocketMode() { - auto threadNum = max((unsigned int)1, m_serviceContext->GetServiceSettings()->m_threadNum); + auto threadNum = max((SizeType)1, m_serviceContext->GetServiceSettings()->m_threadNum); m_threadPool.reset(new boost::asio::thread_pool(threadNum)); Socket::PacketHandlerMapPtr handlerMap(new Socket::PacketHandlerMap); @@ -161,7 +161,7 @@ SearchService::RunInteractiveMode() std::unique_ptr inputBuffer(new char[bufferSize]); while (true) { - fprintf(stdout, "Query: "); + std::cout << "Query: "; if (!fgets(inputBuffer.get(), bufferSize, stdin)) { break; @@ -169,29 +169,28 @@ SearchService::RunInteractiveMode() auto callback = [](std::shared_ptr p_exeContext) 
{ - fprintf(stdout, "Result:\n"); + std::cout << "Result:" << std::endl; if (nullptr == p_exeContext) { - fprintf(stdout, "Not Executed.\n"); + std::cout << "Not Executed." << std::endl; return; } const auto& results = p_exeContext->GetResults(); for (const auto& result : results) { - fprintf(stdout, "Index: %s\n", result.m_indexName.c_str()); + std::cout << "Index: " << result.m_indexName << std::endl; int idx = 0; for (const auto& res : result.m_results) { - fprintf(stdout, "------------------\n"); - fprintf(stdout, "DocIndex: %d Distance: %f", res.VID, res.Dist); + std::cout << "------------------" << std::endl; + std::cout << "DocIndex: " << res.VID << " Distance: " << res.Dist; if (result.m_results.WithMeta()) { const auto& metadata = result.m_results.GetMetadata(idx); - fprintf(stdout, " MetaData: %.*s", static_cast(metadata.Length()), metadata.Data()); + std::cout << " MetaData: " << std::string((char*)metadata.Data(), metadata.Length()); } - - fprintf(stdout, "\n"); + std::cout << std::endl; ++idx; } } diff --git a/core/src/index/thirdparty/SPTAG/CMakeLists.txt b/core/src/index/thirdparty/SPTAG/CMakeLists.txt index 01ff74e5d20f79009feab2481f1dad4105549a27..44544bf7e9ba450518e5fccf96a2bda247221aad 100644 --- a/core/src/index/thirdparty/SPTAG/CMakeLists.txt +++ b/core/src/index/thirdparty/SPTAG/CMakeLists.txt @@ -19,12 +19,12 @@ if(NOT WIN32) endif() if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") - # require at least gcc 4.7 - if (CXX_COMPILER_VERSION VERSION_LESS 4.7) - message(FATAL_ERROR "GCC version must be at least 4.7!") + # require at least gcc 5.0 + if (CXX_COMPILER_VERSION VERSION_LESS 5.0) + message(FATAL_ERROR "GCC version must be at least 5.0!") endif() - set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -lm -lrt -DNDEBUG -std=c++11 -fopenmp -march=native") - set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -ggdb 
-lm -lrt -DNDEBUG -std=c++11 -fopenmp -march=native") + set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -lm -lrt -DNDEBUG -std=c++14 -fopenmp -march=native") + set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -ggdb -lm -lrt -DNDEBUG -std=c++14 -fopenmp -march=native") elseif(WIN32) if(NOT MSVC14) message(FATAL_ERROR "On Windows, only MSVC version 14 are supported!") @@ -74,54 +74,18 @@ else() message (FATAL_ERROR "Could no find openmp!") endif() -#find_package(Boost 1.67 COMPONENTS system thread serialization wserialization regex) -#if (Boost_FOUND) -# include_directories (${Boost_INCLUDE_DIR}) -# link_directories (${Boost_LIBRARY_DIR} "/usr/lib") -# message (STATUS "Found Boost.") -# message (STATUS "Include Path: ${Boost_INCLUDE_DIRS}") -# message (STATUS "Library Path: ${Boost_LIBRARY_DIRS}") -# message (STATUS "Library: ${Boost_LIBRARIES}") -#else() -# message (FATAL_ERROR "Could not find Boost 1.67!") -#endif() - -#set(Boost_LIBRARIES -# boost_system_static -# boost_filesystem_static -# boost_serialization_static -# boost_wserialization_static -# boost_regex_static -# boost_thread_static) -set(TBB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../tbb) - -if (WIN32) - if (${CMAKE_SIZEOF_VOID_P} EQUAL "8") - set (TBB_LIBRARY_SUFFIX "lib/intel64/vc14") - else() - set (TBB_LIBRARY_SUFFIX "lib/ia32/vc14") - endif() - - find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_DIR} "C:/Program Files/Intel/TBB" PATH_SUFFIXES include) - find_library(TBB_LIBRARIES tbb${CMAKE_STATIC_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "C:/Program Files/Intel/TBB" PATH_SUFFIXES ${TBB_LIBRARY_SUFFIX}) -else() - find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_DIR} "/usr/" PATH_SUFFIXES include) - find_library(TBB_LIBRARIES libtbb${CMAKE_SHARED_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "/usr/") -endif() - -set(TBB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../tbb) -find_path(TBB_INCLUDE_DIRS 
tbb/tbb.h HINTS ${TBB_DIR} "/usr/" PATH_SUFFIXES include) -find_library(TBB_LIBRARIES libtbb${CMAKE_SHARED_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "/usr/") - -if (TBB_INCLUDE_DIRS AND TBB_LIBRARIES) - include_directories (${TBB_INCLUDE_DIRS}) - message (STATUS "Found TBB.") - message (STATUS "Include Path:" ${TBB_INCLUDE_DIRS}) - message (STATUS "Library:" ${TBB_LIBRARIES}) +find_package(Boost 1.67 COMPONENTS system thread serialization wserialization regex) +if (Boost_FOUND) + include_directories (${Boost_INCLUDE_DIR}) + link_directories (${Boost_LIBRARY_DIR} "/usr/lib") + message (STATUS "Found Boost.") + message (STATUS "Include Path: ${Boost_INCLUDE_DIRS}") + message (STATUS "Library Path: ${Boost_LIBRARY_DIRS}") + message (STATUS "Library: ${Boost_LIBRARIES}") else() - message (FATAL_ERROR "Could not find TBB!") + message (FATAL_ERROR "Could not find Boost 1.67!") endif() add_subdirectory (AnnService) -#add_subdirectory (Wrappers) -#add_subdirectory (Test) +add_subdirectory (Wrappers) +add_subdirectory (Test) diff --git a/core/src/index/thirdparty/SPTAG/Dockerfile b/core/src/index/thirdparty/SPTAG/Dockerfile index e7cedb33a922662e8cfa52b16055b71f3d3d2cca..7c1acd25e4657fdbe7bef31941e055e794eb318c 100644 --- a/core/src/index/thirdparty/SPTAG/Dockerfile +++ b/core/src/index/thirdparty/SPTAG/Dockerfile @@ -6,7 +6,7 @@ COPY AnnService ./AnnService/ COPY Test ./Test/ COPY Wrappers ./Wrappers/ -RUN apt-get update && apt-get -y install wget build-essential libtbb-dev \ +RUN apt-get update && apt-get -y install wget build-essential \ # remove the following if you don't want to build the wrappers openjdk-8-jdk python3-pip swig diff --git a/core/src/index/thirdparty/SPTAG/README.md b/core/src/index/thirdparty/SPTAG/README.md index 20f2a5458cb0375ab5687cf51c0c22ee498dcd8b..ae4f0aab9b32cb4770bd4fa86de9de305740ada1 100644 --- a/core/src/index/thirdparty/SPTAG/README.md +++ b/core/src/index/thirdparty/SPTAG/README.md @@ -43,7 +43,6 @@ The searches in the trees and the graph are 
iteratively conducted. * swig >= 3.0 * cmake >= 3.12.0 * boost >= 1.67.0 -* tbb >= 4.2 ### **Install** @@ -66,7 +65,7 @@ Compiling the ALL_BUILD project in the Visual Studio (at least 2015) will genera ```bash docker build -t sptag . ``` -Will build a docker container with binaries in `/app/Release/` +Will build a docker container with binaries in `/app/Release/`. ### **Verify** @@ -75,6 +74,7 @@ Run the test (or Test.exe) in the Release folder to verify all the tests have pa ### **Usage** The detailed usage can be found in [Get started](docs/GettingStart.md). +The detailed parameters tunning can be found in [Parameters](docs/Parameters.md). ## **References** Please cite SPTAG in your publications if it helps your research: diff --git a/core/src/index/thirdparty/SPTAG/SPTAG.sln b/core/src/index/thirdparty/SPTAG/SPTAG.sln index 77adfe2bb71fab60617c6eff5200f94ad3f1cae2..5fdfd0297c6d035d58b986221652e7a8d482f576 100644 --- a/core/src/index/thirdparty/SPTAG/SPTAG.sln +++ b/core/src/index/thirdparty/SPTAG/SPTAG.sln @@ -1,4 +1,3 @@ - Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 14 VisualStudioVersion = 14.0.25420.1 @@ -66,6 +65,22 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "JavaClient", "Wrappers\Java {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CsharpCore", "Wrappers\CsharpCore.vcxproj", "{1896C009-AD46-4A70-B83C-4652A7F37503}" + ProjectSection(ProjectDependencies) = postProject + {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CsharpClient", "Wrappers\CsharpClient.vcxproj", "{363BA3BB-75C4-4CC7-AECB-28C7534B3710}" + ProjectSection(ProjectDependencies) = postProject + {F9A72303-6381-4C80-86FF-606A2F6F7B96} = {F9A72303-6381-4C80-86FF-606A2F6F7B96} + {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = 
{C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLRCore", "Wrappers\CLRCore.vcxproj", "{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}" + ProjectSection(ProjectDependencies) = postProject + {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -162,6 +177,30 @@ Global {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x86.ActiveCfg = Debug|Win32 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x64.ActiveCfg = Release|x64 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x86.ActiveCfg = Release|Win32 + {1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x64.ActiveCfg = Debug|x64 + {1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x64.Build.0 = Debug|x64 + {1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x86.ActiveCfg = Debug|Win32 + {1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x86.Build.0 = Debug|Win32 + {1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x64.ActiveCfg = Release|x64 + {1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x64.Build.0 = Release|x64 + {1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x86.ActiveCfg = Release|Win32 + {1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x86.Build.0 = Release|Win32 + {363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x64.ActiveCfg = Debug|x64 + {363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x64.Build.0 = Debug|x64 + {363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x86.ActiveCfg = Debug|Win32 + {363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x86.Build.0 = Debug|Win32 + {363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x64.ActiveCfg = Release|x64 + {363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x64.Build.0 = Release|x64 + {363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x86.ActiveCfg = Release|Win32 + {363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x86.Build.0 = Release|Win32 + {38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x64.ActiveCfg = Debug|x64 + 
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x64.Build.0 = Debug|x64 + {38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x86.ActiveCfg = Debug|Win32 + {38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x86.Build.0 = Debug|Win32 + {38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x64.ActiveCfg = Release|x64 + {38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x64.Build.0 = Release|x64 + {38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x86.ActiveCfg = Release|Win32 + {38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/core/src/index/thirdparty/SPTAG/Test/CMakeLists.txt b/core/src/index/thirdparty/SPTAG/Test/CMakeLists.txt index e1179631a0ba664a63ad04977c985b4fffab17bc..39166b32a9a90fc52c529cfe2a2ce2433d23efd9 100644 --- a/core/src/index/thirdparty/SPTAG/Test/CMakeLists.txt +++ b/core/src/index/thirdparty/SPTAG/Test/CMakeLists.txt @@ -23,7 +23,7 @@ include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PR file(GLOB TEST_HDR_FILES ${PROJECT_SOURCE_DIR}/Test/inc/Test.h) file(GLOB TEST_SRC_FILES ${PROJECT_SOURCE_DIR}/Test/src/*.cpp) add_executable (test ${TEST_SRC_FILES} ${TEST_HDR_FILES}) -target_link_libraries(test SPTAGLib ${Boost_LIBRARIES} ${TBB_LIBRARIES}) +target_link_libraries(test SPTAGLib ${Boost_LIBRARIES}) install(TARGETS test RUNTIME DESTINATION bin diff --git a/core/src/index/thirdparty/SPTAG/Test/Test.vcxproj b/core/src/index/thirdparty/SPTAG/Test/Test.vcxproj index da9f227498f998d6e4d184f3c56fc3d32a272a60..c479ae5be1f7e44beec262d52604e8a72276f1d2 100644 --- a/core/src/index/thirdparty/SPTAG/Test/Test.vcxproj +++ b/core/src/index/thirdparty/SPTAG/Test/Test.vcxproj @@ -166,8 +166,6 @@ - - @@ -181,7 +179,5 @@ - - \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Test/packages.config b/core/src/index/thirdparty/SPTAG/Test/packages.config index 
ddc362dffb35a32bd12aebc5449259c3a3d8f026..651c75477976260a5cab6c89a04b9fee19aac7fe 100644 --- a/core/src/index/thirdparty/SPTAG/Test/packages.config +++ b/core/src/index/thirdparty/SPTAG/Test/packages.config @@ -2,6 +2,4 @@ - - \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Test/src/AlgoTest.cpp b/core/src/index/thirdparty/SPTAG/Test/src/AlgoTest.cpp index 5a4d24c260fdfd3e083e0f4af03d375ceace2e4f..a93cd38bed5e6759c6b43ed17b79ecbca830aca1 100644 --- a/core/src/index/thirdparty/SPTAG/Test/src/AlgoTest.cpp +++ b/core/src/index/thirdparty/SPTAG/Test/src/AlgoTest.cpp @@ -5,118 +5,143 @@ #include "inc/Helper/SimpleIniReader.h" #include "inc/Core/VectorIndex.h" +#include + template -void Build(SPTAG::IndexAlgoType algo, std::string distCalcMethod, T* vec, int n, int m) +void Build(SPTAG::IndexAlgoType algo, std::string distCalcMethod, std::shared_ptr& vec, std::shared_ptr& meta, const std::string out) { - std::vector meta; - std::vector metaoffset; - for (int i = 0; i < n; i++) { - metaoffset.push_back(meta.size()); - std::string a = std::to_string(i); - for (int j = 0; j < a.length(); j++) - meta.push_back(a[j]); - } - metaoffset.push_back(meta.size()); - - std::shared_ptr vecset(new SPTAG::BasicVectorSet( - SPTAG::ByteArray((std::uint8_t*)vec, n * m * sizeof(T), false), - SPTAG::GetEnumValueType(), m, n)); - std::shared_ptr metaset(new SPTAG::MemMetadataSet( - SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false), - SPTAG::ByteArray((std::uint8_t*)metaoffset.data(), metaoffset.size() * sizeof(long long), false), - n)); std::shared_ptr vecIndex = SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType()); + BOOST_CHECK(nullptr != vecIndex); + vecIndex->SetParameter("DistCalcMethod", distCalcMethod); + + BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vec, meta)); + BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out)); +} + +template +void BuildWithMetaMapping(SPTAG::IndexAlgoType 
algo, std::string distCalcMethod, std::shared_ptr& vec, std::shared_ptr& meta, const std::string out) +{ + + std::shared_ptr vecIndex = SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType()); BOOST_CHECK(nullptr != vecIndex); - BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vecset, metaset)); - BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex("origindices")); + + vecIndex->SetParameter("DistCalcMethod", distCalcMethod); + + BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vec, meta, true)); + BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out)); } template -void Search(std::string folder, T* vec, int k) +void Search(const std::string folder, T* vec, SPTAG::SizeType n, int k, std::string* truthmeta) { std::shared_ptr vecIndex; BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex(folder, vecIndex)); BOOST_CHECK(nullptr != vecIndex); - SPTAG::QueryResult res(vec, k, true); - vecIndex->SearchIndex(res); - for (int i = 0; i < k; i++) { - std::cout << res.GetResult(i)->Dist << "@(" << res.GetResult(i)->VID << "," << std::string((char*)res.GetMetadata(i).Data(), res.GetMetadata(i).Length()) << ") "; + for (SPTAG::SizeType i = 0; i < n; i++) + { + SPTAG::QueryResult res(vec, k, true); + vecIndex->SearchIndex(res); + std::unordered_set resmeta; + for (int j = 0; j < k; j++) + { + resmeta.insert(std::string((char*)res.GetMetadata(j).Data(), res.GetMetadata(j).Length())); + std::cout << res.GetResult(j)->Dist << "@(" << res.GetResult(j)->VID << "," << std::string((char*)res.GetMetadata(j).Data(), res.GetMetadata(j).Length()) << ") "; + } + std::cout << std::endl; + for (int j = 0; j < k; j++) + { + BOOST_CHECK(resmeta.find(truthmeta[i * k + j]) != resmeta.end()); + } + vec += vecIndex->GetFeatureDim(); } - std::cout << std::endl; vecIndex.reset(); } template -void Add(T* vec, int n) +void Add(const std::string folder, std::shared_ptr& vec, std::shared_ptr& meta, const std::string out) { 
std::shared_ptr vecIndex; - BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex("origindices", vecIndex)); + BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex(folder, vecIndex)); BOOST_CHECK(nullptr != vecIndex); - std::vector meta; - std::vector metaoffset; - for (int i = 0; i < n; i++) { - metaoffset.push_back(meta.size()); - std::string a = std::to_string(vecIndex->GetNumSamples() + i); - for (int j = 0; j < a.length(); j++) - meta.push_back(a[j]); - } - metaoffset.push_back(meta.size()); - - int m = vecIndex->GetFeatureDim(); - std::shared_ptr vecset(new SPTAG::BasicVectorSet( - SPTAG::ByteArray((std::uint8_t*)vec, n * m * sizeof(T), false), - SPTAG::GetEnumValueType(), m, n)); - - std::shared_ptr metaset(new SPTAG::MemMetadataSet( - SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false), - SPTAG::ByteArray((std::uint8_t*)metaoffset.data(), metaoffset.size() * sizeof(long long), false), - n)); - - BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->AddIndex(vecset, metaset)); - BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex("addindices")); + BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->AddIndex(vec, meta)); + BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out)); vecIndex.reset(); } template -void Delete(T* vec, int n) +void Delete(const std::string folder, T* vec, SPTAG::SizeType n, const std::string out) { std::shared_ptr vecIndex; - BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex("addindices", vecIndex)); + BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex(folder, vecIndex)); BOOST_CHECK(nullptr != vecIndex); BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->DeleteIndex((const void*)vec, n)); - BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex("delindices")); + BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out)); vecIndex.reset(); } template void Test(SPTAG::IndexAlgoType algo, std::string 
distCalcMethod) { - int n = 100, q = 3, m = 10, k = 3; + SPTAG::SizeType n = 100, q = 3; + SPTAG::DimensionType m = 10; + int k = 3; std::vector vec; - for (int i = 0; i < n; i++) { - for (int j = 0; j < m; j++) { + for (SPTAG::SizeType i = 0; i < n; i++) { + for (SPTAG::DimensionType j = 0; j < m; j++) { vec.push_back((T)i); } } std::vector query; - for (int i = 0; i < q; i++) { - for (int j = 0; j < m; j++) { + for (SPTAG::SizeType i = 0; i < q; i++) { + for (SPTAG::DimensionType j = 0; j < m; j++) { query.push_back((T)i*2); } } - Build(algo, distCalcMethod, vec.data(), n, m); - Search("origindices", query.data(), k); - Add(query.data(), q); - Search("addindices", query.data(), k); - Delete(query.data(), q); - Search("delindices", query.data(), k); + std::vector meta; + std::vector metaoffset; + for (SPTAG::SizeType i = 0; i < n; i++) { + metaoffset.push_back((std::uint64_t)meta.size()); + std::string a = std::to_string(i); + for (size_t j = 0; j < a.length(); j++) + meta.push_back(a[j]); + } + metaoffset.push_back((std::uint64_t)meta.size()); + + std::shared_ptr vecset(new SPTAG::BasicVectorSet( + SPTAG::ByteArray((std::uint8_t*)vec.data(), sizeof(T) * n * m, false), + SPTAG::GetEnumValueType(), m, n)); + + std::shared_ptr metaset(new SPTAG::MemMetadataSet( + SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false), + SPTAG::ByteArray((std::uint8_t*)metaoffset.data(), metaoffset.size() * sizeof(std::uint64_t), false), + n)); + + Build(algo, distCalcMethod, vecset, metaset, "testindices"); + std::string truthmeta1[] = { "0", "1", "2", "2", "1", "3", "4", "3", "5" }; + Search("testindices", query.data(), q, k, truthmeta1); + + Add("testindices", vecset, metaset, "testindices"); + std::string truthmeta2[] = { "0", "0", "1", "2", "2", "1", "4", "4", "3" }; + Search("testindices", query.data(), q, k, truthmeta2); + + Delete("testindices", query.data(), q, "testindices"); + std::string truthmeta3[] = { "1", "1", "3", "1", "3", "1", "3", "5", "3" 
}; + Search("testindices", query.data(), q, k, truthmeta3); + + BuildWithMetaMapping(algo, distCalcMethod, vecset, metaset, "testindices"); + std::string truthmeta4[] = { "0", "1", "2", "2", "1", "3", "4", "3", "5" }; + Search("testindices", query.data(), q, k, truthmeta4); + + Add("testindices", vecset, metaset, "testindices"); + std::string truthmeta5[] = { "0", "1", "2", "2", "1", "3", "4", "3", "5" }; + Search("testindices", query.data(), q, k, truthmeta5); } BOOST_AUTO_TEST_SUITE (AlgoTest) diff --git a/core/src/index/thirdparty/SPTAG/Test/src/DistanceTest.cpp b/core/src/index/thirdparty/SPTAG/Test/src/DistanceTest.cpp index 26c5cee3dbf9acadca846e03ac82f2e27682692d..97602a2a8d129f2466ce70507f1a8f0f35cad3a6 100644 --- a/core/src/index/thirdparty/SPTAG/Test/src/DistanceTest.cpp +++ b/core/src/index/thirdparty/SPTAG/Test/src/DistanceTest.cpp @@ -6,7 +6,7 @@ #include "inc/Core/Common/DistanceUtils.h" template -static float ComputeCosineDistance(const T *pX, const T *pY, int length) { +static float ComputeCosineDistance(const T *pX, const T *pY, SPTAG::DimensionType length) { float diff = 0; const T* pEnd1 = pX + length; while (pX < pEnd1) diff += (*pX++) * (*pY++); @@ -14,7 +14,7 @@ static float ComputeCosineDistance(const T *pX, const T *pY, int length) { } template -static float ComputeL2Distance(const T *pX, const T *pY, int length) +static float ComputeL2Distance(const T *pX, const T *pY, SPTAG::DimensionType length) { float diff = 0; const T* pEnd1 = pX + length; @@ -32,10 +32,10 @@ T random(int high = RAND_MAX, int low = 0) // Generates a random value. 
template void test(int high) { - int dimension = random(256, 2); + SPTAG::DimensionType dimension = random(256, 2); T *X = new T[dimension], *Y = new T[dimension]; BOOST_ASSERT(X != nullptr && Y != nullptr); - for (int i = 0; i < dimension; i++) { + for (SPTAG::DimensionType i = 0; i < dimension; i++) { X[i] = random(high, -high); Y[i] = random(high, -high); } diff --git a/core/src/index/thirdparty/SPTAG/Test/src/Serialize.cpp b/core/src/index/thirdparty/SPTAG/Test/src/Serialize.cpp deleted file mode 100644 index 6f1ed29a7766f9b738e44ed16f170d26ce5dc5e5..0000000000000000000000000000000000000000 --- a/core/src/index/thirdparty/SPTAG/Test/src/Serialize.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "inc/Test.h" -#include "inc/Helper/SimpleIniReader.h" -#include "inc/Core/VectorIndex.h" - - -template -void Test(SPTAG::IndexAlgoType algo, std::string distCalcMethod) { - int n = 100, q = 3, m = 10, k = 3; - std::vector vec; - for (int i = 0; i < n; i++) { - for (int j = 0; j < m; j++) { - vec.push_back((T) i); - } - } - - std::vector query; - for (int i = 0; i < q; i++) { - for (int j = 0; j < m; j++) { - query.push_back((T) i * 2); - } - } - - std::shared_ptr vecset(new SPTAG::BasicVectorSet( - SPTAG::ByteArray((std::uint8_t *) vec.data(), n * m * sizeof(T), false), - SPTAG::GetEnumValueType(), m, n)); - - std::vector blobs; - std::vector len; - { - std::shared_ptr vecIndex = - SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType()); - vecIndex->SetParameter("DistCalcMethod", distCalcMethod); - BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vecset, nullptr)); - BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndexToMemory(blobs, len)); - } - - std::vector clone_blobs; - std::vector clone_len; - for (auto i = 0; i < blobs.size(); ++i) { - auto mem = malloc(len[i]); - BOOST_CHECK(NULL != mem); - memcpy(mem, blobs[i], len[i]); - 
clone_blobs.push_back(mem); - clone_len.push_back(len[i]); - } - - std::shared_ptr clone_index = - SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType()); - clone_index->SetParameter("DistCalcMethod", distCalcMethod); - BOOST_CHECK(SPTAG::ErrorCode::Success == clone_index->LoadIndexFromMemory(clone_blobs)); - - SPTAG::QueryResult res(vec.data(), k, true); - clone_index->SearchIndex(res); - for (int i = 0; i < k; i++) { - std::cout << res.GetResult(i)->Dist << "@(" << res.GetResult(i)->VID << "," - << std::string((char *) res.GetMetadata(i).Data(), res.GetMetadata(i).Length()) << ") "; - } - std::cout << std::endl; - - for (auto &blob : blobs) - free(blob); - for (auto &blob : clone_blobs) - free(blob); -} - -BOOST_AUTO_TEST_SUITE (SerializeTest) - -BOOST_AUTO_TEST_CASE(KDTree) { - Test(SPTAG::IndexAlgoType::KDT, "L2"); -} - -BOOST_AUTO_TEST_CASE(BKTree) { - Test(SPTAG::IndexAlgoType::BKT, "L2"); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/CLRCore.vcxproj b/core/src/index/thirdparty/SPTAG/Wrappers/CLRCore.vcxproj new file mode 100644 index 0000000000000000000000000000000000000000..efb4d0f259272538e14fba013558da39646b6dcf --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/CLRCore.vcxproj @@ -0,0 +1,141 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {38ACBA6C-2E50-44D4-9A6D-DC735B56E38F} + v4.5.2 + ManagedCProj + CLRCore + 8.1 + + + + + DynamicLibrary + true + v140 + true + Unicode + + + DynamicLibrary + false + v140 + true + Unicode + + + DynamicLibrary + true + v140 + true + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + Microsoft.ANN.SPTAGManaged + .dll + $(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\ + $(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath) + $(OutLibDir);$(LibraryPath) + $(OutAppDir) + + + + true + + + true + + + false + + + false + + + + Level3 + 
Disabled + _DEBUG;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + NotUsing + true + + + CoreLibrary.lib;%(AdditionalDependencies) + + + + + Level3 + NDEBUG;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + NotUsing + true + + + CoreLibrary.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + {8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942} + + + + + + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/CLRCore.vcxproj.filters b/core/src/index/thirdparty/SPTAG/Wrappers/CLRCore.vcxproj.filters new file mode 100644 index 0000000000000000000000000000000000000000..c0c35e9683c172e0b13023cf9b6dfd1dd1859f55 --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/CLRCore.vcxproj.filters @@ -0,0 +1,32 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {ba4289c4-f872-4dbc-a57f-7b415614afb3} + + + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/CMakeLists.txt b/core/src/index/thirdparty/SPTAG/Wrappers/CMakeLists.txt index cab77a9674c50b8f59207847fb4af6fe11165a4b..514367978e19b39e019a46473b551b4aab89b095 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/CMakeLists.txt +++ b/core/src/index/thirdparty/SPTAG/Wrappers/CMakeLists.txt @@ -5,101 +5,167 @@ find_package(Python2 COMPONENTS Development) if (Python2_FOUND) include_directories (${Python2_INCLUDE_DIRS}) link_directories (${Python2_LIBRARY_DIRS}) - set (Python_INCLUDE_DIRS ${Python2_INCLUDE_DIRS}) + set (Python_INCLUDE_DIRS ${Python2_INCLUDE_DIRS}) set (Python_LIBRARIES ${Python2_LIBRARIES}) - set (Python_FOUND true) + set (Python_FOUND true) else() find_package(Python3 COMPONENTS Development) if (Python3_FOUND) include_directories (${Python3_INCLUDE_DIRS}) link_directories (${Python3_LIBRARY_DIRS}) - set (Python_INCLUDE_DIRS 
${Python3_INCLUDE_DIRS}) + set (Python_INCLUDE_DIRS ${Python3_INCLUDE_DIRS}) set (Python_LIBRARIES ${Python3_LIBRARIES}) - set (Python_FOUND true) + set (Python_FOUND true) endif() endif() if (Python_FOUND) - message (STATUS "Found Python.") + message (STATUS "Found Python.") message (STATUS "Include Path: ${Python_INCLUDE_DIRS}") message (STATUS "Library Path: ${Python_LIBRARIES}") - - if (WIN32) - set(PY_SUFFIX .pyd) - else() - set(PY_SUFFIX .so) - endif() - - execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonCore.i) - execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonClient.i) - - include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers) - - file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h) - file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp) - add_library (_SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES}) - set_target_properties(_SPTAG PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX}) - target_link_libraries(_SPTAG SPTAGLib ${Python_LIBRARIES} ${TBB_LIBRARIES}) - add_custom_command(TARGET _SPTAG POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${EXECUTABLE_OUTPUT_PATH}) - - file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h) - file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp) - add_library 
(_SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES}) - set_target_properties(_SPTAGClient PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX}) - target_link_libraries(_SPTAGClient SPTAGLib ${Python_LIBRARIES} ${Boost_LIBRARIES} ${TBB_LIBRARIES}) - add_custom_command(TARGET _SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py ${EXECUTABLE_OUTPUT_PATH}) - - install(TARGETS _SPTAG _SPTAGClient - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) - install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py DESTINATION bin) + + if (WIN32) + set(PY_SUFFIX .pyd) + else() + set(PY_SUFFIX .so) + endif() + + execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonCore.i) + execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonClient.i) + + include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers) + + file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h) + file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp) + add_library (_SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES}) + set_target_properties(_SPTAG PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX}) + target_link_libraries(_SPTAG SPTAGLib ${Python_LIBRARIES}) + add_custom_command(TARGET _SPTAG POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${EXECUTABLE_OUTPUT_PATH}) + + file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h) + file(GLOB 
CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp) + add_library (_SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES}) + set_target_properties(_SPTAGClient PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX}) + target_link_libraries(_SPTAGClient SPTAGLib ${Python_LIBRARIES} ${Boost_LIBRARIES}) + add_custom_command(TARGET _SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py ${EXECUTABLE_OUTPUT_PATH}) + + install(TARGETS _SPTAG _SPTAGClient + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) + install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/SPTAG.py ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py DESTINATION bin) else() - message (STATUS "Could not find Python.") + message (STATUS "Could not find Python.") endif() find_package(JNI) -if (!JNI_FOUND) +if (JNI_FOUND) include_directories (${JNI_INCLUDE_DIRS}) link_directories (${JNI_LIBRARY_DIRS}) message (STATUS "Found JNI.") message (STATUS "Include Path: ${JNI_INCLUDE_DIRS}") message (STATUS "Library Path: ${JNI_LIBRARIES}") - if (WIN32) - set (JAVA_SUFFIX .dll) - else() - set (JAVA_SUFFIX .so) - endif() - - execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaCore.i) - execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaClient.i) - - include_directories(${JNI_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers) - - file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h) - file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp 
${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp) - add_library (SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES}) - set_target_properties(SPTAG PROPERTIES SUFFIX ${JAVA_SUFFIX}) - target_link_libraries(SPTAG SPTAGLib ${JNI_LIBRARIES} ${TBB_LIBRARIES}) - - file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h) - file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp) - add_library (SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES}) - set_target_properties(SPTAGClient PROPERTIES SUFFIX ${JAVA_SUFFIX}) - target_link_libraries(SPTAGClient SPTAGLib ${JNI_LIBRARIES} ${Boost_LIBRARIES} ${TBB_LIBRARIES}) - - file(GLOB JAVA_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java) - foreach(JAVA_FILE ${JAVA_FILES}) - message (STATUS "Add copy post-command for file " ${JAVA_FILE}) - add_custom_command(TARGET SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${JAVA_FILE} ${EXECUTABLE_OUTPUT_PATH}) - endforeach(JAVA_FILE) - - install(TARGETS SPTAG SPTAGClient - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) - install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java DESTINATION bin) + if (WIN32) + set (JAVA_SUFFIX .dll) + else() + set (JAVA_SUFFIX .so) + endif() + + execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaCore.i) + execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaClient.i) + + include_directories(${JNI_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/AnnService 
${PROJECT_SOURCE_DIR}/Wrappers) + + file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h) + file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp) + add_library (JAVASPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES}) + set_target_properties(JAVASPTAG PROPERTIES SUFFIX ${JAVA_SUFFIX}) + target_link_libraries(JAVASPTAG SPTAGLib ${JNI_LIBRARIES}) + + file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h) + file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp) + add_library (JAVASPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES}) + set_target_properties(JAVASPTAGClient PROPERTIES SUFFIX ${JAVA_SUFFIX}) + target_link_libraries(JAVASPTAGClient SPTAGLib ${JNI_LIBRARIES} ${Boost_LIBRARIES}) + + file(GLOB JAVA_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java) + foreach(JAVA_FILE ${JAVA_FILES}) + message (STATUS "Add copy post-command for file " ${JAVA_FILE}) + add_custom_command(TARGET JAVASPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${JAVA_FILE} ${EXECUTABLE_OUTPUT_PATH}) + endforeach(JAVA_FILE) + + install(TARGETS JAVASPTAG JAVASPTAGClient + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) + install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java DESTINATION bin) +else() + message (STATUS "Could not find JNI.") +endif() + +if (WIN32) + if (${PROJECTNAME_ARCHITECTURE} MATCHES "x64") + set (csharp_dotnet_framework_hints "$ENV{windir}\\Microsoft.NET\\Framework64") + else() + set (csharp_dotnet_framework_hints "$ENV{windir}\\Microsoft.NET\\Framework") + endif() + + file(GLOB_RECURSE csharp_dotnet_executables 
${csharp_dotnet_framework_hints}/csc.exe) + list(SORT csharp_dotnet_executables) + list(REVERSE csharp_dotnet_executables) + foreach (csharp_dotnet_executable ${csharp_dotnet_executables}) + if (NOT DEFINED DOTNET_FOUND) + string(REPLACE "${csharp_dotnet_framework_hints}/" "" csharp_dotnet_version_temp ${csharp_dotnet_executable}) + string(REPLACE "/csc.exe" "" csharp_dotnet_version_temp ${csharp_dotnet_version_temp}) + + set (DOTNET_EXECUTABLE_VERSION "${csharp_dotnet_version_temp}" CACHE STRING "C# .NET compiler version" FORCE) + set (DOTNET_FOUND ${csharp_dotnet_executable}) + endif() + endforeach(csharp_dotnet_executable) +else() + FIND_PROGRAM(DOTNET_FOUND dotnet) +endif() + +if (DOTNET_FOUND) + message (STATUS "Found dotnet.") + message (STATUS "DOTNET_EXECUTABLE: " ${DOTNET_FOUND}) + + if (WIN32) + set (CSHARP_SUFFIX .dll) + else() + set (CSHARP_SUFFIX .so) + endif() + + execute_process(COMMAND swig -csharp -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_cwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CsharpCore.i) + execute_process(COMMAND swig -csharp -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_cwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CsharpClient.i) + + include_directories(${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers) + + file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h) + file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_cwrap.cpp) + add_library (CSHARPSPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES}) + set_target_properties(CSHARPSPTAG PROPERTIES SUFFIX ${CSHARP_SUFFIX}) + target_link_libraries(CSHARPSPTAG SPTAGLib) + + file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h) + file(GLOB CLIENT_SRC_FILES 
${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_cwrap.cpp) + add_library (CSHARPSPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES}) + set_target_properties(CSHARPSPTAGClient PROPERTIES SUFFIX ${CSHARP_SUFFIX}) + target_link_libraries(CSHARPSPTAGClient SPTAGLib ${Boost_LIBRARIES}) + + file(GLOB CSHARP_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.cs) + foreach(CSHARP_FILE ${CSHARP_FILES}) + message (STATUS "Add copy post-command for file " ${CSHARP_FILE}) + add_custom_command(TARGET CSHARPSPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CSHARP_FILE} ${EXECUTABLE_OUTPUT_PATH}) + endforeach(CSHARP_FILE) + + install(TARGETS CSHARPSPTAG CSHARPSPTAGClient + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) + install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.cs DESTINATION bin) else() - message (STATUS "Could not find JNI.") -endif() \ No newline at end of file + message (STATUS "Could not find C#.") +endif() + diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/CsharpClient.vcxproj b/core/src/index/thirdparty/SPTAG/Wrappers/CsharpClient.vcxproj new file mode 100644 index 0000000000000000000000000000000000000000..d7d17102d89f2971ab3d0994349c6f0b9461f751 --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/CsharpClient.vcxproj @@ -0,0 +1,191 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {363BA3BB-75C4-4CC7-AECB-28C7534B3710} + CsharpClient + 8.1 + + + + + DynamicLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + DynamicLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + CSHARPSPTAGClient + .dll + $(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\ + 
$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath) + $(OutLibDir);$(LibraryPath) + $(OutAppDir) + + + false + + + + CoreLibrary.lib;SocketLib.lib;%(AdditionalDependencies) + + + + + %(AdditionalIncludeDirectories) + + + + + Level3 + MaxSpeed + true + true + true + true + _WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + _WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + Guard + ProgramDatabase + + + /guard:cf %(AdditionalOptions) + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + + + + + + false + false + false + false + + + + + + + + + + + + + + + + + + + + + + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + + + + + + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/CsharpClient.vcxproj.filters b/core/src/index/thirdparty/SPTAG/Wrappers/CsharpClient.vcxproj.filters new file mode 100644 index 0000000000000000000000000000000000000000..589c50014dfbb4dd40a794e9358677fdf6b1848e --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/CsharpClient.vcxproj.filters @@ -0,0 +1,41 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + + + Resource Files + + + Resource Files + + + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/CsharpCore.vcxproj b/core/src/index/thirdparty/SPTAG/Wrappers/CsharpCore.vcxproj new file mode 100644 
index 0000000000000000000000000000000000000000..e809d8b9016cb5e6aac2f6d0b24df32432a6ee9e --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/CsharpCore.vcxproj @@ -0,0 +1,134 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {1896C009-AD46-4A70-B83C-4652A7F37503} + CsharpCore + 8.1 + + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + DynamicLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + CSHARPSPTAG + .dll + $(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\ + $(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath) + $(OutLibDir);$(LibraryPath) + $(OutAppDir) + + + false + + + + CoreLibrary.lib;%(AdditionalDependencies) + + + + + _WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + Guard + ProgramDatabase + _WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + + + /guard:cf %(AdditionalOptions) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
+ + + + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/CsharpCore.vcxproj.filters b/core/src/index/thirdparty/SPTAG/Wrappers/CsharpCore.vcxproj.filters new file mode 100644 index 0000000000000000000000000000000000000000..51b1ec0ce6c78d3eb36af9416a6a1e5620b06f41 --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/CsharpCore.vcxproj.filters @@ -0,0 +1,40 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {ba4289c4-f872-4dbc-a57f-7b415614afb3} + + + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + + + Resources + + + Resources + + + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/JavaClient.vcxproj b/core/src/index/thirdparty/SPTAG/Wrappers/JavaClient.vcxproj index bc917561d465bf4a9fbbecd852cdb6e392b40a72..2ee36ac6209b14600952e8e6aefe51d756e4c96f 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/JavaClient.vcxproj +++ b/core/src/index/thirdparty/SPTAG/Wrappers/JavaClient.vcxproj @@ -70,7 +70,7 @@ - SPTAGClient + JAVASPTAGClient .dll $(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\ $(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath) @@ -158,6 +158,7 @@ + diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/JavaClient.vcxproj.filters b/core/src/index/thirdparty/SPTAG/Wrappers/JavaClient.vcxproj.filters index e5e9baf576b3a1ebc7cc499ece845fd928787c61..0d047923aa844dc0f40f8768bbf5454ceb565182 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/JavaClient.vcxproj.filters +++ b/core/src/index/thirdparty/SPTAG/Wrappers/JavaClient.vcxproj.filters @@ -34,5 +34,8 @@ Resource Files + + Resource Files + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/JavaCore.vcxproj b/core/src/index/thirdparty/SPTAG/Wrappers/JavaCore.vcxproj index 
bf11ef9eb6d3475d101c8d511892920f59eb0d13..f15c0e005f71f86ec8f982647ae39749649a1baf 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/JavaCore.vcxproj +++ b/core/src/index/thirdparty/SPTAG/Wrappers/JavaCore.vcxproj @@ -70,7 +70,7 @@ - SPTAG + JAVASPTAG .dll $(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\ $(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath) @@ -106,6 +106,7 @@ + @@ -113,8 +114,6 @@ - - @@ -131,7 +130,5 @@ This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/JavaCore.vcxproj.filters b/core/src/index/thirdparty/SPTAG/Wrappers/JavaCore.vcxproj.filters index ae22a6a6ce23e93da53c54667651186e5c9ed430..851552684d996a8aa20668456c547e33946614a6 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/JavaCore.vcxproj.filters +++ b/core/src/index/thirdparty/SPTAG/Wrappers/JavaCore.vcxproj.filters @@ -30,9 +30,11 @@ - Resources + + Resources + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/PythonClient.vcxproj b/core/src/index/thirdparty/SPTAG/Wrappers/PythonClient.vcxproj index a9cad34019c65f93c8ea966eb8bfc73c56688be7..5cf2c2a9cf668680f06217bbc508db32e18d7e76 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/PythonClient.vcxproj +++ b/core/src/index/thirdparty/SPTAG/Wrappers/PythonClient.vcxproj @@ -158,6 +158,7 @@ + diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/PythonClient.vcxproj.filters b/core/src/index/thirdparty/SPTAG/Wrappers/PythonClient.vcxproj.filters index 928aef15bc0c9b42e869ad04d8dd3b32e003c467..84c71f097700431122238fd722bdc25f09d17cc1 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/PythonClient.vcxproj.filters +++ b/core/src/index/thirdparty/SPTAG/Wrappers/PythonClient.vcxproj.filters @@ -34,5 +34,8 @@ Resource Files + + Resource 
Files + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/PythonCore.vcxproj b/core/src/index/thirdparty/SPTAG/Wrappers/PythonCore.vcxproj index 2ec11c0a4548cf89005cdd9a6154d54a02be8a51..7555ba97f4d0c04ad51e6ef80f6c1e273b842feb 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/PythonCore.vcxproj +++ b/core/src/index/thirdparty/SPTAG/Wrappers/PythonCore.vcxproj @@ -106,6 +106,7 @@ + @@ -113,8 +114,6 @@ - - @@ -129,7 +128,5 @@ - - \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/PythonCore.vcxproj.filters b/core/src/index/thirdparty/SPTAG/Wrappers/PythonCore.vcxproj.filters index 46fb928dd8d3044a416bfb8b19a0705b80f466ef..8d0ee1d7b9341516bfb10f46b4348fecbecdb44d 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/PythonCore.vcxproj.filters +++ b/core/src/index/thirdparty/SPTAG/Wrappers/PythonCore.vcxproj.filters @@ -30,9 +30,11 @@ - Resources + + Resources + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/inc/CLRCoreInterface.h b/core/src/index/thirdparty/SPTAG/Wrappers/inc/CLRCoreInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..1a273ba8d761de2bd9615e0a027ee173e73bc86f --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/inc/CLRCoreInterface.h @@ -0,0 +1,113 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once +#include "ManagedObject.h" +#include "inc/Core/VectorIndex.h" + +using namespace System; + +namespace Microsoft +{ + namespace ANN + { + namespace SPTAGManaged + { + + public ref class BasicResult : + public ManagedObject + { + public: + BasicResult(SPTAG::BasicResult* p_instance) : ManagedObject(p_instance) + { + } + + property int VID + { + public: + int get() + { + return m_Instance->VID; + } + private: + void set(int p_vid) + { + } + } + + property float Dist + { + public: + float get() + { + return m_Instance->Dist; + } + private: + void set(float p_dist) + { + } + } + + property array^ Meta + { + public: + array^ get() + { + array^ buf = gcnew array(m_Instance->Meta.Length()); + Marshal::Copy((IntPtr)m_Instance->Meta.Data(), buf, 0, (int)m_Instance->Meta.Length()); + return buf; + } + private: + void set(array^ p_meta) + { + } + } + }; + + public ref class AnnIndex : + public ManagedObject> + { + public: + AnnIndex(std::shared_ptr p_index); + + AnnIndex(String^ p_algoType, String^ p_valueType, int p_dimension); + + void SetBuildParam(String^ p_name, String^ p_value); + + void SetSearchParam(String^ p_name, String^ p_value); + + bool Build(array^ p_data, int p_num); + + bool BuildWithMetaData(array^ p_data, array^ p_meta, int p_num, bool p_withMetaIndex); + + array^ Search(array^ p_data, int p_resultNum); + + array^ SearchWithMetaData(array^ p_data, int p_resultNum); + + bool Save(String^ p_saveFile); + + array^>^ Dump(); + + bool Add(array^ p_data, int p_num); + + bool AddWithMetaData(array^ p_data, array^ p_meta, int p_num); + + bool Delete(array^ p_data, int p_num); + + bool DeleteByMetaData(array^ p_meta); + + static AnnIndex^ Load(String^ p_loaderFile); + + static AnnIndex^ Load(array^>^ p_index); + + static bool Merge(String^ p_indexFilePath1, String^ p_indexFilePath2); + + private: + + int m_dimension; + + size_t m_inputVectorSize; + }; + } + } +} diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/inc/CoreInterface.h 
b/core/src/index/thirdparty/SPTAG/Wrappers/inc/CoreInterface.h index e433b67801532f1a64ee65247aceab367ad9e1ad..bc69874746b606e6f3693e2e0b5f58d65e051d45 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/inc/CoreInterface.h +++ b/core/src/index/thirdparty/SPTAG/Wrappers/inc/CoreInterface.h @@ -8,12 +8,15 @@ #include "inc/Core/Common.h" #include "inc/Core/VectorIndex.h" +typedef int SizeType; +typedef int DimensionType; + class AnnIndex { public: - AnnIndex(int p_dimension); + AnnIndex(DimensionType p_dimension); - AnnIndex(const char* p_algoType, const char* p_valueType, int p_dimension); + AnnIndex(const char* p_algoType, const char* p_valueType, DimensionType p_dimension); ~AnnIndex(); @@ -21,9 +24,9 @@ public: void SetSearchParam(const char* p_name, const char* p_value); - bool Build(ByteArray p_data, int p_num); + bool Build(ByteArray p_data, SizeType p_num); - bool BuildWithMetaData(ByteArray p_data, ByteArray p_meta, int p_num); + bool BuildWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num, bool p_withMetaIndex); std::shared_ptr Search(ByteArray p_data, int p_resultNum); @@ -33,14 +36,18 @@ public: bool Save(const char* p_saveFile) const; - bool Add(ByteArray p_data, int p_num); + bool Add(ByteArray p_data, SizeType p_num); + + bool AddWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num); - bool AddWithMetaData(ByteArray p_data, ByteArray p_meta, int p_num); + bool Delete(ByteArray p_data, SizeType p_num); - bool Delete(ByteArray p_data, int p_num); + bool DeleteByMetaData(ByteArray p_meta); static AnnIndex Load(const char* p_loaderFile); + static bool Merge(const char* p_indexFilePath1, const char* p_indexFilePath2); + private: AnnIndex(const std::shared_ptr& p_index); @@ -48,7 +55,7 @@ private: size_t m_inputVectorSize; - int m_dimension; + DimensionType m_dimension; SPTAG::IndexAlgoType m_algoType; diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/inc/CsharpClient.i 
b/core/src/index/thirdparty/SPTAG/Wrappers/inc/CsharpClient.i new file mode 100644 index 0000000000000000000000000000000000000000..481627a97f2fa061ec5d6dd6c99f3d4664eaffe2 --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/inc/CsharpClient.i @@ -0,0 +1,16 @@ +%module CSHARPSPTAGClient + +%{ +#include "inc/ClientInterface.h" +%} + +%include +%shared_ptr(AnnClient) +%shared_ptr(RemoteSearchResult) +%include "CsharpCommon.i" + +%{ +#define SWIG_FILE_WITH_INIT +%} + +%include "ClientInterface.h" diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/inc/CsharpCommon.i b/core/src/index/thirdparty/SPTAG/Wrappers/inc/CsharpCommon.i new file mode 100644 index 0000000000000000000000000000000000000000..6251d6f245a81a610b71bb9f42850e18ecb8eb70 --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/inc/CsharpCommon.i @@ -0,0 +1,125 @@ +#ifdef SWIGCSHARP + +%{ + struct WrapperArray + { + void * _data; + size_t _size; + }; + + void deleteArrayOfWrapperArray(void* ptr) { + delete[] (WrapperArray*)ptr; + } +%} + +%pragma(csharp) imclasscode=%{ + [System.Runtime.InteropServices.StructLayout(System.Runtime.InteropServices.LayoutKind.Sequential)] + public struct WrapperArray + { + public System.IntPtr _data; + public ulong _size; + public WrapperArray(System.IntPtr in_data, ulong in_size) { _data = in_data; _size = in_size; } + } +%} + +%apply void *VOID_INT_PTR { void * } +void deleteArrayOfWrapperArray(void* ptr); + +%typemap(ctype) ByteArray "WrapperArray" +%typemap(imtype) ByteArray "WrapperArray" +%typemap(cstype) ByteArray "byte[]" +%typemap(in) ByteArray { + $1.Set((std::uint8_t*)$input._data, $input._size, false); +} +%typemap(out) ByteArray { + $result._data = $1.Data(); + $result._size = $1.Length(); +} +%typemap(csin, + pre="unsafe { fixed(byte* ptr$csinput = $csinput) { $modulePINVOKE.WrapperArray temp$csinput = new $modulePINVOKE.WrapperArray( (System.IntPtr)ptr$csinput, (ulong)$csinput.LongLength );", + terminator="} }" + ) ByteArray %{ temp$csinput %} + 
+%typemap(csvarin) ByteArray %{ + set { + unsafe { fixed(byte* ptr$csinput = $csinput) + { + $modulePINVOKE.WrapperArray temp$csinput = new $modulePINVOKE.WrapperArray( (System.IntPtr)ptr$csinput, (ulong)$csinput.LongLength ); + $imcall; + } + } + } +%} + +%typemap(csout, excode=SWIGEXCODE) ByteArray %{ + $modulePINVOKE.WrapperArray data = $imcall;$excode + byte[] ret = new byte[data._size]; + System.Runtime.InteropServices.Marshal.Copy(data._data, ret, 0, (int)data._size); + return ret; +%} + +%typemap(csvarout) ByteArray %{ + get { + $modulePINVOKE.WrapperArray data = $imcall; + byte[] ret = new byte[data._size]; + System.Runtime.InteropServices.Marshal.Copy(data._data, ret, 0, (int)data._size); + return ret; + } +%} + +%typemap(ctype) std::shared_ptr "WrapperArray" +%typemap(imtype) std::shared_ptr "WrapperArray" +%typemap(cstype) std::shared_ptr "BasicResult[]" +%typemap(out) std::shared_ptr { + $result._data = new WrapperArray[$1->GetResultNum()]; + $result._size = $1->GetResultNum(); + for (int i = 0; i < $1->GetResultNum(); i++) + (((WrapperArray*)$result._data) + i)->_data = new BasicResult(*($1->GetResult(i))); +} +%typemap(csout, excode=SWIGEXCODE) std::shared_ptr { + $modulePINVOKE.WrapperArray data = $imcall; + BasicResult[] ret = new BasicResult[data._size]; + System.IntPtr ptr = data._data; + for (ulong i = 0; i < data._size; i++) { + $modulePINVOKE.WrapperArray arr = ($modulePINVOKE.WrapperArray)System.Runtime.InteropServices.Marshal.PtrToStructure(ptr, typeof($modulePINVOKE.WrapperArray)); + ret[i] = new BasicResult(arr._data, true); + ptr += sizeof($modulePINVOKE.WrapperArray); + } + $modulePINVOKE.deleteArrayOfWrapperArray(data._data); + $excode + return ret; +} + +%typemap(ctype) std::shared_ptr "WrapperArray" +%typemap(imtype) std::shared_ptr "WrapperArray" +%typemap(cstype) std::shared_ptr "BasicResult[]" +%typemap(out) std::shared_ptr { + int combinelen = 0; + int nodelen = (int)(($1->m_allIndexResults).size()); + for (int i = 0; i < nodelen; 
i++) { + combinelen += $1->m_allIndexResults[i].m_results.GetResultNum(); + } + $result._data = new WrapperArray[combinelen]; + $result._size = combinelen; + size_t copyed = 0; + for (int i = 0; i < nodelen; i++) { + auto& queryResult = $1->m_allIndexResults[i].m_results; + for (int j = 0; j < queryResult.GetResultNum(); j++) + (((WrapperArray*)$result._data) + copyed + j)->_data = new BasicResult(*(queryResult.GetResult(j))); + copyed += queryResult.GetResultNum(); + } +} +%typemap(csout, excode=SWIGEXCODE) std::shared_ptr { + $modulePINVOKE.WrapperArray data = $imcall; + BasicResult[] ret = new BasicResult[data._size]; + System.IntPtr ptr = data._data; + for (ulong i = 0; i < data._size; i++) { + $modulePINVOKE.WrapperArray arr = ($modulePINVOKE.WrapperArray)System.Runtime.InteropServices.Marshal.PtrToStructure(ptr, typeof($modulePINVOKE.WrapperArray)); + ret[i] = new BasicResult(arr._data, true); + ptr += sizeof($modulePINVOKE.WrapperArray); + } + $modulePINVOKE.deleteArrayOfWrapperArray(data._data); + $excode + return ret; +} +#endif diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/inc/CsharpCore.i b/core/src/index/thirdparty/SPTAG/Wrappers/inc/CsharpCore.i new file mode 100644 index 0000000000000000000000000000000000000000..6434239b9050471c18d8e3e794e5e5fcafc10c9c --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/inc/CsharpCore.i @@ -0,0 +1,17 @@ +%module CSHARPSPTAG + +%{ +#include "inc/CoreInterface.h" +%} + +%include +%shared_ptr(AnnIndex) +%shared_ptr(QueryResult) +%include "CsharpCommon.i" + +%{ +#define SWIG_FILE_WITH_INIT +%} + +%include "CoreInterface.h" +%include "../../AnnService/inc/Core/SearchResult.h" diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaClient.i b/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaClient.i index ba8d93fcc07be509abca49130d7105758f8e3d2a..62a274e51aec171ecca99211f7705b12e9ce37ce 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaClient.i +++ 
b/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaClient.i @@ -1,4 +1,4 @@ -%module SPTAGClient +%module JAVASPTAGClient %{ #include "inc/ClientInterface.h" diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaCommon.i b/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaCommon.i index 8d36675deebee80b066661905f55ef56d4de610a..366052d4f98c2039851e863d5ca882324f7a1fef 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaCommon.i +++ b/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaCommon.i @@ -4,8 +4,8 @@ %typemap(jtype) ByteArray "byte[]" %typemap(jstype) ByteArray "byte[]" %typemap(in) ByteArray { - $1.SetData((std::uint8_t*)JCALL2(GetByteArrayElements, jenv, $input, 0), - JCALL1(GetArrayLength, jenv, $input)); + $1.Set((std::uint8_t*)JCALL2(GetByteArrayElements, jenv, $input, 0), + JCALL1(GetArrayLength, jenv, $input), false); } %typemap(out) ByteArray { $result = JCALL1(NewByteArray, jenv, $1.Length()); @@ -15,10 +15,10 @@ %typemap(javaout) ByteArray { return $jnicall; } %typemap(jni) std::shared_ptr "jobjectArray" -%typemap(jtype) std::shared_ptr "Result[]" -%typemap(jstype) std::shared_ptr "Result[]" +%typemap(jtype) std::shared_ptr "BasicResult[]" +%typemap(jstype) std::shared_ptr "BasicResult[]" %typemap(out) std::shared_ptr { - jclass retClass = jenv->FindClass("Result"); + jclass retClass = jenv->FindClass("BasicResult"); int len = $1->GetResultNum(); $result = jenv->NewObjectArray(len, retClass, NULL); for (int i = 0; i < len; i++) { @@ -31,15 +31,15 @@ %typemap(javaout) std::shared_ptr { return $jnicall; } %typemap(jni) std::shared_ptr "jobjectArray" -%typemap(jtype) std::shared_ptr "Result[]" -%typemap(jstype) std::shared_ptr "Result[]" +%typemap(jtype) std::shared_ptr "BasicResult[]" +%typemap(jstype) std::shared_ptr "BasicResult[]" %typemap(out) std::shared_ptr { int combinelen = 0; int nodelen = (int)(($1->m_allIndexResults).size()); for (int i = 0; i < nodelen; i++) { combinelen += 
$1->m_allIndexResults[i].m_results.GetResultNum(); } - jclass retClass = jenv->FindClass("Result"); + jclass retClass = jenv->FindClass("BasicResult"); $result = jenv->NewObjectArray(combinelen, retClass, NULL); int id = 0; for (int i = 0; i < nodelen; i++) { diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaCore.i b/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaCore.i index 125ce7e56fceacbd601aad4e6f1124f48e9faac8..78d9dd72e36b2627cea5ed50ee60d2e2a40f4550 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaCore.i +++ b/core/src/index/thirdparty/SPTAG/Wrappers/inc/JavaCore.i @@ -1,4 +1,4 @@ -%module SPTAG +%module JAVASPTAG %{ #include "inc/CoreInterface.h" @@ -14,4 +14,4 @@ %} %include "CoreInterface.h" -%include "TransferDataType.h" +%include "../../AnnService/inc/Core/SearchResult.h" diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/inc/ManagedObject.h b/core/src/index/thirdparty/SPTAG/Wrappers/inc/ManagedObject.h new file mode 100644 index 0000000000000000000000000000000000000000..266d84b440a53a6f7a12ebf236a7360f176bf084 --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/inc/ManagedObject.h @@ -0,0 +1,80 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once +#include "inc/Helper/StringConvert.h" + +using namespace System; +using namespace System::Runtime::InteropServices; + +namespace Microsoft +{ + namespace ANN + { + namespace SPTAGManaged + { + /// + /// hold a pointer to an umnanaged object from the core project + /// + template + public ref class ManagedObject + { + protected: + T* m_Instance; + + public: + ManagedObject(T* instance) + :m_Instance(instance) + { + } + + ManagedObject(T& instance) + { + m_Instance = new T(instance); + } + + /// + /// destructor, which is called whenever delete an object with delete keyword + /// + virtual ~ManagedObject() + { + if (m_Instance != nullptr) + { + delete m_Instance; + } + } + + /// + /// finalizer which is called by Garbage Collector whenever it destroys the wrapper object. + /// + !ManagedObject() + { + if (m_Instance != nullptr) + { + delete m_Instance; + } + } + + T* GetInstance() + { + return m_Instance; + } + + static const char* string_to_char_array(String^ string) + { + const char* str = (const char*)(Marshal::StringToHGlobalAnsi(string)).ToPointer(); + return str; + } + + template + static T string_to(String^ string) + { + T data; + SPTAG::Helper::Convert::ConvertStringTo(string_to_char_array(string), data); + return data; + } + }; + } + } +} + diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/inc/TransferDataType.h b/core/src/index/thirdparty/SPTAG/Wrappers/inc/TransferDataType.h index e9eb64def0b86fc89b4c58e522872db83bdcdcf9..51ef9614ab22d0753e432d9ad38a78f2f93cb066 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/inc/TransferDataType.h +++ b/core/src/index/thirdparty/SPTAG/Wrappers/inc/TransferDataType.h @@ -12,15 +12,8 @@ typedef SPTAG::ByteArray ByteArray; typedef SPTAG::QueryResult QueryResult; -typedef SPTAG::Socket::RemoteSearchResult RemoteSearchResult; - -class Result { -public: - int VID; - float Dist; - ByteArray Meta; +typedef SPTAG::BasicResult BasicResult; - Result(int _VID, float _Dist, ByteArray _Meta): VID(_VID), 
Dist(_Dist), Meta(_Meta) {} -}; +typedef SPTAG::Socket::RemoteSearchResult RemoteSearchResult; #endif // _SPTAG_PW_TRANSFERDATATYPE_H_ diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/packages.config b/core/src/index/thirdparty/SPTAG/Wrappers/packages.config index 667ef75b66016eb7dc89bf54fe06a01d48ff1db2..d780ec4a8eb816baedbdda25881f72b8c7842c02 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/packages.config +++ b/core/src/index/thirdparty/SPTAG/Wrappers/packages.config @@ -3,6 +3,4 @@ - - \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/src/AssemblyInfo.cpp b/core/src/index/thirdparty/SPTAG/Wrappers/src/AssemblyInfo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..43759a83ef03788f176f4b2cf6df54e47b0ca3de --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/src/AssemblyInfo.cpp @@ -0,0 +1,36 @@ +using namespace System; +using namespace System::Reflection; +using namespace System::Runtime::CompilerServices; +using namespace System::Runtime::InteropServices; +using namespace System::Security::Permissions; + +// +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. 
+// +[assembly:AssemblyTitleAttribute(L"CLRCore")]; +[assembly:AssemblyDescriptionAttribute(L"")]; +[assembly:AssemblyConfigurationAttribute(L"")]; +[assembly:AssemblyCompanyAttribute(L"")]; +[assembly:AssemblyProductAttribute(L"CLRCore")]; +[assembly:AssemblyCopyrightAttribute(L"Copyright (c) 2019")]; +[assembly:AssemblyTrademarkAttribute(L"")]; +[assembly:AssemblyCultureAttribute(L"")]; + +// +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the value or you can default the Revision and Build Numbers +// by using the '*' as shown below: + +[assembly:AssemblyVersionAttribute("1.0.*")]; + +[assembly:ComVisible(false)]; + +[assembly:CLSCompliantAttribute(true)]; \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/src/CLRCoreInterface.cpp b/core/src/index/thirdparty/SPTAG/Wrappers/src/CLRCoreInterface.cpp new file mode 100644 index 0000000000000000000000000000000000000000..39e62baf449d6db5856320382a751bc3373c5c46 --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/Wrappers/src/CLRCoreInterface.cpp @@ -0,0 +1,212 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "inc/CLRCoreInterface.h" + + +namespace Microsoft +{ + namespace ANN + { + namespace SPTAGManaged + { + AnnIndex::AnnIndex(std::shared_ptr p_index) : + ManagedObject(p_index) + { + m_dimension = p_index->GetFeatureDim(); + m_inputVectorSize = SPTAG::GetValueTypeSize(p_index->GetVectorValueType()) * m_dimension; + } + + AnnIndex::AnnIndex(String^ p_algoType, String^ p_valueType, int p_dimension) : + ManagedObject(SPTAG::VectorIndex::CreateInstance(string_to(p_algoType), string_to(p_valueType))) + { + m_dimension = p_dimension; + m_inputVectorSize = SPTAG::GetValueTypeSize((*m_Instance)->GetVectorValueType()) * m_dimension; + } + + void AnnIndex::SetBuildParam(String^ p_name, String^ p_value) + { + if (m_Instance != nullptr) + (*m_Instance)->SetParameter(string_to_char_array(p_name), string_to_char_array(p_value)); + } + + void AnnIndex::SetSearchParam(String^ p_name, String^ p_value) + { + if (m_Instance != nullptr) + (*m_Instance)->SetParameter(string_to_char_array(p_name), string_to_char_array(p_value)); + } + + bool AnnIndex::Build(array^ p_data, int p_num) + { + if (m_Instance == nullptr || p_num == 0 || m_dimension == 0 || p_data->LongLength != p_num * m_inputVectorSize) + return false; + + pin_ptr ptr = &p_data[0]; + return (SPTAG::ErrorCode::Success == (*m_Instance)->BuildIndex(ptr, p_num, m_dimension)); + } + + bool AnnIndex::BuildWithMetaData(array^ p_data, array^ p_meta, int p_num, bool p_withMetaIndex) + { + if (m_Instance == nullptr || p_num == 0 || m_dimension == 0 || p_data->LongLength != p_num * m_inputVectorSize) + return false; + + pin_ptr dataptr = &p_data[0]; + std::shared_ptr vectors(new SPTAG::BasicVectorSet(SPTAG::ByteArray(dataptr, p_data->LongLength, false), (*m_Instance)->GetVectorValueType(), m_dimension, p_num)); + + pin_ptr metaptr = &p_meta[0]; + std::uint64_t* offsets = new std::uint64_t[p_num + 1]{ 0 }; + int current = 0; + for (long long i = 0; i < p_meta->LongLength; i++) { + if (((char)metaptr[i]) == '\n') + 
offsets[++current] = (std::uint64_t)(i + 1); + } + std::shared_ptr meta(new SPTAG::MemMetadataSet(SPTAG::ByteArray(metaptr, p_meta->LongLength, false), SPTAG::ByteArray((std::uint8_t*)offsets, (p_num + 1) * sizeof(std::uint64_t), true), p_num)); + return (SPTAG::ErrorCode::Success == (*m_Instance)->BuildIndex(vectors, meta, p_withMetaIndex)); + } + + array^ AnnIndex::Search(array^ p_data, int p_resultNum) + { + array^ res; + if (m_Instance == nullptr || m_dimension == 0 || p_data->LongLength != m_inputVectorSize) + return res; + + pin_ptr ptr = &p_data[0]; + SPTAG::QueryResult results(ptr, p_resultNum, false); + (*m_Instance)->SearchIndex(results); + + res = gcnew array(p_resultNum); + for (int i = 0; i < p_resultNum; i++) + res[i] = gcnew BasicResult(new SPTAG::BasicResult(*(results.GetResult(i)))); + + return res; + } + + array^ AnnIndex::SearchWithMetaData(array^ p_data, int p_resultNum) + { + array^ res; + if (m_Instance == nullptr || m_dimension == 0 || p_data->LongLength != m_inputVectorSize) + return res; + + pin_ptr ptr = &p_data[0]; + SPTAG::QueryResult results(ptr, p_resultNum, true); + (*m_Instance)->SearchIndex(results); + + res = gcnew array(p_resultNum); + for (int i = 0; i < p_resultNum; i++) + res[i] = gcnew BasicResult(new SPTAG::BasicResult(*(results.GetResult(i)))); + + return res; + } + + bool AnnIndex::Save(String^ p_saveFile) + { + return SPTAG::ErrorCode::Success == (*m_Instance)->SaveIndex(string_to_char_array(p_saveFile)); + } + + array^>^ AnnIndex::Dump() + { + std::shared_ptr> buffersize = (*m_Instance)->CalculateBufferSize(); + array^>^ res = gcnew array^>(buffersize->size() + 1); + std::vector indexBlobs; + for (int i = 1; i < res->Length; i++) + { + res[i] = gcnew array(buffersize->at(i-1)); + pin_ptr ptr = &res[i][0]; + indexBlobs.push_back(SPTAG::ByteArray((std::uint8_t*)ptr, res[i]->LongLength, false)); + } + std::string config; + if (SPTAG::ErrorCode::Success != (*m_Instance)->SaveIndex(config, indexBlobs)) + { + array^>^ null; + 
return null; + } + res[0] = gcnew array(config.size()); + Marshal::Copy(IntPtr(&config[0]), res[0], 0, config.size()); + return res; + } + + bool AnnIndex::Add(array^ p_data, int p_num) + { + if (m_Instance == nullptr || p_num == 0 || m_dimension == 0 || p_data->LongLength != p_num * m_inputVectorSize) + return false; + + pin_ptr ptr = &p_data[0]; + return (SPTAG::ErrorCode::Success == (*m_Instance)->AddIndex(ptr, p_num, m_dimension)); + } + + bool AnnIndex::AddWithMetaData(array^ p_data, array^ p_meta, int p_num) + { + if (m_Instance == nullptr || p_num == 0 || m_dimension == 0 || p_data->LongLength != p_num * m_inputVectorSize) + return false; + + pin_ptr dataptr = &p_data[0]; + std::shared_ptr vectors(new SPTAG::BasicVectorSet(SPTAG::ByteArray(dataptr, p_data->LongLength, false), (*m_Instance)->GetVectorValueType(), m_dimension, p_num)); + + pin_ptr metaptr = &p_meta[0]; + std::uint64_t* offsets = new std::uint64_t[p_num + 1]{ 0 }; + int current = 0; + for (long long i = 0; i < p_meta->LongLength; i++) { + if (((char)metaptr[i]) == '\n') + offsets[++current] = (std::uint64_t)(i + 1); + } + std::shared_ptr meta(new SPTAG::MemMetadataSet(SPTAG::ByteArray(metaptr, p_meta->LongLength, false), SPTAG::ByteArray((std::uint8_t*)offsets, (p_num + 1) * sizeof(std::uint64_t), true), p_num)); + return (SPTAG::ErrorCode::Success == (*m_Instance)->AddIndex(vectors, meta)); + } + + bool AnnIndex::Delete(array^ p_data, int p_num) + { + if (m_Instance == nullptr || p_num == 0 || m_dimension == 0 || p_data->LongLength != p_num * m_inputVectorSize) + return false; + + pin_ptr ptr = &p_data[0]; + return (SPTAG::ErrorCode::Success == (*m_Instance)->DeleteIndex(ptr, p_num)); + } + + bool AnnIndex::DeleteByMetaData(array^ p_meta) + { + if (m_Instance == nullptr) + return false; + + pin_ptr metaptr = &p_meta[0]; + return (SPTAG::ErrorCode::Success == (*m_Instance)->DeleteIndex(SPTAG::ByteArray(metaptr, p_meta->LongLength, false))); + } + + AnnIndex^ AnnIndex::Load(String^ p_loaderFile) 
+ { + std::shared_ptr vecIndex; + AnnIndex^ res; + if (SPTAG::ErrorCode::Success != SPTAG::VectorIndex::LoadIndex(string_to_char_array(p_loaderFile), vecIndex) || nullptr == vecIndex) + { + res = gcnew AnnIndex(nullptr); + } + else { + res = gcnew AnnIndex(vecIndex); + } + return res; + } + + AnnIndex^ AnnIndex::Load(array^>^ p_index) + { + std::vector p_indexBlobs; + for (int i = 1; i < p_index->Length; i++) + { + pin_ptr ptr = &p_index[i][0]; + p_indexBlobs.push_back(SPTAG::ByteArray((std::uint8_t*)ptr, p_index[i]->LongLength, false)); + } + pin_ptr configptr = &p_index[0][0]; + + std::shared_ptr vecIndex; + if (SPTAG::ErrorCode::Success != SPTAG::VectorIndex::LoadIndex(std::string((char*)configptr, p_index[0]->LongLength), p_indexBlobs, vecIndex) || nullptr == vecIndex) + { + return gcnew AnnIndex(nullptr); + } + return gcnew AnnIndex(vecIndex); + } + + bool AnnIndex::Merge(String^ p_indexFilePath1, String^ p_indexFilePath2) + { + return (SPTAG::ErrorCode::Success == SPTAG::VectorIndex::MergeIndex(string_to_char_array(p_indexFilePath1), string_to_char_array(p_indexFilePath2))); + } + } + } +} \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/Wrappers/src/CoreInterface.cpp b/core/src/index/thirdparty/SPTAG/Wrappers/src/CoreInterface.cpp index 99d545bc0edb30f624881147fe5bc29286bfe21a..5a62fe0315cafd151bfe3a6b1e097cf3693e93d6 100644 --- a/core/src/index/thirdparty/SPTAG/Wrappers/src/CoreInterface.cpp +++ b/core/src/index/thirdparty/SPTAG/Wrappers/src/CoreInterface.cpp @@ -5,7 +5,7 @@ #include "inc/Helper/StringConvert.h" -AnnIndex::AnnIndex(int p_dimension) +AnnIndex::AnnIndex(DimensionType p_dimension) : m_algoType(SPTAG::IndexAlgoType::BKT), m_inputValueType(SPTAG::VectorValueType::Float), m_dimension(p_dimension) @@ -14,7 +14,7 @@ AnnIndex::AnnIndex(int p_dimension) } -AnnIndex::AnnIndex(const char* p_algoType, const char* p_valueType, int p_dimension) +AnnIndex::AnnIndex(const char* p_algoType, const char* p_valueType, DimensionType 
p_dimension) : m_algoType(SPTAG::IndexAlgoType::Undefined), m_inputValueType(SPTAG::VectorValueType::Undefined), m_dimension(p_dimension) @@ -41,7 +41,7 @@ AnnIndex::~AnnIndex() bool -AnnIndex::Build(ByteArray p_data, int p_num) +AnnIndex::Build(ByteArray p_data, SizeType p_num) { if (nullptr == m_index) { @@ -51,12 +51,12 @@ AnnIndex::Build(ByteArray p_data, int p_num) { return false; } - return (SPTAG::ErrorCode::Success == m_index->BuildIndex(p_data.Data(), p_num, m_dimension)); + return (SPTAG::ErrorCode::Success == m_index->BuildIndex(p_data.Data(), (SPTAG::SizeType)p_num, (SPTAG::DimensionType)m_dimension)); } bool -AnnIndex::BuildWithMetaData(ByteArray p_data, ByteArray p_meta, int p_num) +AnnIndex::BuildWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num, bool p_withMetaIndex) { if (nullptr == m_index) { @@ -69,17 +69,17 @@ AnnIndex::BuildWithMetaData(ByteArray p_data, ByteArray p_meta, int p_num) std::shared_ptr vectors(new SPTAG::BasicVectorSet(p_data, m_inputValueType, - static_cast(m_dimension), - static_cast(p_num))); + static_cast(m_dimension), + static_cast(p_num))); std::uint64_t* offsets = new std::uint64_t[p_num + 1]{ 0 }; - int current = 1; + SizeType current = 1; for (size_t i = 0; i < p_meta.Length(); i++) { if (((char)p_meta.Data()[i]) == '\n') offsets[current++] = (std::uint64_t)(i + 1); } - std::shared_ptr meta(new SPTAG::MemMetadataSet(p_meta, ByteArray((std::uint8_t*)offsets, (p_num + 1) * sizeof(std::uint64_t), true), p_num)); - return (SPTAG::ErrorCode::Success == m_index->BuildIndex(vectors, meta)); + std::shared_ptr meta(new SPTAG::MemMetadataSet(p_meta, ByteArray((std::uint8_t*)offsets, (p_num + 1) * sizeof(std::uint64_t), true), (SPTAG::SizeType)p_num)); + return (SPTAG::ErrorCode::Success == m_index->BuildIndex(vectors, meta, p_withMetaIndex)); } @@ -160,7 +160,7 @@ AnnIndex::Load(const char* p_loaderFile) bool -AnnIndex::Add(ByteArray p_data, int p_num) +AnnIndex::Add(ByteArray p_data, SizeType p_num) { if (nullptr == 
m_index) { @@ -170,12 +170,12 @@ AnnIndex::Add(ByteArray p_data, int p_num) { return false; } - return (SPTAG::ErrorCode::Success == m_index->AddIndex(p_data.Data(), p_num, m_dimension)); + return (SPTAG::ErrorCode::Success == m_index->AddIndex(p_data.Data(), (SPTAG::SizeType)p_num, (SPTAG::DimensionType)m_dimension)); } bool -AnnIndex::AddWithMetaData(ByteArray p_data, ByteArray p_meta, int p_num) +AnnIndex::AddWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num) { if (nullptr == m_index) { @@ -188,26 +188,43 @@ AnnIndex::AddWithMetaData(ByteArray p_data, ByteArray p_meta, int p_num) std::shared_ptr vectors(new SPTAG::BasicVectorSet(p_data, m_inputValueType, - static_cast(m_dimension), - static_cast(p_num))); + static_cast(m_dimension), + static_cast(p_num))); std::uint64_t* offsets = new std::uint64_t[p_num + 1]{ 0 }; - int current = 1; + SizeType current = 1; for (size_t i = 0; i < p_meta.Length(); i++) { if (((char)p_meta.Data()[i]) == '\n') offsets[current++] = (std::uint64_t)(i + 1); } - std::shared_ptr meta(new SPTAG::MemMetadataSet(p_meta, ByteArray((std::uint8_t*)offsets, (p_num + 1) * sizeof(std::uint64_t), true), p_num)); + std::shared_ptr meta(new SPTAG::MemMetadataSet(p_meta, ByteArray((std::uint8_t*)offsets, (p_num + 1) * sizeof(std::uint64_t), true), (SPTAG::SizeType)p_num)); return (SPTAG::ErrorCode::Success == m_index->AddIndex(vectors, meta)); } bool -AnnIndex::Delete(ByteArray p_data, int p_num) +AnnIndex::Delete(ByteArray p_data, SizeType p_num) { - if (nullptr != m_index && p_num > 0) + if (nullptr == m_index || p_num == 0 || m_dimension == 0 || p_data.Length() != p_num * m_inputVectorSize) { - return (SPTAG::ErrorCode::Success == m_index->DeleteIndex(p_data.Data(), p_num)); + return false; } - return false; + + return (SPTAG::ErrorCode::Success == m_index->DeleteIndex(p_data.Data(), (SPTAG::SizeType)p_num)); +} + + +bool +AnnIndex::DeleteByMetaData(ByteArray p_meta) +{ + if (nullptr == m_index) return false; + + return 
(SPTAG::ErrorCode::Success == m_index->DeleteIndex(p_meta)); } + + +bool +AnnIndex::Merge(const char* p_indexFilePath1, const char* p_indexFilePath2) +{ + return (SPTAG::ErrorCode::Success == SPTAG::VectorIndex::MergeIndex(p_indexFilePath1, p_indexFilePath2)); +} \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/build.sh b/core/src/index/thirdparty/SPTAG/build.sh deleted file mode 100755 index bd9833d605886768ebebcc4385667a81d160a8e4..0000000000000000000000000000000000000000 --- a/core/src/index/thirdparty/SPTAG/build.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -x - -if [[ -d cmake_build ]]; then - rm cmake_build -rf -fi - -mkdir cmake_build -cd cmake_build - -INSTALL_PREFIX=$(pwd)/../../build - -CMAKE_CMD="cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} ../" - -${CMAKE_CMD} - -make -j8 && make install diff --git a/core/src/index/thirdparty/SPTAG/docs/GettingStart.md b/core/src/index/thirdparty/SPTAG/docs/GettingStart.md index 756aa70ed73942383e8581301ec82b5c632005fd..9f82b680acca13e9a7a4ebd2d20f65cb18856d37 100644 --- a/core/src/index/thirdparty/SPTAG/docs/GettingStart.md +++ b/core/src/index/thirdparty/SPTAG/docs/GettingStart.md @@ -231,6 +231,56 @@ if __name__ == '__main__': testSPTAGClient() ``` - + + ### **C# Support** +> Singlebox CsharpWrapper + ```C# +using System; +using System.Text; + +public class test +{ + static int dimension = 10; + static int n = 10; + static int k = 3; + + static byte[] createFloatArray(int n) + { + byte[] data = new byte[n * dimension * sizeof(float)]; + for (int i = 0; i < n; i++) + for (int j = 0; j < dimension; j++) + Array.Copy(BitConverter.GetBytes((float)i), 0, data, (i * dimension + j) * sizeof(float), 4); + return data; + } + + static byte[] createMetadata(int n) + { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < n; i++) + sb.Append(i.ToString() + '\n'); + return Encoding.ASCII.GetBytes(sb.ToString()); + } + + static void Main() + { + { + AnnIndex idx = new 
AnnIndex("BKT", "Float", dimension); + idx.SetBuildParam("DistCalcMethod", "L2"); + byte[] data = createFloatArray(n); + byte[] meta = createMetadata(n); + idx.BuildWithMetaData(data, meta, n); + idx.Save("testcsharp"); + } + + AnnIndex index = AnnIndex.Load("testcsharp"); + BasicResult[] res = index.SearchWithMetaData(createFloatArray(1), k); + for (int i = 0; i < res.Length; i++) + Console.WriteLine("result " + i.ToString() + ":" + res[i].Dist.ToString() + "@(" + res[i].VID.ToString() + "," + Encoding.ASCII.GetString(res[i].Meta) + ")"); + Console.WriteLine("test finish!"); + } +} + + ``` + \ No newline at end of file diff --git a/core/src/index/thirdparty/SPTAG/docs/Parameters.md b/core/src/index/thirdparty/SPTAG/docs/Parameters.md new file mode 100644 index 0000000000000000000000000000000000000000..9e2fa93715a4edff089241a38608bfeb5ce5a412 --- /dev/null +++ b/core/src/index/thirdparty/SPTAG/docs/Parameters.md @@ -0,0 +1,159 @@ +## **Parameters** + +> Common Parameters + +| ParametersName | type | default | definition| +|---|---|---|---| +| Samples | int | 1000 | how many points will be sampled to do tree node split | +|TPTNumber | int | 32 | number of TPT trees to help with graph construction | +|TPTLeafSize | int | 2000 | TPT tree leaf size | +|NeighborhoodSize | int | 32 | number of neighbors each node has in the neighborhood graph | +|GraphNeighborhoodScale | int | 2 | number of neighborhood size scale in the build stage | +|CEF | int | 1000 | number of results used to construct RNG | +|MaxCheckForRefineGraph| int | 10000 | how many nodes each node will visit during graph refine in the build stage | +|NumberOfThreads | int | 1 | number of threads to use to speed up the build | +|DistCalcMethod | string | Cosine | choose from Cosine and L2 | +|MaxCheck | int | 8192 | how many nodes will be visited for a query in the search stage | + +> BKT + +| ParametersName | type | default | definition| +|---|---|---|---| +| BKTNumber | int | 1 | number of BKT trees | +|
BKTKMeansK | int | 32 | how many children each tree node has | + +> KDT + +| ParametersName | type | default | definition| +|---|---|---|---| +| KDTNumber | int | 1 | number of KDT trees | + +> Parameters that will affect the index size +* NeighborhoodSize +* BKTNumber +* KDTNumber + +> Parameters that will affect the index build time +* NumberOfThreads +* TPTNumber +* TPTLeafSize +* GraphNeighborhoodScale +* CEF +* MaxCheckForRefineGraph + +> Parameters that will affect the index quality +* TPTNumber +* TPTLeafSize +* GraphNeighborhoodScale +* CEF +* MaxCheckForRefineGraph +* NeighborhoodSize +* KDTNumber + +> Parameters that will affect search latency and recall +* MaxCheck + +## **NNI for parameters tuning** + +Prepare vector data file **data.tsv**, query data file **query.tsv**, and truth file **truth.txt** following the format introduced in the [Get Started](GettingStart.md). + +Install [Microsoft NNI](https://github.com/microsoft/nni) and write the following Python code (nni_sptag.py), parameter search space configuration (search_space.json) and NNI environment configuration (config.yml). + +> nni_sptag.py + +```Python +import nni +import os + +vector_dimension = 10 +vector_type = 'Float' +index_algo = 'BKT' +threads = 32 +k = 3 + +def main(): + para = nni.get_next_parameter() + cmd_build = "./indexbuilder -d %d -v %s -i data.tsv -o index -a %s -t %d " % (vector_dimension, vector_type, index_algo, threads) + for p, v in para.items(): + cmd_build += "Index."
+ p + "=" + str(v) + cmd_test = "./indexsearcher index Index.QueryFile=query.tsv Index.TruthFile=truth.txt Index.K=%d" % (k) + os.system(cmd_build) + os.system(cmd_test + " > out.txt") + with open("out.txt", "r") as fd: + lines = fd.readlines() + res = lines[-2] + segs = res.split() + recall = float(segs[-2]) + avg_latency = float(segs[-5]) + score = recall + nni.report_final_result(score) + +if __name__ == '__main__': + main() +``` +> search_space.json + +```json +{ + "BKTKmeansK": {"_type": "choice", "_value": [2, 4, 8, 16, 32]}, + "GraphNeighborhoodScale": {"_type": "choice", "_value": [2, 4, 8, 16, 32]} +} + +``` + +> config.yml + +```yaml +authorName: default + +experimentName: example_sptag + +trialConcurrency: 1 + +maxExecDuration: 1h + +maxTrialNum: 10 + +#choice: local, remote, pai + +trainingServicePlatform: local + +searchSpacePath: search_space.json + +#choice: true, false + +useAnnotation: false + +tuner: + + #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner + + #SMAC (SMAC should be installed through nnictl) + + builtinTunerName: TPE + + classArgs: + + #choice: maximize, minimize + + optimize_mode: maximize + +trial: + + command: python3 nni_sptag.py + + codeDir: . 
+ + gpuNum: 0 + +``` + +Then start the tuning (tuning results can be found at the Web UI URLs shown in the command output): +```bash +nnictl create --config config.yml +``` + +Stop the tuning: +```bash +nnictl stop +``` \ No newline at end of file diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt index 0a52a2ed837577886211f374cf630ff4179be00d..7c2a1f1914eab468c5b1f12698e5838bbc8d56aa 100644 --- a/core/src/index/unittest/CMakeLists.txt +++ b/core/src/index/unittest/CMakeLists.txt @@ -4,7 +4,6 @@ include_directories(${CORE_SOURCE_DIR}/knowhere) include_directories(${CORE_SOURCE_DIR}) include_directories(/usr/local/cuda/include) link_directories(/usr/local/cuda/lib64) -link_directories(${CORE_SOURCE_DIR}/thirdparty/tbb) message(STATUS "arrow prefix: ${ARROW_PREFIX}") message(STATUS "libjemalloc_pic path: ${ARROW_PREFIX}/lib/libjemalloc_pic.a") @@ -13,7 +12,6 @@ set(depend_libs gtest gmock gtest_main gmock_main faiss openblas lapack arrow "${ARROW_PREFIX}/lib/libjemalloc_pic.a" - tbb ) set(basic_libs diff --git a/core/src/index/unittest/faiss_ori/CMakeLists.txt b/core/src/index/unittest/faiss_ori/CMakeLists.txt index 6cd490aff96313544196a3f57c308141bfa6a750..ee826948b60b39aab650bdf073cb3a7c99a30f09 100644 --- a/core/src/index/unittest/faiss_ori/CMakeLists.txt +++ b/core/src/index/unittest/faiss_ori/CMakeLists.txt @@ -2,7 +2,6 @@ include_directories(${CORE_SOURCE_DIR}/thirdparty) include_directories(${CORE_SOURCE_DIR}/include) include_directories(/usr/local/cuda/include) link_directories(/usr/local/cuda/lib64) -link_directories(${CORE_SOURCE_DIR}/thirdparty/tbb) set(unittest_libs gtest gmock gtest_main gmock_main) @@ -10,7 +9,6 @@ set(unittest_libs set(depend_libs faiss openblas lapack arrow ${ARROW_PREFIX}/lib/libjemalloc_pic.a - tbb ) set(basic_libs diff --git a/core/src/index/unittest/test_kdt.cpp b/core/src/index/unittest/test_kdt.cpp index 8758fee669460763b34165295584cf056a509f61..f9058cc4d272fccab8e97a73fc0bc4ef5221ad0d
100644 --- a/core/src/index/unittest/test_kdt.cpp +++ b/core/src/index/unittest/test_kdt.cpp @@ -111,56 +111,57 @@ TEST_F(KDTTest, kdt_basic) { } } -TEST_F(KDTTest, kdt_serialize) { - assert(!xb.empty()); - - auto preprocessor = index_->BuildPreprocessor(base_dataset, conf); - index_->set_preprocessor(preprocessor); - - auto model = index_->Train(base_dataset, conf); - // index_->Add(base_dataset, conf); - auto binaryset = index_->Serialize(); - auto new_index = std::make_shared(); - new_index->Load(binaryset); - auto result = new_index->Search(query_dataset, conf); - AssertAnns(result, nq, k); - PrintResult(result, nq, k); - ASSERT_EQ(new_index->Count(), nb); - ASSERT_EQ(new_index->Dimension(), dim); - ASSERT_THROW({ new_index->Clone(); }, knowhere::KnowhereException); - ASSERT_NO_THROW({ new_index->Seal(); }); - - { - int fileno = 0; - const std::string& base_name = "/tmp/kdt_serialize_test_bin_"; - std::vector filename_list; - std::vector> meta_list; - for (auto& iter : binaryset.binary_map_) { - const std::string& filename = base_name + std::to_string(fileno); - FileIOWriter writer(filename); - writer(iter.second->data.get(), iter.second->size); - - meta_list.emplace_back(std::make_pair(iter.first, iter.second->size)); - filename_list.push_back(filename); - ++fileno; - } - - knowhere::BinarySet load_data_list; - for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { - auto bin_size = meta_list[i].second; - FileIOReader reader(filename_list[i]); - - auto load_data = new uint8_t[bin_size]; - reader(load_data, bin_size); - auto data = std::make_shared(); - data.reset(load_data); - load_data_list.Append(meta_list[i].first, data, bin_size); - } - - auto new_index = std::make_shared(); - new_index->Load(load_data_list); - auto result = new_index->Search(query_dataset, conf); - AssertAnns(result, nq, k); - PrintResult(result, nq, k); - } -} +// TODO(zirui): enable test +// TEST_F(KDTTest, kdt_serialize) { +// assert(!xb.empty()); +// +// auto 
preprocessor = index_->BuildPreprocessor(base_dataset, conf); +// index_->set_preprocessor(preprocessor); +// +// auto model = index_->Train(base_dataset, conf); +// // index_->Add(base_dataset, conf); +// auto binaryset = index_->Serialize(); +// auto new_index = std::make_shared(); +// new_index->Load(binaryset); +// auto result = new_index->Search(query_dataset, conf); +// AssertAnns(result, nq, k); +// PrintResult(result, nq, k); +// ASSERT_EQ(new_index->Count(), nb); +// ASSERT_EQ(new_index->Dimension(), dim); +// ASSERT_THROW({ new_index->Clone(); }, knowhere::KnowhereException); +// ASSERT_NO_THROW({ new_index->Seal(); }); +// +// { +// int fileno = 0; +// const std::string& base_name = "/tmp/kdt_serialize_test_bin_"; +// std::vector filename_list; +// std::vector> meta_list; +// for (auto& iter : binaryset.binary_map_) { +// const std::string& filename = base_name + std::to_string(fileno); +// FileIOWriter writer(filename); +// writer(iter.second->data.get(), iter.second->size); +// +// meta_list.emplace_back(std::make_pair(iter.first, iter.second->size)); +// filename_list.push_back(filename); +// ++fileno; +// } +// +// knowhere::BinarySet load_data_list; +// for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { +// auto bin_size = meta_list[i].second; +// FileIOReader reader(filename_list[i]); +// +// auto load_data = new uint8_t[bin_size]; +// reader(load_data, bin_size); +// auto data = std::make_shared(); +// data.reset(load_data); +// load_data_list.Append(meta_list[i].first, data, bin_size); +// } +// +// auto new_index = std::make_shared(); +// new_index->Load(load_data_list); +// auto result = new_index->Search(query_dataset, conf); +// AssertAnns(result, nq, k); +// PrintResult(result, nq, k); +// } +//} diff --git a/core/src/server/grpc_impl/GrpcRequestTask.cpp b/core/src/server/grpc_impl/GrpcRequestTask.cpp index 3172e73786d923ec0ac04cc66d9ff63804883526..02cb24175aa20352e349f404dc4877e59378abef 100644 --- 
a/core/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/core/src/server/grpc_impl/GrpcRequestTask.cpp @@ -40,7 +40,7 @@ namespace grpc { static const char* DQL_TASK_GROUP = "dql"; static const char* DDL_DML_TASK_GROUP = "ddl_dml"; -static const char* PING_TASK_GROUP = "ping"; +static const char* INFO_TASK_GROUP = "info"; constexpr int64_t DAY_SECONDS = 24 * 60 * 60; @@ -182,7 +182,7 @@ CreateTableTask::OnExecute() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// DescribeTableTask::DescribeTableTask(const std::string& table_name, ::milvus::grpc::TableSchema* schema) - : GrpcBaseTask(DDL_DML_TASK_GROUP), table_name_(table_name), schema_(schema) { + : GrpcBaseTask(INFO_TASK_GROUP), table_name_(table_name), schema_(schema) { } BaseTaskPtr @@ -288,7 +288,7 @@ CreateIndexTask::OnExecute() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// HasTableTask::HasTableTask(const std::string& table_name, bool& has_table) - : GrpcBaseTask(DDL_DML_TASK_GROUP), table_name_(table_name), has_table_(has_table) { + : GrpcBaseTask(INFO_TASK_GROUP), table_name_(table_name), has_table_(has_table) { } BaseTaskPtr @@ -373,7 +373,7 @@ DropTableTask::OnExecute() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ShowTablesTask::ShowTablesTask(::milvus::grpc::TableNameList* table_name_list) - : GrpcBaseTask(DDL_DML_TASK_GROUP), table_name_list_(table_name_list) { + : GrpcBaseTask(INFO_TASK_GROUP), table_name_list_(table_name_list) { } BaseTaskPtr @@ -683,7 +683,7 @@ SearchTask::OnExecute() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// CountTableTask::CountTableTask(const std::string& table_name, int64_t& row_count) - : GrpcBaseTask(DDL_DML_TASK_GROUP), table_name_(table_name), row_count_(row_count) { + : 
GrpcBaseTask(INFO_TASK_GROUP), table_name_(table_name), row_count_(row_count) { } BaseTaskPtr @@ -725,7 +725,7 @@ CountTableTask::OnExecute() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// CmdTask::CmdTask(const std::string& cmd, std::string& result) - : GrpcBaseTask(PING_TASK_GROUP), cmd_(cmd), result_(result) { + : GrpcBaseTask(INFO_TASK_GROUP), cmd_(cmd), result_(result) { } BaseTaskPtr @@ -816,7 +816,7 @@ DeleteByRangeTask::OnExecute() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// PreloadTableTask::PreloadTableTask(const std::string& table_name) - : GrpcBaseTask(DDL_DML_TASK_GROUP), table_name_(table_name) { + : GrpcBaseTask(DQL_TASK_GROUP), table_name_(table_name) { } BaseTaskPtr @@ -851,7 +851,7 @@ PreloadTableTask::OnExecute() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// DescribeIndexTask::DescribeIndexTask(const std::string& table_name, ::milvus::grpc::IndexParam* index_param) - : GrpcBaseTask(DDL_DML_TASK_GROUP), table_name_(table_name), index_param_(index_param) { + : GrpcBaseTask(INFO_TASK_GROUP), table_name_(table_name), index_param_(index_param) { } BaseTaskPtr