diff --git a/ci/jenkinsfile/cluster_dev_test.groovy b/ci/jenkinsfile/cluster_dev_test.groovy index 2d8854ca71126b507f952e40283517f870efaf20..4a15b926cf3337fd64f70636a68fde9e2b65e499 100644 --- a/ci/jenkinsfile/cluster_dev_test.groovy +++ b/ci/jenkinsfile/cluster_dev_test.groovy @@ -1,4 +1,4 @@ -timeout(time: 10, unit: 'MINUTES') { +timeout(time: 25, unit: 'MINUTES') { try { dir ("${PROJECT_NAME}_test") { checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:Test/milvus_test.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]]) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 0346e0613838fb5797bccff6e2067d53d2c1d6e5..72cf488cba01a4433ef776cc3e8259081c97224d 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -40,11 +40,15 @@ Please mark all change in change log and use the ticket from JIRA. - MS-394 - Update scheduler unittest - MS-400 - Add timestamp record in task state change function - MS-402 - Add dump implementation for TaskTableItem +- MS-403 - Add GpuCacheMgr - MS-404 - Release index after search task done avoid memory increment continues - MS-405 - Add delete task support ## New Feature - MS-343 - Implement ResourceMgr +- MS-338 - NewAPI: refine code to support CreateIndex +- MS-339 - NewAPI: refine code to support DropIndex +- MS-340 - NewAPI: implement DescribeIndex ## Task - MS-297 - disable mysql unit test @@ -69,6 +73,8 @@ Please mark all change in change log and use the ticket from JIRA. - MS-257 - Update bzip2 download url - MS-288 - Update compile scripts - MS-330 - Stability test failed caused by server core dumped +- MS-347 - Build index hangs again +- MS-382 - fix MySQLMetaImpl::CleanUpFilesWithTTL unknown column bug ## Improvement - MS-156 - Add unittest for merge result functions @@ -97,6 +103,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-324 - Show error when there is not enough gpu memory to build index - MS-328 - Check metric type on server start - MS-332 - Set grpc and thrift server run concurrently +- MS-352 - Add hybrid index ## New Feature - MS-180 - Add new mem manager @@ -156,8 +163,8 @@ Please mark all change in change log and use the ticket from JIRA. - MS-130 - Add prometheus_test - MS-144 - Add nprobe config - MS-147 - Enable IVF - - MS-130 - Add prometheus_test + ## Task - MS-74 - Change README.md in cpp - MS-88 - Add support for arm architecture diff --git a/cpp/build.sh b/cpp/build.sh index 7216296c693431681e0ce812c6fd38fe634d45b9..500eac6c679a2cf63c27f79f9431f1184ce6dee2 100755 --- a/cpp/build.sh +++ b/cpp/build.sh @@ -86,7 +86,7 @@ if [[ ! 
-d cmake_build ]]; then fi cd cmake_build - +git CUDA_COMPILER=/usr/local/cuda/bin/nvcc if [[ ${MAKE_CLEAN} == "ON" ]]; then diff --git a/cpp/cmake/BuildUtils.cmake b/cpp/cmake/BuildUtils.cmake index 9c8d7638537144d0a6c3f7db44e671c75a1f4842..265cdd0cbcd5021a3edf569fd9d7eda6c5431c9b 100644 --- a/cpp/cmake/BuildUtils.cmake +++ b/cpp/cmake/BuildUtils.cmake @@ -1,11 +1,11 @@ # Define a function that check last file modification -function(Check_Last_Modify cache_ignore_file_path working_dir last_modified_commit_id) +function(Check_Last_Modify cache_check_lists_file_path working_dir last_modified_commit_id) if(EXISTS "${working_dir}") - if(EXISTS "${cache_ignore_file_path}") + if(EXISTS "${cache_check_lists_file_path}") set(GIT_LOG_SKIP_NUM 0) set(_MATCH_ALL ON CACHE BOOL "Match all") set(_LOOP_STATUS ON CACHE BOOL "Whether out of loop") - file(STRINGS ${cache_ignore_file_path} CACHE_IGNORE_TXT) + file(STRINGS ${cache_check_lists_file_path} CACHE_IGNORE_TXT) while(_LOOP_STATUS) foreach(_IGNORE_ENTRY ${CACHE_IGNORE_TXT}) if(NOT _IGNORE_ENTRY MATCHES "^[^#]+") diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index b48724588d3c126adb767da71f3a029f949e7098..f9140b6d80a20a6b30888728350d1af7b7ed0e3e 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -157,7 +157,6 @@ if (UNIX) endif (APPLE) endif (UNIX) - # ---------------------------------------------------------------------- # thirdparty directory set(THIRDPARTY_DIR "${MILVUS_SOURCE_DIR}/thirdparty") @@ -167,7 +166,7 @@ set(THIRDPARTY_DIR "${MILVUS_SOURCE_DIR}/thirdparty") if(NOT DEFINED USE_JFROG_CACHE) set(USE_JFROG_CACHE "OFF") endif() -if(USE_JFROG_CACHE STREQUAL "ON") +if(USE_JFROG_CACHE STREQUAL "ON") set(JFROG_ARTFACTORY_CACHE_URL "http://192.168.1.201:80/artifactory/generic-local/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}") set(JFROG_USER_NAME "test") set(JFROG_PASSWORD "Fantast1c") @@ -308,9 +307,11 @@ set(EASYLOGGINGPP_MD5 "b78cd319db4be9b639927657b8aa7732") if(DEFINED ENV{MILVUS_FAISS_URL}) set(FAISS_SOURCE_URL "$ENV{MILVUS_FAISS_URL}") else() - set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") + set(FAISS_SOURCE_URL "http://192.168.1.105:6060/jinhai/faiss/-/archive/${FAISS_VERSION}/faiss-${FAISS_VERSION}.tar.gz") + # set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") endif() -set(FAISS_MD5 "0bc12737b23def156f6a1eb782050135") + +set(FAISS_MD5 "a589663865a8558205533c8ac414278c") if(DEFINED ENV{MILVUS_KNOWHERE_URL}) set(KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}") @@ -462,6 +463,7 @@ else() endif() set(GRPC_MD5 "7ec59ad54c85a12dcbbfede09bf413a9") + # ---------------------------------------------------------------------- # ARROW @@ -686,7 +688,7 @@ macro(build_bzip2) set(BZIP2_STATIC_LIB "${BZIP2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}bz2${CMAKE_STATIC_LIBRARY_SUFFIX}") - if(USE_JFROG_CACHE STREQUAL "ON") + if(USE_JFROG_CACHE STREQUAL "ON") set(BZIP2_CACHE_PACKAGE_NAME "bzip2_${BZIP2_MD5}.tar.gz") set(BZIP2_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${BZIP2_CACHE_PACKAGE_NAME}") set(BZIP2_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${BZIP2_CACHE_PACKAGE_NAME}") @@ -1184,7 +1186,7 @@ macro(build_faiss) INTERFACE_INCLUDE_DIRECTORIES "${FAISS_INCLUDE_DIR}" INTERFACE_LINK_LIBRARIES "openblas;lapack" ) endif() - + add_dependencies(faiss faiss_ep) if(${BUILD_FAISS_WITH_MKL} STREQUAL "OFF") @@ -1321,7 +1323,7 @@ if (MILVUS_BUILD_TESTS) 
if(NOT GTEST_VENDORED) endif() - + get_target_property(GTEST_INCLUDE_DIR gtest INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM "${GTEST_PREFIX}/lib") include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) @@ -1828,7 +1830,7 @@ endmacro() if(MILVUS_WITH_SNAPPY) resolve_dependency(Snappy) - + get_target_property(SNAPPY_INCLUDE_DIRS snappy INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM ${SNAPPY_PREFIX}/lib/) include_directories(SYSTEM ${SNAPPY_INCLUDE_DIRS}) @@ -2131,7 +2133,7 @@ endmacro() if(MILVUS_WITH_YAMLCPP) resolve_dependency(yaml-cpp) - + get_target_property(YAMLCPP_INCLUDE_DIR yaml-cpp INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM ${YAMLCPP_PREFIX}/lib/) include_directories(SYSTEM ${YAMLCPP_INCLUDE_DIR}) @@ -2203,7 +2205,7 @@ endmacro() if(MILVUS_WITH_ZLIB) resolve_dependency(ZLIB) - + get_target_property(ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) endif() @@ -2301,7 +2303,7 @@ endmacro() if(MILVUS_WITH_ZSTD) resolve_dependency(ZSTD) - + get_target_property(ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM ${ZSTD_PREFIX}/lib) include_directories(SYSTEM ${ZSTD_INCLUDE_DIR}) @@ -2406,7 +2408,7 @@ endmacro() if(MILVUS_WITH_AWS) resolve_dependency(AWS) - + link_directories(SYSTEM ${AWS_PREFIX}/lib) get_target_property(AWS_CPP_SDK_S3_INCLUDE_DIR aws-cpp-sdk-s3 INTERFACE_INCLUDE_DIRECTORIES) diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index c80e981bcd002e6d4d151c0231add6d852d3cd97..107b4a10605f4c530e1d7232c4e39d7dfb1de33a 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -36,8 +36,11 @@ license_config: # license configure cache_config: # cache configure cpu_cache_capacity: 16 # how many memory are used as cache, unit: GB, range: 0 ~ less than total memory - cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 + cpu_cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 insert_cache_immediately: false # insert data will be load into cache immediately for hot query + gpu_cache_capacity: 5 # how many memory are used as cache in gpu, unit: GB, RANGE: 0 ~ less than total memory + gpu_cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 + gpu_ids: 0,1 # gpu id engine_config: nprobe: 10 diff --git a/cpp/src/cache/CacheMgr.cpp b/cpp/src/cache/CacheMgr.cpp index 5e54c9abe10f375fb5d09da07eafd644aa37b3c5..977c7e1c426e2b36daf0b75e84427637da766e02 100644 --- a/cpp/src/cache/CacheMgr.cpp +++ b/cpp/src/cache/CacheMgr.cpp @@ -46,7 +46,7 @@ DataObjPtr CacheMgr::GetItem(const std::string& key) { return cache_->get(key); } -engine::Index_ptr CacheMgr::GetIndex(const std::string& key) { +engine::VecIndexPtr CacheMgr::GetIndex(const std::string& key) { DataObjPtr obj = GetItem(key); if(obj != nullptr) { return obj->data(); @@ -65,7 +65,7 @@ void CacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) { server::Metrics::GetInstance().CacheAccessTotalIncrement(); } -void CacheMgr::InsertItem(const std::string& key, const engine::Index_ptr& index) { +void CacheMgr::InsertItem(const std::string& key, const engine::VecIndexPtr& index) { if(cache_ == nullptr) { SERVER_LOG_ERROR << "Cache doesn't exist"; 
return; diff --git a/cpp/src/cache/CacheMgr.h b/cpp/src/cache/CacheMgr.h index 003c883be7cce96eb03ba0ba323fa6327736a24f..b6f1ec8ef1b9e8f4787ff18a61b21475784112f3 100644 --- a/cpp/src/cache/CacheMgr.h +++ b/cpp/src/cache/CacheMgr.h @@ -19,10 +19,10 @@ public: virtual bool ItemExists(const std::string& key); virtual DataObjPtr GetItem(const std::string& key); - virtual engine::Index_ptr GetIndex(const std::string& key); + virtual engine::VecIndexPtr GetIndex(const std::string& key); virtual void InsertItem(const std::string& key, const DataObjPtr& data); - virtual void InsertItem(const std::string& key, const engine::Index_ptr& index); + virtual void InsertItem(const std::string& key, const engine::VecIndexPtr& index); virtual void EraseItem(const std::string& key); diff --git a/cpp/src/cache/CpuCacheMgr.h b/cpp/src/cache/CpuCacheMgr.h index 8b0f98e6b481aac47721f311d11ca8d448789ea4..39e33aef8918be49269b08995b554efcaa7780ec 100644 --- a/cpp/src/cache/CpuCacheMgr.h +++ b/cpp/src/cache/CpuCacheMgr.h @@ -16,6 +16,7 @@ private: CpuCacheMgr(); public: + //TODO: use smart pointer instead static CacheMgr* GetInstance() { static CpuCacheMgr s_mgr; return &s_mgr; diff --git a/cpp/src/cache/DataObj.h b/cpp/src/cache/DataObj.h index 341df34174fe7f1124aacaac5aad05bbb08662ef..d9c14f4d1bb5af82619ba73c297329fa1e0f6ead 100644 --- a/cpp/src/cache/DataObj.h +++ b/cpp/src/cache/DataObj.h @@ -6,7 +6,7 @@ #pragma once -#include "wrapper/Index.h" +#include "wrapper/knowhere/vec_index.h" #include @@ -16,17 +16,17 @@ namespace cache { class DataObj { public: - DataObj(const engine::Index_ptr& index) + DataObj(const engine::VecIndexPtr& index) : index_(index) {} - DataObj(const engine::Index_ptr& index, int64_t size) + DataObj(const engine::VecIndexPtr& index, int64_t size) : index_(index), size_(size) {} - engine::Index_ptr data() { return index_; } - const engine::Index_ptr& data() const { return index_; } + engine::VecIndexPtr data() { return index_; } + const engine::VecIndexPtr& data() const { return index_; } int64_t size() const { if(index_ == nullptr) { @@ -41,7 +41,7 @@ public: } private: - engine::Index_ptr index_ = nullptr; + engine::VecIndexPtr index_ = nullptr; int64_t size_ = 0; }; diff --git a/cpp/src/cache/GpuCacheMgr.cpp b/cpp/src/cache/GpuCacheMgr.cpp index 13eec4f2b661e148640492eabb40ad701a1d0f50..4aa562634885805dcc8c8593329add6cc4b500a3 100644 --- a/cpp/src/cache/GpuCacheMgr.cpp +++ b/cpp/src/cache/GpuCacheMgr.cpp @@ -4,6 +4,8 @@ // Proprietary and confidential. 
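The new cache_config entries above add a per-GPU cache alongside the CPU cache: gpu_cache_capacity is given in GB, gpu_cache_free_percent bounds how much of the cache survives an eviction pass, and gpu_ids lists which devices get a cache at all. Below is a minimal, self-contained sketch of reading and validating those three values; the struct and function names are illustrative, not the project's, since the patch does the equivalent work inside GpuCacheMgr with ServerConfig.

// Illustrative sketch of validating the new gpu cache settings; the names here
// are hypothetical, the patch does the equivalent inside GpuCacheMgr.
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

struct GpuCacheSettings {
    int64_t capacity_bytes = 0;
    double free_percent = 0.85;
    std::vector<uint64_t> gpu_ids;
};

GpuCacheSettings ParseGpuCacheSettings(int64_t capacity_gb,
                                       double free_percent,
                                       const std::string& gpu_ids_csv) {
    constexpr int64_t ONE_GB = 1024LL * 1024 * 1024;
    GpuCacheSettings s;
    s.capacity_bytes = capacity_gb * ONE_GB;          // config value is in GB

    // Keep the configured value only when it is a usable fraction, as the
    // GpuCacheMgr constructor does; otherwise fall back to the default.
    if (free_percent > 0.0 && free_percent <= 1.0) {
        s.free_percent = free_percent;
    }

    // "0,1" style list, split the same way as the patch's load() helper.
    std::stringstream ss(gpu_ids_csv);
    for (uint64_t id; ss >> id;) {
        s.gpu_ids.push_back(id);
        if (ss.peek() == ',') ss.ignore();
    }
    return s;
}

int main() {
    auto s = ParseGpuCacheSettings(5, 0.85, "0,1");
    std::cout << s.capacity_bytes << " bytes across " << s.gpu_ids.size() << " gpus\n";
}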
//////////////////////////////////////////////////////////////////////////////// +#include +#include "utils/Log.h" #include "GpuCacheMgr.h" #include "server/ServerConfig.h" @@ -11,19 +13,62 @@ namespace zilliz { namespace milvus { namespace cache { +std::mutex GpuCacheMgr::mutex_; +std::unordered_map GpuCacheMgr::instance_; + namespace { constexpr int64_t unit = 1024 * 1024 * 1024; + + std::vector load() { + server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); + std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1"); + + std::vector gpu_ids; + + std::stringstream ss(gpu_ids_str); + for (int i; ss >> i;) { + gpu_ids.push_back(i); + if (ss.peek() == ',') { + ss.ignore(); + } + } + return gpu_ids; + } +} + + +bool GpuCacheMgr::GpuIdInConfig(uint64_t gpu_id) { + static std::vector ids = load(); + for (auto id : ids) { + if (gpu_id == id) return true; + } + return false; } GpuCacheMgr::GpuCacheMgr() { server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); - int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 1); + + int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 2); cap *= unit; cache_ = std::make_shared(cap, 1UL<<32); + + double free_percent = config.GetDoubleValue(server::GPU_CACHE_FREE_PERCENT, 0.85); + if (free_percent > 0.0 && free_percent <= 1.0) { + cache_->set_freemem_percent(free_percent); + } else { + SERVER_LOG_ERROR << "Invalid gpu_cache_free_percent: " << free_percent << + ", defaultly set to " << cache_->freemem_percent(); + } } void GpuCacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) { //TODO: copy data to gpu + if (cache_ == nullptr) { + SERVER_LOG_ERROR << "Cache doesn't exist"; + return; + } + + cache_->insert(key, data); } } diff --git a/cpp/src/cache/GpuCacheMgr.h b/cpp/src/cache/GpuCacheMgr.h index 4efec08cec772225d750b8a3df567b3cdb37dc1f..f26dfaa1b7912c607f0befef008dbfe58c9d1551 100644 --- a/cpp/src/cache/GpuCacheMgr.h +++ b/cpp/src/cache/GpuCacheMgr.h @@ -5,22 +5,41 @@ //////////////////////////////////////////////////////////////////////////////// #include "CacheMgr.h" +#include +#include namespace zilliz { namespace milvus { namespace cache { +class GpuCacheMgr; +using GpuCacheMgrPtr = std::shared_ptr; + class GpuCacheMgr : public CacheMgr { -private: +public: GpuCacheMgr(); -public: - static CacheMgr* GetInstance() { - static GpuCacheMgr s_mgr; - return &s_mgr; + static bool GpuIdInConfig(uint64_t gpu_id); + + static CacheMgr* GetInstance(uint64_t gpu_id) { + if (instance_.find(gpu_id) == instance_.end()) { + std::lock_guard lock(mutex_); + if (instance_.find(gpu_id) == instance_.end()) { + if (GpuIdInConfig(gpu_id)) { + instance_.insert(std::pair(gpu_id, std::make_shared())); + } else { + return nullptr; + } + } + } + return instance_[gpu_id].get(); } void InsertItem(const std::string& key, const DataObjPtr& data) override; + +private: + static std::mutex mutex_; + static std::unordered_map instance_; }; } diff --git a/cpp/src/config/YamlConfigMgr.cpp b/cpp/src/config/YamlConfigMgr.cpp index 9a34ef3e63c978c184d859cc5b3dc6997ab7ac06..ee935bf32ccf460fe128afc7de3738401b593667 100644 --- a/cpp/src/config/YamlConfigMgr.cpp +++ b/cpp/src/config/YamlConfigMgr.cpp @@ -73,19 +73,19 @@ YamlConfigMgr::SetChildConfig(const YAML::Node& node, return false; } -bool -YamlConfigMgr::SetSequence(const YAML::Node &node, - const std::string &child_name, - ConfigNode &config) { - if(node[child_name].IsDefined ()) { - size_t 
cnt = node[child_name].size(); - for(size_t i = 0; i < cnt; i++){ - config.AddSequenceItem(child_name, node[child_name][i].as()); - } - return true; - } - return false; -} +//bool +//YamlConfigMgr::SetSequence(const YAML::Node &node, +// const std::string &child_name, +// ConfigNode &config) { +// if(node[child_name].IsDefined ()) { +// size_t cnt = node[child_name].size(); +// for(size_t i = 0; i < cnt; i++){ +// config.AddSequenceItem(child_name, node[child_name][i].as()); +// } +// return true; +// } +// return false; +//} void YamlConfigMgr::LoadConfigNode(const YAML::Node& node, ConfigNode& config) { @@ -98,8 +98,8 @@ YamlConfigMgr::LoadConfigNode(const YAML::Node& node, ConfigNode& config) { SetConfigValue(node, key, config); } else if(node[key].IsMap()){ SetChildConfig(node, key, config); - } else if(node[key].IsSequence()){ - SetSequence(node, key, config); +// } else if(node[key].IsSequence()){ +// SetSequence(node, key, config); } } } diff --git a/cpp/src/config/YamlConfigMgr.h b/cpp/src/config/YamlConfigMgr.h index b8828b7a8c297598f3500ca21a86943c9b21e957..05b55d9da526e64e05c597386b9ab4f6cd11145c 100644 --- a/cpp/src/config/YamlConfigMgr.h +++ b/cpp/src/config/YamlConfigMgr.h @@ -33,10 +33,10 @@ class YamlConfigMgr : public IConfigMgr { const std::string &name, ConfigNode &config); - bool - SetSequence(const YAML::Node &node, - const std::string &child_name, - ConfigNode &config); +// bool +// SetSequence(const YAML::Node &node, +// const std::string &child_name, +// ConfigNode &config); void LoadConfigNode(const YAML::Node& node, ConfigNode& config); diff --git a/cpp/src/db/Constants.h b/cpp/src/db/Constants.h index e94dfa6aea8710948908b0ea3fe4bb2e2793ee5e..479f67056304706616e1870b6cd1b8fa2388a084 100644 --- a/cpp/src/db/Constants.h +++ b/cpp/src/db/Constants.h @@ -5,19 +5,25 @@ ******************************************************************************/ #pragma once +#include + namespace zilliz { namespace milvus { namespace engine { -constexpr size_t K = 1024UL; -constexpr size_t M = K * K; -constexpr size_t G = K * M; -constexpr size_t T = K * G; +constexpr uint64_t K = 1024UL; +constexpr uint64_t M = K * K; +constexpr uint64_t G = K * M; +constexpr uint64_t T = K * G; -constexpr size_t MAX_TABLE_FILE_MEM = 128 * M; +constexpr uint64_t MAX_TABLE_FILE_MEM = 128 * M; constexpr int VECTOR_TYPE_SIZE = sizeof(float); +static constexpr uint64_t ONE_KB = K; +static constexpr uint64_t ONE_MB = ONE_KB*ONE_KB; +static constexpr uint64_t ONE_GB = ONE_KB*ONE_MB; + } // namespace engine } // namespace milvus } // namespace zilliz diff --git a/cpp/src/db/DB.h b/cpp/src/db/DB.h index 282e7627178c85b8ca1fae766d16d03c3df54131..b143f0c23383a1ce6d11c6152b7486156127431e 100644 --- a/cpp/src/db/DB.h +++ b/cpp/src/db/DB.h @@ -46,6 +46,9 @@ public: virtual Status Size(uint64_t& result) = 0; virtual Status BuildIndex(const std::string& table_id) = 0; + virtual Status CreateIndex(const std::string& table_id, const TableIndex& index) = 0; + virtual Status DescribeIndex(const std::string& table_id, TableIndex& index) = 0; + virtual Status DropIndex(const std::string& table_id) = 0; virtual Status DropAll() = 0; diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index ddee622669bbacad127b53c88e54278cebe0ae42..8a002f29b094d2ec4c3d67c9ac88bd9a00a90cc6 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -6,6 +6,7 @@ #include "DBImpl.h" #include "src/db/meta/SqliteMetaImpl.h" #include "Log.h" +#include "Utils.h" #include "engine/EngineFactory.h" #include "Factories.h" #include 
"metrics/Metrics.h" @@ -59,25 +60,6 @@ void CollectQueryMetrics(double total_time, size_t nq) { server::Metrics::GetInstance().QueryVectorResponsePerSecondGaugeSet(double (nq) / total_time); } -void CollectFileMetrics(int file_type, size_t file_size, double total_time) { - switch(file_type) { - case meta::TableFileSchema::RAW: - case meta::TableFileSchema::TO_INDEX: { - server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); - server::Metrics::GetInstance().RawFileSizeHistogramObserve(file_size); - server::Metrics::GetInstance().RawFileSizeTotalIncrement(file_size); - server::Metrics::GetInstance().RawFileSizeGaugeSet(file_size); - break; - } - default: { - server::Metrics::GetInstance().SearchIndexDataDurationSecondsHistogramObserve(total_time); - server::Metrics::GetInstance().IndexFileSizeHistogramObserve(file_size); - server::Metrics::GetInstance().IndexFileSizeTotalIncrement(file_size); - server::Metrics::GetInstance().IndexFileSizeGaugeSet(file_size); - break; - } - } -} } @@ -104,13 +86,18 @@ Status DBImpl::DeleteTable(const std::string& table_id, const meta::DatesT& date //dates partly delete files of the table but currently we don't support ENGINE_LOG_DEBUG << "Prepare to delete table " << table_id; - mem_mgr_->EraseMemVector(table_id); //not allow insert - meta_ptr_->DeleteTable(table_id); //soft delete table + if (dates.empty()) { + mem_mgr_->EraseMemVector(table_id); //not allow insert + meta_ptr_->DeleteTable(table_id); //soft delete table + + //scheduler will determine when to delete table files + TaskScheduler& scheduler = TaskScheduler::GetInstance(); + DeleteContextPtr context = std::make_shared(table_id, meta_ptr_); + scheduler.Schedule(context); + } else { + meta_ptr_->DropPartitionsByDates(table_id, dates); + } - //scheduler will determine when to delete table files - TaskScheduler& scheduler = TaskScheduler::GetInstance(); - DeleteContextPtr context = std::make_shared(table_id, meta_ptr_); - scheduler.Schedule(context); return Status::OK(); } @@ -143,7 +130,7 @@ Status DBImpl::PreloadTable(const std::string &table_id) { for(auto &day_files : files) { for (auto &file : day_files.second) { - ExecutionEnginePtr engine = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_); + ExecutionEnginePtr engine = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_, (MetricType)file.metric_type_, file.nlist_); if(engine == nullptr) { ENGINE_LOG_ERROR << "Invalid engine type"; return Status::Error("Invalid engine type"); @@ -204,7 +191,7 @@ Status DBImpl::Query(const std::string &table_id, uint64_t k, uint64_t nq, uint6 Status DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) { - ENGINE_LOG_DEBUG << "Query by vectors"; + ENGINE_LOG_DEBUG << "Query by vectors " << table_id; //get all table files from table meta::DatePartionedTableFilesSchema files; @@ -355,7 +342,7 @@ void DBImpl::StartMetricTask() { server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL); int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); - server::Metrics::GetInstance().CacheUsageGaugeSet(cache_usage*100/cache_total); + server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage*100/cache_total); uint64_t size; Size(size); server::Metrics::GetInstance().DataFileSizeGaugeSet(size); @@ -424,7 
+411,8 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, //step 2: merge files ExecutionEnginePtr index = - EngineFactory::Build(table_file.dimension_, table_file.location_, (EngineType)table_file.engine_type_); + EngineFactory::Build(table_file.dimension_, table_file.location_, (EngineType)table_file.engine_type_, + (MetricType)table_file.metric_type_, table_file.nlist_); meta::TableFilesSchema updated; long index_size = 0; @@ -465,12 +453,9 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, } //step 4: update table files state - if (index_size >= options_.index_trigger_size) { - table_file.file_type_ = meta::TableFileSchema::TO_INDEX; - } else { - table_file.file_type_ = meta::TableFileSchema::RAW; - } - table_file.size_ = index_size; + table_file.file_type_ = meta::TableFileSchema::RAW; + table_file.file_size_ = index->PhysicalSize(); + table_file.row_count_ = index->Count(); updated.push_back(table_file); status = meta_ptr_->UpdateTableFiles(updated); ENGINE_LOG_DEBUG << "New merged file " << table_file.file_id_ << @@ -566,7 +551,7 @@ Status DBImpl::BuildIndex(const std::string& table_id) { int times = 1; while (has) { - ENGINE_LOG_DEBUG << "Non index files detected! Will build index " << times; + ENGINE_LOG_DEBUG << "Non index files detected in " << table_id << "! Will build index " << times; meta_ptr_->UpdateTableFilesToIndex(table_id); /* StartBuildIndexTask(true); */ std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10*1000, times*100))); @@ -574,11 +559,64 @@ Status DBImpl::BuildIndex(const std::string& table_id) { times++; } return Status::OK(); - /* return BuildIndexByTable(table_id); */ +} + +Status DBImpl::CreateIndex(const std::string& table_id, const TableIndex& index) { + { + std::unique_lock lock(build_index_mutex_); + + //step 1: check index difference + TableIndex old_index; + auto status = DescribeIndex(table_id, old_index); + if(!status.ok()) { + ENGINE_LOG_ERROR << "Failed to get table index info"; + return status; + } + + if(utils::IsSameIndex(old_index, index)) { + ENGINE_LOG_DEBUG << "Same index setting, no need to create index again"; + return Status::OK(); + } + + //step 2: drop old index files + DropIndex(table_id); + + //step 3: update index info + + status = meta_ptr_->UpdateTableIndexParam(table_id, index); + if (!status.ok()) { + ENGINE_LOG_ERROR << "Failed to update table index info"; + return status; + } + } + + bool has = false; + auto status = meta_ptr_->HasNonIndexFiles(table_id, has); + int times = 1; + + while (has) { + ENGINE_LOG_DEBUG << "Non index files detected! 
Will build index " << times; + status = meta_ptr_->UpdateTableFilesToIndex(table_id); + /* StartBuildIndexTask(true); */ + std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10*1000, times*100))); + status = meta_ptr_->HasNonIndexFiles(table_id, has); + times++; + } + return Status::OK(); +} + +Status DBImpl::DescribeIndex(const std::string& table_id, TableIndex& index) { + return meta_ptr_->DescribeTableIndex(table_id, index); +} + +Status DBImpl::DropIndex(const std::string& table_id) { + return meta_ptr_->DropTableIndex(table_id); } Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { - ExecutionEnginePtr to_index = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_); + ExecutionEnginePtr to_index = + EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_, + (MetricType)file.metric_type_, file.nlist_); if(to_index == nullptr) { ENGINE_LOG_ERROR << "Invalid engine type"; return Status::Error("Invalid engine type"); @@ -650,26 +688,27 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { //step 6: update meta table_file.file_type_ = meta::TableFileSchema::INDEX; - table_file.size_ = index->Size(); + table_file.file_size_ = index->PhysicalSize(); + table_file.row_count_ = index->Count(); - auto to_remove = file; - to_remove.file_type_ = meta::TableFileSchema::TO_DELETE; + auto origin_file = file; + origin_file.file_type_ = meta::TableFileSchema::BACKUP; - meta::TableFilesSchema update_files = {table_file, to_remove}; + meta::TableFilesSchema update_files = {table_file, origin_file}; status = meta_ptr_->UpdateTableFiles(update_files); if(status.ok()) { ENGINE_LOG_DEBUG << "New index file " << table_file.file_id_ << " of size " << index->PhysicalSize() << " bytes" - << " from file " << to_remove.file_id_; + << " from file " << origin_file.file_id_; if(options_.insert_cache_immediately_) { index->Cache(); } } else { //failed to update meta, mark the new file as to_delete, don't delete old file - to_remove.file_type_ = meta::TableFileSchema::TO_INDEX; - status = meta_ptr_->UpdateTableFile(to_remove); - ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << to_remove.file_id_ << " to to_index"; + origin_file.file_type_ = meta::TableFileSchema::TO_INDEX; + status = meta_ptr_->UpdateTableFile(origin_file); + ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << origin_file.file_id_ << " to to_index"; table_file.file_type_ = meta::TableFileSchema::TO_DELETE; status = meta_ptr_->UpdateTableFile(table_file); @@ -685,30 +724,6 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { return Status::OK(); } -Status DBImpl::BuildIndexByTable(const std::string& table_id) { - std::unique_lock lock(build_index_mutex_); - meta::TableFilesSchema to_index_files; - meta_ptr_->FilesToIndex(to_index_files); - - Status status; - - for (auto& file : to_index_files) { - status = BuildIndex(file); - if (!status.ok()) { - ENGINE_LOG_ERROR << "Building index for " << file.id_ << " failed: " << status.ToString(); - return status; - } - ENGINE_LOG_DEBUG << "Sync building index for " << file.id_ << " passed"; - - if (shutting_down_.load(std::memory_order_acquire)){ - ENGINE_LOG_DEBUG << "Server will shutdown, skip build index action for table " << table_id; - break; - } - } - - return status; -} - void DBImpl::BackgroundBuildIndex() { ENGINE_LOG_TRACE << " Background build index thread start"; diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index 
4aa7ac07bf24345d3d2db6ba5dc82984e243294d..97c36fadbc14e0839f936336668313e34c1e98c1 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -93,6 +93,12 @@ class DBImpl : public DB { Status BuildIndex(const std::string& table_id) override; + Status CreateIndex(const std::string& table_id, const TableIndex& index) override; + + Status DescribeIndex(const std::string& table_id, TableIndex& index) override; + + Status DropIndex(const std::string& table_id) override; + ~DBImpl() override; private: @@ -122,8 +128,6 @@ class DBImpl : public DB { void StartBuildIndexTask(bool force=false); void BackgroundBuildIndex(); - Status - BuildIndexByTable(const std::string& table_id); Status BuildIndex(const meta::TableFileSchema &); diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp index bb1056e3c22003b699ecf02a924e7759633fe575..58883d5c7b712b3aa5eab20933cb0cb36ee5293c 100644 --- a/cpp/src/db/Factories.cpp +++ b/cpp/src/db/Factories.cpp @@ -90,11 +90,11 @@ std::shared_ptr DBMetaImplFactory::Build(const DBMetaOptions& metaOp } } -std::shared_ptr DBFactory::Build() { - auto options = OptionsFactory::Build(); - auto db = DBFactory::Build(options); - return std::shared_ptr(db); -} +//std::shared_ptr DBFactory::Build() { +// auto options = OptionsFactory::Build(); +// auto db = DBFactory::Build(options); +// return std::shared_ptr(db); +//} DB* DBFactory::Build(const Options& options) { return new DBImpl(options); diff --git a/cpp/src/db/Factories.h b/cpp/src/db/Factories.h index 0e6823c385c62187a43b41fb9c2333f8a45a2b17..3c3479e51234ea06bd96bb1b09c2297dc7963cf3 100644 --- a/cpp/src/db/Factories.h +++ b/cpp/src/db/Factories.h @@ -33,7 +33,7 @@ struct DBMetaImplFactory { }; struct DBFactory { - static std::shared_ptr Build(); + //static std::shared_ptr Build(); static DB *Build(const Options &); }; diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index a1ff28419da5c250d041bb2a290935636abd6e4e..80815312366911a402e72c89bedc3f0bf24dc17e 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -5,6 +5,8 @@ ******************************************************************************/ #pragma once +#include "Constants.h" + #include #include #include @@ -16,10 +18,6 @@ namespace engine { class Env; -static constexpr uint64_t ONE_KB = 1024; -static constexpr uint64_t ONE_MB = ONE_KB*ONE_KB; -static constexpr uint64_t ONE_GB = ONE_KB*ONE_MB; - static const char* ARCHIVE_CONF_DISK = "disk"; static const char* ARCHIVE_CONF_DAYS = "days"; diff --git a/cpp/src/db/Types.h b/cpp/src/db/Types.h index acf7feea4081fdc83a6bca860ee8d3780ed37f55..7a276913e3064ef9fc6c048c06054937ba006630 100644 --- a/cpp/src/db/Types.h +++ b/cpp/src/db/Types.h @@ -5,7 +5,10 @@ ******************************************************************************/ #pragma once +#include "db/engine/ExecutionEngine.h" + #include +#include namespace zilliz { namespace milvus { @@ -18,6 +21,12 @@ typedef std::vector IDNumbers; typedef std::vector> QueryResult; typedef std::vector QueryResults; +struct TableIndex { + int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; + int32_t nlist_ = 16384; + int32_t index_file_size_ = 1024; //MB + int32_t metric_type_ = (int)MetricType::L2; +}; } // namespace engine } // namespace milvus diff --git a/cpp/src/db/Utils.cpp b/cpp/src/db/Utils.cpp index 0fe1f76d297d9a1bf4cf41fa36668e9a23586e83..8dd12b0cddfca20f1dcd80bd6768abbbf88d6fd9 100644 --- a/cpp/src/db/Utils.cpp +++ b/cpp/src/db/Utils.cpp @@ -85,16 +85,20 @@ Status CreateTablePath(const DBMetaOptions& options, const std::string& 
table_id return Status::OK(); } -Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id) { - std::string db_path = options.path; - std::string table_path = db_path + TABLES_FOLDER + table_id; - boost::filesystem::remove_all(table_path); - ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; - - for(auto& path : options.slave_paths) { - table_path = path + TABLES_FOLDER + table_id; - boost::filesystem::remove_all(table_path); - ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; +Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id, bool force) { + std::vector paths = options.slave_paths; + paths.push_back(options.path); + + for(auto& path : paths) { + std::string table_path = path + TABLES_FOLDER + table_id; + if(force) { + boost::filesystem::remove_all(table_path); + ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + } else if(boost::filesystem::exists(table_path) && + boost::filesystem::is_empty(table_path)) { + boost::filesystem::remove_all(table_path); + ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + } } return Status::OK(); @@ -142,6 +146,13 @@ Status DeleteTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& return Status::OK(); } +bool IsSameIndex(const TableIndex& index1, const TableIndex& index2) { + return index1.engine_type_ == index2.engine_type_ + && index1.nlist_ == index2.nlist_ + && index1.index_file_size_ == index2.index_file_size_ + && index1.metric_type_ == index2.metric_type_; +} + } // namespace utils } // namespace engine } // namespace milvus diff --git a/cpp/src/db/Utils.h b/cpp/src/db/Utils.h index 8329f5a1fcab00288d5e98d183d96e94f6850ef2..101d849ca3b16ebf4c0fce61e88dc3d13fd4bb28 100644 --- a/cpp/src/db/Utils.h +++ b/cpp/src/db/Utils.h @@ -7,6 +7,7 @@ #include "Options.h" #include "db/meta/MetaTypes.h" +#include "db/Types.h" #include @@ -18,12 +19,14 @@ namespace utils { long GetMicroSecTimeStamp(); Status CreateTablePath(const DBMetaOptions& options, const std::string& table_id); -Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id); +Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id, bool force = true); Status CreateTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); Status GetTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); Status DeleteTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); +bool IsSameIndex(const TableIndex& index1, const TableIndex& index2); + } // namespace utils } // namespace engine } // namespace milvus diff --git a/cpp/src/db/engine/EngineFactory.cpp b/cpp/src/db/engine/EngineFactory.cpp index d09e9f8b973a557c6015990279ff16cca190b5e5..a326d6a2c66ae484fe78385f7a1cc8af60d2ffa2 100644 --- a/cpp/src/db/engine/EngineFactory.cpp +++ b/cpp/src/db/engine/EngineFactory.cpp @@ -4,7 +4,6 @@ * Proprietary and confidential. 
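The CreateIndex path above hinges on two small pieces: the TableIndex descriptor from Types.h and the IsSameIndex comparison from Utils.cpp, which let DBImpl::CreateIndex return early when the requested settings already match what is stored. A self-contained sketch of that short-circuit follows, with the enum values reduced to the numeric defaults used in the patch.

// Sketch of the "same settings, nothing to do" check performed before an index
// rebuild. Numeric values stand in for the EngineType/MetricType enums.
#include <cstdint>
#include <iostream>

struct TableIndex {
    int32_t engine_type_     = 1;      // EngineType::FAISS_IDMAP in the patch
    int32_t nlist_           = 16384;
    int32_t index_file_size_ = 1024;   // MB
    int32_t metric_type_     = 1;      // MetricType::L2 in the patch
};

bool IsSameIndex(const TableIndex& a, const TableIndex& b) {
    return a.engine_type_ == b.engine_type_ &&
           a.nlist_ == b.nlist_ &&
           a.index_file_size_ == b.index_file_size_ &&
           a.metric_type_ == b.metric_type_;
}

int main() {
    TableIndex stored;     // what DescribeIndex would return for a fresh table
    TableIndex requested;  // caller's request with identical settings
    if (IsSameIndex(stored, requested)) {
        std::cout << "Same index setting, no need to create index again\n";
    }
}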
******************************************************************************/ #include "EngineFactory.h" -//#include "FaissExecutionEngine.h" #include "ExecutionEngineImpl.h" #include "db/Log.h" @@ -12,61 +11,25 @@ namespace zilliz { namespace milvus { namespace engine { -#if 0 ExecutionEnginePtr EngineFactory::Build(uint16_t dimension, const std::string &location, - EngineType type) { + EngineType index_type, + MetricType metric_type, + int32_t nlist) { - ExecutionEnginePtr execution_engine_ptr; - - switch (type) { - case EngineType::FAISS_IDMAP: { - execution_engine_ptr = - ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, BUILD_INDEX_TYPE_IDMAP, "IDMap,Flat")); - break; - } - - case EngineType::FAISS_IVFFLAT_GPU: { - execution_engine_ptr = - ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, BUILD_INDEX_TYPE_IVF, "IDMap,Flat")); - break; - } - - case EngineType::FAISS_IVFSQ8: { - execution_engine_ptr = - ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, BUILD_INDEX_TYPE_IVFSQ8, "IDMap,Flat")); - break; - } - - default: { - ENGINE_LOG_ERROR << "Unsupported engine type"; - return nullptr; - } - } - - execution_engine_ptr->Init(); - return execution_engine_ptr; -} -#else -ExecutionEnginePtr -EngineFactory::Build(uint16_t dimension, - const std::string &location, - EngineType type) { - - if(type == EngineType::INVALID) { + if(index_type == EngineType::INVALID) { ENGINE_LOG_ERROR << "Unsupported engine type"; return nullptr; } - ENGINE_LOG_DEBUG << "EngineFactory EngineTypee: " << int(type); + ENGINE_LOG_DEBUG << "EngineFactory EngineTypee: " << (int)index_type; ExecutionEnginePtr execution_engine_ptr = - std::make_shared(dimension, location, type); + std::make_shared(dimension, location, index_type, metric_type, nlist); execution_engine_ptr->Init(); return execution_engine_ptr; } -#endif } } diff --git a/cpp/src/db/engine/EngineFactory.h b/cpp/src/db/engine/EngineFactory.h index d8c35468daa22f9e5ccfec5b0773fa8335ef85f8..7f2047af9b3b2cd22e7c47cd28eb393c58dd7d3d 100644 --- a/cpp/src/db/engine/EngineFactory.h +++ b/cpp/src/db/engine/EngineFactory.h @@ -16,7 +16,9 @@ class EngineFactory { public: static ExecutionEnginePtr Build(uint16_t dimension, const std::string& location, - EngineType type); + EngineType index_type, + MetricType metric_type, + int32_t nlist); }; } diff --git a/cpp/src/db/engine/ExecutionEngine.h b/cpp/src/db/engine/ExecutionEngine.h index 88be75aeb99988d60033bda17e5fef4a4621ff16..f0ce5554ca08842d278411c9db8ab38ad098a87a 100644 --- a/cpp/src/db/engine/ExecutionEngine.h +++ b/cpp/src/db/engine/ExecutionEngine.h @@ -23,6 +23,11 @@ enum class EngineType { MAX_VALUE = NSG_MIX, }; +enum class MetricType { + L2 = 1, + IP = 2, +}; + class ExecutionEngine { public: @@ -59,7 +64,13 @@ public: virtual Status Cache() = 0; + virtual Status GpuCache(uint64_t gpu_id) = 0; + virtual Status Init() = 0; + + virtual EngineType IndexEngineType() const = 0; + + virtual MetricType IndexMetricType() const = 0; }; using ExecutionEnginePtr = std::shared_ptr; diff --git a/cpp/src/db/engine/ExecutionEngineImpl.cpp b/cpp/src/db/engine/ExecutionEngineImpl.cpp index dd38369832f3b94578cfc742dd71486bb33568fb..deaf864e94d7f35af4f39d94355183107ce8fee4 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.cpp +++ b/cpp/src/db/engine/ExecutionEngineImpl.cpp @@ -4,8 +4,8 @@ * Proprietary and confidential. 
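With the widened EngineFactory::Build signature above, the metric type now travels with each table file instead of being read from the global engine_config; ExecutionEngineImpl then turns it into the "IP"/"L2" string that the index build config expects. A small sketch of that translation is below; the enum mirrors ExecutionEngine.h, while the helper function is illustrative rather than part of the patch.

// Sketch of mapping the per-file MetricType to the build-config metric string.
#include <cstdint>
#include <iostream>
#include <string>

enum class MetricType : int32_t {
    L2 = 1,
    IP = 2,
};

std::string ToConfigString(MetricType metric) {
    return (metric == MetricType::IP) ? "IP" : "L2";   // same fallback as the patch
}

int main() {
    std::cout << ToConfigString(MetricType::L2) << "\n";   // "L2"
    std::cout << ToConfigString(MetricType::IP) << "\n";   // "IP"
}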
******************************************************************************/ #include +#include "src/cache/GpuCacheMgr.h" -#include "src/server/ServerConfig.h" #include "src/metrics/Metrics.h" #include "db/Log.h" #include "utils/CommonUtil.h" @@ -22,26 +22,23 @@ namespace zilliz { namespace milvus { namespace engine { -namespace { -std::string GetMetricType() { - server::ServerConfig &config = server::ServerConfig::GetInstance(); - server::ConfigNode engine_config = config.GetConfig(server::CONFIG_ENGINE); - return engine_config.GetValue(server::CONFIG_METRICTYPE, "L2"); -} -} - ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, const std::string &location, - EngineType type) - : location_(location), dim(dimension), build_type(type) { - current_type = EngineType::FAISS_IDMAP; + EngineType index_type, + MetricType metric_type, + int32_t nlist) + : location_(location), + dim_(dimension), + index_type_(index_type), + metric_type_(metric_type), + nlist_(nlist) { index_ = CreatetVecIndex(EngineType::FAISS_IDMAP); if (!index_) throw Exception("Create Empty VecIndex"); Config build_cfg; build_cfg["dim"] = dimension; - build_cfg["metric_type"] = GetMetricType(); + build_cfg["metric_type"] = (metric_type_ == MetricType::IP) ? "IP" : "L2"; AutoGenParams(index_->GetType(), 0, build_cfg); auto ec = std::static_pointer_cast(index_)->Build(build_cfg); if (ec != server::KNOWHERE_SUCCESS) { throw Exception("Build index error"); } @@ -49,9 +46,14 @@ ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index, const std::string &location, - EngineType type) - : index_(std::move(index)), location_(location), build_type(type) { - current_type = type; + EngineType index_type, + MetricType metric_type, + int32_t nlist) + : index_(std::move(index)), + location_(location), + index_type_(index_type), + metric_type_(metric_type), + nlist_(nlist) { } VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { @@ -144,28 +146,60 @@ Status ExecutionEngineImpl::Load(bool to_cache) { } Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) { - try { - index_ = index_->CopyToGpu(device_id); - ENGINE_LOG_DEBUG << "CPU to GPU" << device_id; - } catch (knowhere::KnowhereException &e) { - ENGINE_LOG_ERROR << e.what(); - return Status::Error(e.what()); - } catch (std::exception &e) { - return Status::Error(e.what()); + index_ = zilliz::milvus::cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_); + bool already_in_cache = (index_ != nullptr); + auto start_time = METRICS_NOW_TIME; + if (!index_) { + try { + index_ = index_->CopyToGpu(device_id); + ENGINE_LOG_DEBUG << "CPU to GPU" << device_id; + } catch (knowhere::KnowhereException &e) { + ENGINE_LOG_ERROR << e.what(); + return Status::Error(e.what()); + } catch (std::exception &e) { + return Status::Error(e.what()); + } + } + + if (!already_in_cache) { + GpuCache(device_id); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + double physical_size = PhysicalSize(); + + server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); + server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size); } + return Status::OK(); } Status ExecutionEngineImpl::CopyToCpu() { - try { - index_ = index_->CopyToCpu(); - ENGINE_LOG_DEBUG << "GPU to CPU"; - } catch (knowhere::KnowhereException &e) { - ENGINE_LOG_ERROR << e.what(); - return Status::Error(e.what()); - } catch (std::exception &e) { - return 
Status::Error(e.what()); + index_ = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_); + bool already_in_cache = (index_ != nullptr); + auto start_time = METRICS_NOW_TIME; + if (!index_) { + try { + index_ = index_->CopyToCpu(); + ENGINE_LOG_DEBUG << "GPU to CPU"; + } catch (knowhere::KnowhereException &e) { + ENGINE_LOG_ERROR << e.what(); + return Status::Error(e.what()); + } catch (std::exception &e) { + return Status::Error(e.what()); + } + } + + if(!already_in_cache) { + Cache(); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + double physical_size = PhysicalSize(); + + server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); + server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size); } + return Status::OK(); } @@ -204,15 +238,15 @@ ExecutionEngineImpl::BuildIndex(const std::string &location) { ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_; auto from_index = std::dynamic_pointer_cast(index_); - auto to_index = CreatetVecIndex(build_type); + auto to_index = CreatetVecIndex(index_type_); if (!to_index) { throw Exception("Create Empty VecIndex"); } Config build_cfg; build_cfg["dim"] = Dimension(); - build_cfg["metric_type"] = GetMetricType(); - build_cfg["gpu_id"] = gpu_num; + build_cfg["metric_type"] = (metric_type_ == MetricType::IP) ? "IP" : "L2"; + build_cfg["gpu_id"] = gpu_num_; build_cfg["nlist"] = nlist_; AutoGenParams(to_index->GetType(), Count(), build_cfg); @@ -222,7 +256,7 @@ ExecutionEngineImpl::BuildIndex(const std::string &location) { build_cfg); if (ec != server::KNOWHERE_SUCCESS) { throw Exception("Build index error"); } - return std::make_shared(to_index, location, build_type); + return std::make_shared(to_index, location, index_type_, metric_type_, nlist_); } Status ExecutionEngineImpl::Search(long n, @@ -246,21 +280,16 @@ Status ExecutionEngineImpl::Cache() { return Status::OK(); } +Status ExecutionEngineImpl::GpuCache(uint64_t gpu_id) { + zilliz::milvus::cache::GpuCacheMgr::GetInstance(gpu_id)->InsertItem(location_, index_); +} + // TODO(linxj): remove. 
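CopyToGpu and CopyToCpu above now consult the GPU/CPU cache first, convert only on a miss, and then cache the converted index while recording load metrics. One detail worth a second look: in the hunks as written, index_ is reassigned by the cache lookup before the miss branch calls CopyToGpu/CopyToCpu on it. The sketch below keeps the lookup result in a separate local so a miss still has the original index to convert; all types are simplified stand-ins and the metrics are omitted.

// Sketch of the cache-first copy pattern, with the lookup result kept separate
// from the index being converted. Types here are stand-ins for VecIndexPtr and
// GpuCacheMgr, not the project's classes.
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>

struct Index {
    std::shared_ptr<Index> CopyToGpu(uint64_t /*device_id*/) const {
        return std::make_shared<Index>();   // stand-in for the real device copy
    }
};
using IndexPtr = std::shared_ptr<Index>;

std::unordered_map<std::string, IndexPtr> gpu_cache;   // stand-in for GpuCacheMgr

IndexPtr CopyToGpuCached(const IndexPtr& cpu_index,
                         const std::string& location,
                         uint64_t device_id) {
    auto it = gpu_cache.find(location);
    if (it != gpu_cache.end()) {
        return it->second;                                   // cache hit: reuse the copy
    }
    IndexPtr gpu_index = cpu_index->CopyToGpu(device_id);    // cache miss: convert
    gpu_cache.emplace(location, gpu_index);                  // ...and remember it
    return gpu_index;
}

int main() {
    auto cpu_index = std::make_shared<Index>();
    auto first  = CopyToGpuCached(cpu_index, "/db/tables/t1/file_1", 0);
    auto second = CopyToGpuCached(cpu_index, "/db/tables/t1/file_1", 0);
    return first == second ? 0 : 1;          // second call is served from the cache
}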
Status ExecutionEngineImpl::Init() { using namespace zilliz::milvus::server; ServerConfig &config = ServerConfig::GetInstance(); ConfigNode server_config = config.GetConfig(CONFIG_SERVER); - gpu_num = server_config.GetInt32Value("gpu_index", 0); - - switch (build_type) { - case EngineType::FAISS_IVFSQ8: - case EngineType::FAISS_IVFFLAT: { - ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); - nlist_ = engine_config.GetInt32Value(CONFIG_NLIST, 16384); - break; - } - } + gpu_num_ = server_config.GetInt32Value("gpu_index", 0); return Status::OK(); } diff --git a/cpp/src/db/engine/ExecutionEngineImpl.h b/cpp/src/db/engine/ExecutionEngineImpl.h index 948719310cd380a984fafc1d51c6a96a354b162a..90be5bddf286c0d7b2c48e60c62a69a1657f1a6a 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.h +++ b/cpp/src/db/engine/ExecutionEngineImpl.h @@ -22,11 +22,15 @@ public: ExecutionEngineImpl(uint16_t dimension, const std::string &location, - EngineType type); + EngineType index_type, + MetricType metric_type, + int32_t nlist); ExecutionEngineImpl(VecIndexPtr index, const std::string &location, - EngineType type); + EngineType index_type, + MetricType metric_type, + int32_t nlist); Status AddWithIds(long n, const float *xdata, const long *xids) override; @@ -59,8 +63,14 @@ public: Status Cache() override; + Status GpuCache(uint64_t gpu_id) override; + Status Init() override; + EngineType IndexEngineType() const override { return index_type_; } + + MetricType IndexMetricType() const override { return metric_type_; } + private: VecIndexPtr CreatetVecIndex(EngineType type); @@ -68,14 +78,14 @@ private: protected: VecIndexPtr index_ = nullptr; - EngineType build_type; - EngineType current_type; + EngineType index_type_; + MetricType metric_type_; - int64_t dim; + int64_t dim_; std::string location_; - size_t nlist_ = 0; - int64_t gpu_num = 0; + int32_t nlist_ = 0; + int64_t gpu_num_ = 0; }; diff --git a/cpp/src/db/insert/MemManagerImpl.cpp b/cpp/src/db/insert/MemManagerImpl.cpp index bd71f874da9a0e4b4fbe6c07c9e6c7b73b2f7286..7c0110e56b5f80aac783dc1f115c60b092072e10 100644 --- a/cpp/src/db/insert/MemManagerImpl.cpp +++ b/cpp/src/db/insert/MemManagerImpl.cpp @@ -42,9 +42,11 @@ Status MemManagerImpl::InsertVectorsNoLock(const std::string &table_id, MemTablePtr mem = GetMemByTable(table_id); VectorSource::Ptr source = std::make_shared(n, vectors); - auto status = mem->Add(source); + auto status = mem->Add(source, vector_ids); if (status.ok()) { - vector_ids = source->GetVectorIds(); + if (vector_ids.empty()) { + vector_ids = source->GetVectorIds(); + } } return status; } diff --git a/cpp/src/db/insert/MemTable.cpp b/cpp/src/db/insert/MemTable.cpp index 38206e25fd8615903cdae774b394402240733a0d..ff9c25e3e732154891ea148c4c692dbb51af9faf 100644 --- a/cpp/src/db/insert/MemTable.cpp +++ b/cpp/src/db/insert/MemTable.cpp @@ -15,7 +15,7 @@ MemTable::MemTable(const std::string &table_id, } -Status MemTable::Add(VectorSource::Ptr &source) { +Status MemTable::Add(VectorSource::Ptr &source, IDNumbers &vector_ids) { while (!source->AllAdded()) { @@ -27,12 +27,12 @@ Status MemTable::Add(VectorSource::Ptr &source) { Status status; if (mem_table_file_list_.empty() || current_mem_table_file->IsFull()) { MemTableFile::Ptr new_mem_table_file = std::make_shared(table_id_, meta_, options_); - status = new_mem_table_file->Add(source); + status = new_mem_table_file->Add(source, vector_ids); if (status.ok()) { mem_table_file_list_.emplace_back(new_mem_table_file); } } else { - status = current_mem_table_file->Add(source); + status = 
current_mem_table_file->Add(source, vector_ids); } if (!status.ok()) { diff --git a/cpp/src/db/insert/MemTable.h b/cpp/src/db/insert/MemTable.h index 4f0cdb7d2a113fd75ad0a3f34ba9dd0d435c2088..7b2d93ffe8d2536bf04a776629fad7f1d83947a9 100644 --- a/cpp/src/db/insert/MemTable.h +++ b/cpp/src/db/insert/MemTable.h @@ -21,7 +21,7 @@ class MemTable { MemTable(const std::string &table_id, const std::shared_ptr &meta, const Options &options); - Status Add(VectorSource::Ptr &source); + Status Add(VectorSource::Ptr &source, IDNumbers &vector_ids); void GetCurrentMemTableFile(MemTableFile::Ptr &mem_table_file); diff --git a/cpp/src/db/insert/MemTableFile.cpp b/cpp/src/db/insert/MemTableFile.cpp index 1d7053ab5a2c9b33e111dd74aaf52c8a6f9da84b..f8f79c8618bcf1afafbc7258da24b720e89c13d1 100644 --- a/cpp/src/db/insert/MemTableFile.cpp +++ b/cpp/src/db/insert/MemTableFile.cpp @@ -23,7 +23,9 @@ MemTableFile::MemTableFile(const std::string &table_id, if (status.ok()) { execution_engine_ = EngineFactory::Build(table_file_schema_.dimension_, table_file_schema_.location_, - (EngineType) table_file_schema_.engine_type_); + (EngineType) table_file_schema_.engine_type_, + (MetricType)table_file_schema_.metric_type_, + table_file_schema_.nlist_); } } @@ -41,7 +43,7 @@ Status MemTableFile::CreateTableFile() { return status; } -Status MemTableFile::Add(const VectorSource::Ptr &source) { +Status MemTableFile::Add(const VectorSource::Ptr &source, IDNumbers& vector_ids) { if (table_file_schema_.dimension_ <= 0) { std::string err_msg = "MemTableFile::Add: table_file_schema dimension = " + @@ -55,7 +57,7 @@ Status MemTableFile::Add(const VectorSource::Ptr &source) { if (mem_left >= single_vector_mem_size) { size_t num_vectors_to_add = std::ceil(mem_left / single_vector_mem_size); size_t num_vectors_added; - auto status = source->Add(execution_engine_, table_file_schema_, num_vectors_to_add, num_vectors_added); + auto status = source->Add(execution_engine_, table_file_schema_, num_vectors_to_add, num_vectors_added, vector_ids); if (status.ok()) { current_mem_ += (num_vectors_added * single_vector_mem_size); } @@ -86,7 +88,9 @@ Status MemTableFile::Serialize() { execution_engine_->Serialize(); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - table_file_schema_.size_ = size; + + table_file_schema_.file_size_ = execution_engine_->PhysicalSize(); + table_file_schema_.row_count_ = execution_engine_->Count(); server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size / total_time); diff --git a/cpp/src/db/insert/MemTableFile.h b/cpp/src/db/insert/MemTableFile.h index b582152299fb90e64a75d10c03404ab941d966f7..d754b030713ba8a75a7369f7be5d5e6e87e41b7e 100644 --- a/cpp/src/db/insert/MemTableFile.h +++ b/cpp/src/db/insert/MemTableFile.h @@ -19,7 +19,7 @@ class MemTableFile { MemTableFile(const std::string &table_id, const std::shared_ptr &meta, const Options &options); - Status Add(const VectorSource::Ptr &source); + Status Add(const VectorSource::Ptr &source, IDNumbers& vector_ids); size_t GetCurrentMem(); diff --git a/cpp/src/db/insert/VectorSource.cpp b/cpp/src/db/insert/VectorSource.cpp index f36eeb09bd4e5ae8d01fac1d429b875c71c5674c..27385b4b230303bdf9a8c7877648410ce71c4f4a 100644 --- a/cpp/src/db/insert/VectorSource.cpp +++ b/cpp/src/db/insert/VectorSource.cpp @@ -12,23 +12,31 @@ namespace engine { VectorSource::VectorSource(const size_t &n, const float *vectors) : - n_(n), - vectors_(vectors), - id_generator_(new SimpleIDGenerator()) { + n_(n), + vectors_(vectors), + 
id_generator_(std::make_shared()) { current_num_vectors_added = 0; } Status VectorSource::Add(const ExecutionEnginePtr &execution_engine, const meta::TableFileSchema &table_file_schema, const size_t &num_vectors_to_add, - size_t &num_vectors_added) { + size_t &num_vectors_added, + IDNumbers &vector_ids) { auto start_time = METRICS_NOW_TIME; num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? num_vectors_to_add : n_ - current_num_vectors_added; IDNumbers vector_ids_to_add; - id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add); + if (vector_ids.empty()) { + id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add); + } else { + vector_ids_to_add.resize(num_vectors_added); + for (int pos = current_num_vectors_added; pos < current_num_vectors_added + num_vectors_added; pos++) { + vector_ids_to_add[pos-current_num_vectors_added] = vector_ids[pos]; + } + } Status status = execution_engine->AddWithIds(num_vectors_added, vectors_ + current_num_vectors_added * table_file_schema.dimension_, vector_ids_to_add.data()); diff --git a/cpp/src/db/insert/VectorSource.h b/cpp/src/db/insert/VectorSource.h index 9792772d800ac77931f5a797ce5bea59ea7b96be..4c350c78bcb789ace5a91f8232d06546c5ad4360 100644 --- a/cpp/src/db/insert/VectorSource.h +++ b/cpp/src/db/insert/VectorSource.h @@ -21,7 +21,8 @@ class VectorSource { Status Add(const ExecutionEnginePtr &execution_engine, const meta::TableFileSchema &table_file_schema, const size_t &num_vectors_to_add, - size_t &num_vectors_added); + size_t &num_vectors_added, + IDNumbers &vector_ids); size_t GetNumVectorsAdded(); @@ -37,7 +38,7 @@ class VectorSource { size_t current_num_vectors_added; - IDGenerator *id_generator_; + std::shared_ptr id_generator_; }; //VectorSource diff --git a/cpp/src/db/meta/Meta.h b/cpp/src/db/meta/Meta.h index e88761b4460613cf8899453c19178c0fb9c04099..80ae0fb22ec2adef9b4ccb09637c362e056b55bd 100644 --- a/cpp/src/db/meta/Meta.h +++ b/cpp/src/db/meta/Meta.h @@ -8,6 +8,7 @@ #include "MetaTypes.h" #include "db/Options.h" #include "db/Status.h" +#include "db/Types.h" #include #include @@ -38,6 +39,9 @@ class Meta { virtual Status AllTables(std::vector &table_schema_array) = 0; + virtual Status + UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) = 0; + virtual Status DeleteTable(const std::string &table_id) = 0; @@ -83,6 +87,12 @@ class Meta { virtual Status HasNonIndexFiles(const std::string &table_id, bool &has) = 0; + virtual Status + DescribeTableIndex(const std::string &table_id, TableIndex& index) = 0; + + virtual Status + DropTableIndex(const std::string &table_id) = 0; + virtual Status CleanUp() = 0; diff --git a/cpp/src/db/meta/MetaTypes.h b/cpp/src/db/meta/MetaTypes.h index 9f58734f39d8ce77a0343756dd34ad1026d7fa5a..b0c3376593273de19632a46c6c325dbd0a8e8efb 100644 --- a/cpp/src/db/meta/MetaTypes.h +++ b/cpp/src/db/meta/MetaTypes.h @@ -6,6 +6,7 @@ #pragma once #include "db/engine/ExecutionEngine.h" +#include "db/Constants.h" #include #include @@ -16,6 +17,11 @@ namespace milvus { namespace engine { namespace meta { +constexpr int32_t DEFAULT_ENGINE_TYPE = (int)EngineType::FAISS_IDMAP; +constexpr int32_t DEFAULT_NLIST = 16384; +constexpr int32_t DEFAULT_INDEX_FILE_SIZE = 1024*ONE_MB; +constexpr int32_t DEFAULT_METRIC_TYPE = (int)MetricType::L2; + typedef int DateT; const DateT EmptyDate = -1; typedef std::vector DatesT; @@ -28,12 +34,13 @@ struct TableSchema { size_t id_ = 0; std::string table_id_; - int state_ = (int)NORMAL; - size_t files_cnt_ = 0; + int32_t 
state_ = (int)NORMAL; uint16_t dimension_ = 0; - long created_on_ = 0; - int engine_type_ = (int)EngineType::FAISS_IDMAP; - bool store_raw_data_ = false; + int64_t created_on_ = 0; + int32_t engine_type_ = DEFAULT_ENGINE_TYPE; + int32_t nlist_ = DEFAULT_NLIST; + int32_t index_file_size_ = DEFAULT_INDEX_FILE_SIZE; + int32_t metric_type_ = DEFAULT_METRIC_TYPE; }; // TableSchema struct TableFileSchema { @@ -45,19 +52,23 @@ struct TableFileSchema { TO_DELETE, NEW_MERGE, NEW_INDEX, + BACKUP, } FILE_TYPE; size_t id_ = 0; std::string table_id_; - int engine_type_ = (int)EngineType::FAISS_IDMAP; std::string file_id_; - int file_type_ = NEW; - size_t size_ = 0; + int32_t file_type_ = NEW; + size_t file_size_ = 0; + size_t row_count_ = 0; DateT date_ = EmptyDate; uint16_t dimension_ = 0; std::string location_; - long updated_time_ = 0; - long created_on_ = 0; + int64_t updated_time_ = 0; + int64_t created_on_ = 0; + int32_t engine_type_ = DEFAULT_ENGINE_TYPE; + int32_t nlist_ = DEFAULT_NLIST; //not persist to meta + int32_t metric_type_ = DEFAULT_METRIC_TYPE; //not persist to meta }; // TableFileSchema typedef std::vector TableFilesSchema; diff --git a/cpp/src/db/meta/MySQLConnectionPool.cpp b/cpp/src/db/meta/MySQLConnectionPool.cpp index b43126920e1814920152c0d9517f55b628d17540..8e82dc5ae7ac4db37fbb559b4a23adf214cfbfea 100644 --- a/cpp/src/db/meta/MySQLConnectionPool.cpp +++ b/cpp/src/db/meta/MySQLConnectionPool.cpp @@ -30,13 +30,13 @@ namespace meta { } } - int MySQLConnectionPool::getConnectionsInUse() { - return conns_in_use_; - } - - void MySQLConnectionPool::set_max_idle_time(int max_idle) { - max_idle_time_ = max_idle; - } +// int MySQLConnectionPool::getConnectionsInUse() { +// return conns_in_use_; +// } +// +// void MySQLConnectionPool::set_max_idle_time(int max_idle) { +// max_idle_time_ = max_idle; +// } std::string MySQLConnectionPool::getDB() { return db_; diff --git a/cpp/src/db/meta/MySQLConnectionPool.h b/cpp/src/db/meta/MySQLConnectionPool.h index 62afd2ddbf6fa650c16c8b9a49d9c466507afae1..9cde818b456c1aa018da392a034fc3fff9ec2bd2 100644 --- a/cpp/src/db/meta/MySQLConnectionPool.h +++ b/cpp/src/db/meta/MySQLConnectionPool.h @@ -44,9 +44,9 @@ public: // Other half of in-use conn count limit void release(const mysqlpp::Connection *pc) override; - int getConnectionsInUse(); - - void set_max_idle_time(int max_idle); +// int getConnectionsInUse(); +// +// void set_max_idle_time(int max_idle); std::string getDB(); diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index bf4589351ecaece83406d8989890ecd32c80ae8d..954c498f7f8e9a12492dca1d2f03b950e42bc7ce 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -32,15 +32,6 @@ namespace meta { using namespace mysqlpp; - - -// - -// - - - - namespace { Status HandleException(const std::string &desc, std::exception &e) { @@ -91,8 +82,6 @@ MySQLMetaImpl::MySQLMetaImpl(const DBMetaOptions &options_, const int &mode) } Status MySQLMetaImpl::Initialize() { - - if (!boost::filesystem::is_directory(options_.path)) { auto ret = boost::filesystem::create_directory(options_.path); if (!ret) { @@ -160,16 +149,16 @@ Status MySQLMetaImpl::Initialize() { } Query InitializeQuery = connectionPtr->query(); - InitializeQuery << "CREATE TABLE IF NOT EXISTS Tables (" << "id BIGINT PRIMARY KEY AUTO_INCREMENT, " << "table_id VARCHAR(255) UNIQUE NOT NULL, " << "state INT NOT NULL, " << "dimension SMALLINT NOT NULL, " << "created_on BIGINT NOT NULL, " << - "files_cnt BIGINT DEFAULT 0 NOT NULL, " << 
"engine_type INT DEFAULT 1 NOT NULL, " << - "store_raw_data BOOL DEFAULT false NOT NULL);"; + "nlist INT DEFAULT 16384 NOT NULL, " << + "index_file_size INT DEFAULT 1024 NOT NULL, " << + "metric_type INT DEFAULT 1 NOT NULL);"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str(); @@ -183,7 +172,8 @@ Status MySQLMetaImpl::Initialize() { "engine_type INT DEFAULT 1 NOT NULL, " << "file_id VARCHAR(255) NOT NULL, " << "file_type INT DEFAULT 0 NOT NULL, " << - "size BIGINT DEFAULT 0 NOT NULL, " << + "file_size BIGINT DEFAULT 0 NOT NULL, " << + "row_count BIGINT DEFAULT 0 NOT NULL, " << "updated_time BIGINT NOT NULL, " << "created_on BIGINT NOT NULL, " << "date INT DEFAULT -1 NOT NULL);"; @@ -195,13 +185,6 @@ Status MySQLMetaImpl::Initialize() { } } //Scoped Connection - - - - - return Status::OK(); - - } catch (const BadQuery &er) { // Handle any query errors ENGINE_LOG_ERROR << "QUERY ERROR DURING INITIALIZATION" << ": " << er.what(); @@ -217,13 +200,13 @@ Status MySQLMetaImpl::Initialize() { ENGINE_LOG_ERROR << "Wrong URI format. URI = " << uri; return Status::Error("Wrong URI format"); } + + return Status::OK(); } // PXU TODO: Temp solution. Will fix later Status MySQLMetaImpl::DropPartitionsByDates(const std::string &table_id, const DatesT &dates) { - - if (dates.empty()) { return Status::OK(); } @@ -288,12 +271,8 @@ Status MySQLMetaImpl::DropPartitionsByDates(const std::string &table_id, } Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { - - try { - MetricCollector metric; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -301,7 +280,6 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { return Status::Error("Failed to connect to database server"); } - Query createTableQuery = connectionPtr->query(); if (table_schema.table_id_.empty()) { @@ -310,7 +288,6 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { createTableQuery << "SELECT state FROM Tables " << "WHERE table_id = " << quote << table_schema.table_id_ << ";"; - ENGINE_LOG_DEBUG << "MySQLMetaImpl::CreateTable: " << createTableQuery.str(); StoreQueryResult res = createTableQuery.store(); @@ -325,25 +302,19 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { } } - - table_schema.files_cnt_ = 0; table_schema.id_ = -1; table_schema.created_on_ = utils::GetMicroSecTimeStamp(); - std::string id = "NULL"; //auto-increment std::string table_id = table_schema.table_id_; std::string state = std::to_string(table_schema.state_); std::string dimension = std::to_string(table_schema.dimension_); std::string created_on = std::to_string(table_schema.created_on_); - std::string files_cnt = "0"; std::string engine_type = std::to_string(table_schema.engine_type_); - std::string store_raw_data = table_schema.store_raw_data_ ? "true" : "false"; createTableQuery << "INSERT INTO Tables VALUES" << "(" << id << ", " << quote << table_id << ", " << state << ", " << dimension << ", " << - created_on << ", " << files_cnt << ", " << engine_type << ", " << store_raw_data << ");"; - + created_on << ", " << engine_type << ");"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::CreateTable: " << createTableQuery.str(); @@ -351,19 +322,12 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { table_schema.id_ = res.insert_id(); //Might need to use SELECT LAST_INSERT_ID()? 
//Consume all results to avoid "Commands out of sync" error - - - } else { ENGINE_LOG_ERROR << "Add Table Error"; return Status::DBTransactionError("Add Table Error", createTableQuery.error()); } } //Scoped Connection - - - - return utils::CreateTablePath(options_, table_schema.table_id_); } catch (const BadQuery &er) { @@ -377,18 +341,13 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { } catch (std::exception &e) { return HandleException("Encounter exception when create table", e); } - - return Status::OK(); } Status MySQLMetaImpl::HasNonIndexFiles(const std::string &table_id, bool &has) { - has = false; try { - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -396,7 +355,6 @@ Status MySQLMetaImpl::HasNonIndexFiles(const std::string &table_id, bool &has) { return Status::Error("Failed to connect to database server"); } - Query hasNonIndexFilesQuery = connectionPtr->query(); //since table_id is a unique column we just need to check whether it exists or not hasNonIndexFilesQuery << "SELECT EXISTS " << @@ -430,11 +388,122 @@ Status MySQLMetaImpl::HasNonIndexFiles(const std::string &table_id, bool &has) { return Status::OK(); } -Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { +Status MySQLMetaImpl::UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) { + try { + MetricCollector metric; + + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + Query updateTableIndexParamQuery = connectionPtr->query(); + updateTableIndexParamQuery << "SELECT id, state, dimension, created_on " << + "FROM Tables " << + "WHERE table_id = " << quote << table_id << " AND " << + "state <> " << std::to_string(TableSchema::TO_DELETE) << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::UpdateTableIndexParam: " << updateTableIndexParamQuery.str(); + + StoreQueryResult res = updateTableIndexParamQuery.store(); + if (res.num_rows() == 1) { + const Row &resRow = res[0]; + + size_t id = resRow["id"]; + int32_t state = resRow["state"]; + uint16_t dimension = resRow["dimension"]; + int64_t created_on = resRow["created_on"]; + + updateTableIndexParamQuery << "UPDATE Tables " << + "SET id = " << id << ", " << + "state = " << state << ", " << + "dimension = " << dimension << ", " << + "created_on = " << created_on << ", " << + "engine_type_ = " << index.engine_type_ << ", " << + "nlist = " << index.nlist_ << ", " << + "index_file_size = " << index.index_file_size_*ONE_MB << ", " << + "metric_type = " << index.metric_type_ << ", " << + "WHERE id = " << quote << table_id << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::UpdateTableIndexParam: " << updateTableIndexParamQuery.str(); + + + if (!updateTableIndexParamQuery.exec()) { + ENGINE_LOG_ERROR << "QUERY ERROR WHEN UPDATING TABLE INDEX PARAM"; + return Status::DBTransactionError("QUERY ERROR WHEN UPDATING TABLE INDEX PARAM", + updateTableIndexParamQuery.error()); + } + } else { + return Status::NotFound("Table " + table_id + " not found"); + } + + } //Scoped Connection + + } catch (const BadQuery &er) { + // Handle any query errors + ENGINE_LOG_ERROR << "QUERY ERROR WHEN UPDATING TABLE INDEX PARAM" << ": " << er.what(); + return Status::DBTransactionError("QUERY ERROR WHEN UPDATING TABLE INDEX PARAM", er.what()); + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN UPDATING TABLE INDEX 
PARAM" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN UPDATING TABLE INDEX PARAM", er.what()); + } + + return Status::OK(); +} + +Status MySQLMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex& index) { try { + MetricCollector metric; + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + + Query describeTableIndexQuery = connectionPtr->query(); + describeTableIndexQuery << "SELECT engine_type, nlist, index_file_size, metric_type " << + "FROM Tables " << + "WHERE table_id = " << quote << table_id << " AND " << + "state <> " << std::to_string(TableSchema::TO_DELETE) << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::DescribeTableIndex: " << describeTableIndexQuery.str(); + + StoreQueryResult res = describeTableIndexQuery.store(); + + if (res.num_rows() == 1) { + const Row &resRow = res[0]; + + index.engine_type_ = resRow["engine_type"]; + index.nlist_ = resRow["nlist"]; + index.index_file_size_ = resRow["index_file_size"]/ONE_MB; + index.metric_type_ = resRow["metric_type"]; + } else { + return Status::NotFound("Table " + table_id + " not found"); + } + + } //Scoped Connection + + } catch (const BadQuery &er) { + // Handle any query errors + ENGINE_LOG_ERROR << "QUERY ERROR WHEN DESCRIBE TABLE INDEX" << ": " << er.what(); + return Status::DBTransactionError("QUERY ERROR WHEN DESCRIBE TABLE INDEX", er.what()); + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN DESCRIBE TABLE INDEX" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN DESCRIBE TABLE INDEX", er.what()); + } + + return Status::OK(); +} + +Status MySQLMetaImpl::DropTableIndex(const std::string &table_id) { + try { MetricCollector metric; { @@ -444,9 +513,60 @@ Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { return Status::Error("Failed to connect to database server"); } + Query dropTableIndexQuery = connectionPtr->query(); + + dropTableIndexQuery << "UPDATE TableFiles " << + "SET file_type = " << std::to_string(TableFileSchema::TO_DELETE) << "," << + "updated_time = " << utils::GetMicroSecTimeStamp() << " " << + "WHERE table_id = " << quote << table_id << " AND " << + "file_type = " << std::to_string(TableFileSchema::INDEX) << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::DropTableIndex: " << dropTableIndexQuery.str(); + + if (!dropTableIndexQuery.exec()) { + ENGINE_LOG_ERROR << "QUERY ERROR WHEN DROP TABLE INDEX"; + return Status::DBTransactionError("QUERY ERROR WHEN DROP TABLE INDEX", + dropTableIndexQuery.error()); + } + + dropTableIndexQuery << "UPDATE TableFiles " << + "SET file_type = " << std::to_string(TableFileSchema::RAW) << "," << + "updated_time = " << utils::GetMicroSecTimeStamp() << " " << + "WHERE table_id = " << quote << table_id << " AND " << + "file_type = " << std::to_string(TableFileSchema::BACKUP) << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::DropTableIndex: " << dropTableIndexQuery.str(); + + if (!dropTableIndexQuery.exec()) { + ENGINE_LOG_ERROR << "QUERY ERROR WHEN DROP TABLE INDEX"; + return Status::DBTransactionError("QUERY ERROR WHEN DROP TABLE INDEX", + dropTableIndexQuery.error()); + } + + } //Scoped Connection + + } catch (const BadQuery &er) { + // Handle any query errors + ENGINE_LOG_ERROR << "QUERY ERROR WHEN DROP TABLE INDEX" << ": " << er.what(); + return Status::DBTransactionError("QUERY ERROR WHEN DROP TABLE 
INDEX", er.what()); + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN DROP TABLE INDEX" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN DROP TABLE INDEX", er.what()); + } + return Status::OK(); +} +Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { + try { + MetricCollector metric; + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } //soft delete table Query deleteTableQuery = connectionPtr->query(); @@ -464,7 +584,6 @@ Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { } //Scoped Connection - if (mode_ == Options::MODE::CLUSTER) { DeleteTableFiles(table_id); } @@ -485,7 +604,6 @@ Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { Status MySQLMetaImpl::DeleteTableFiles(const std::string &table_id) { try { MetricCollector metric; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -493,10 +611,6 @@ Status MySQLMetaImpl::DeleteTableFiles(const std::string &table_id) { return Status::Error("Failed to connect to database server"); } - - - - //soft delete table files Query deleteTableFilesQuery = connectionPtr->query(); // @@ -527,14 +641,9 @@ Status MySQLMetaImpl::DeleteTableFiles(const std::string &table_id) { } Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { - - try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -542,9 +651,8 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { return Status::Error("Failed to connect to database server"); } - Query describeTableQuery = connectionPtr->query(); - describeTableQuery << "SELECT id, dimension, files_cnt, engine_type, store_raw_data " << + describeTableQuery << "SELECT id, state, dimension, engine_type, nlist, index_file_size, metric_type " << "FROM Tables " << "WHERE table_id = " << quote << table_schema.table_id_ << " " << "AND state <> " << std::to_string(TableSchema::TO_DELETE) << ";"; @@ -559,14 +667,17 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { table_schema.id_ = resRow["id"]; //implicit conversion - table_schema.dimension_ = resRow["dimension"]; + table_schema.state_ = resRow["state"]; - table_schema.files_cnt_ = resRow["files_cnt"]; + table_schema.dimension_ = resRow["dimension"]; table_schema.engine_type_ = resRow["engine_type"]; - int store_raw_data = resRow["store_raw_data"]; - table_schema.store_raw_data_ = (store_raw_data == 1); + table_schema.nlist_ = resRow["nlist"]; + + table_schema.index_file_size_ = resRow["index_file_size"]; + + table_schema.metric_type_ = resRow["metric_type"]; } else { return Status::NotFound("Table " + table_schema.table_id_ + " not found"); } @@ -585,14 +696,9 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { } Status MySQLMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { - - try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -600,7 +706,6 @@ Status MySQLMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { return Status::Error("Failed to connect to database server"); } - Query hasTableQuery = connectionPtr->query(); //since table_id is a unique column we just need to check whether it exists or not hasTableQuery << "SELECT EXISTS " << @@ -631,14 +736,9 @@ 
Status MySQLMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { } Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { - - try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -646,9 +746,8 @@ Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { return Status::Error("Failed to connect to database server"); } - Query allTablesQuery = connectionPtr->query(); - allTablesQuery << "SELECT id, table_id, dimension, files_cnt, engine_type, store_raw_data " << + allTablesQuery << "SELECT id, table_id, dimension, engine_type, nlist, index_file_size, metric_type " << "FROM Tables " << "WHERE state <> " << std::to_string(TableSchema::TO_DELETE) << ";"; @@ -668,12 +767,13 @@ Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { table_schema.dimension_ = resRow["dimension"]; - table_schema.files_cnt_ = resRow["files_cnt"]; - table_schema.engine_type_ = resRow["engine_type"]; - int store_raw_data = resRow["store_raw_data"]; - table_schema.store_raw_data_ = (store_raw_data == 1); + table_schema.nlist_ = resRow["nlist"]; + + table_schema.index_file_size_ = resRow["index_file_size"]; + + table_schema.metric_type_ = resRow["metric_type"]; table_schema_array.emplace_back(table_schema); } @@ -691,8 +791,6 @@ Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { } Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { - - if (file_schema.date_ == EmptyDate) { file_schema.date_ = Meta::GetDate(); } @@ -704,15 +802,17 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { } try { - MetricCollector metric; NextFileId(file_schema.file_id_); file_schema.dimension_ = table_schema.dimension_; - file_schema.size_ = 0; + file_schema.file_size_ = 0; + file_schema.row_count_ = 0; file_schema.created_on_ = utils::GetMicroSecTimeStamp(); file_schema.updated_time_ = file_schema.created_on_; file_schema.engine_type_ = table_schema.engine_type_; + file_schema.nlist_ = table_schema.nlist_; + file_schema.metric_type_ = table_schema.metric_type_; utils::GetTableFilePath(options_, file_schema); std::string id = "NULL"; //auto-increment @@ -720,7 +820,7 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { std::string engine_type = std::to_string(file_schema.engine_type_); std::string file_id = file_schema.file_id_; std::string file_type = std::to_string(file_schema.file_type_); - std::string size = std::to_string(file_schema.size_); + std::string row_count = std::to_string(file_schema.row_count_); std::string updated_time = std::to_string(file_schema.updated_time_); std::string created_on = std::to_string(file_schema.created_on_); std::string date = std::to_string(file_schema.date_); @@ -732,12 +832,11 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { return Status::Error("Failed to connect to database server"); } - Query createTableFileQuery = connectionPtr->query(); createTableFileQuery << "INSERT INTO TableFiles VALUES" << "(" << id << ", " << quote << table_id << ", " << engine_type << ", " << - quote << file_id << ", " << file_type << ", " << size << ", " << + quote << file_id << ", " << file_type << ", " << row_count << ", " << updated_time << ", " << created_on << ", " << date << ");"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::CreateTableFile: " << createTableFileQuery.str(); @@ -746,9 +845,6 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { file_schema.id_ = res.insert_id(); //Might need 
to use SELECT LAST_INSERT_ID()? //Consume all results to avoid "Commands out of sync" error - - - } else { ENGINE_LOG_ERROR << "QUERY ERROR WHEN ADDING TABLE FILE"; return Status::DBTransactionError("Add file Error", createTableFileQuery.error()); @@ -768,21 +864,14 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { } catch (std::exception &ex) { return HandleException("Encounter exception when create table file", ex); } - - return Status::OK(); } Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { - - files.clear(); try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -790,9 +879,8 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { return Status::Error("Failed to connect to database server"); } - Query filesToIndexQuery = connectionPtr->query(); - filesToIndexQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToIndexQuery << "SELECT id, table_id, engine_type, file_id, file_type, file_size, row_count, date, created_on " << "FROM TableFiles " << "WHERE file_type = " << std::to_string(TableFileSchema::TO_INDEX) << ";"; @@ -819,10 +907,14 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.file_size_ = resRow["file_size"]; + + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; + table_file.created_on_ = resRow["created_on"]; + auto groupItr = groups.find(table_file.table_id_); if (groupItr == groups.end()) { TableSchema table_schema; @@ -834,6 +926,8 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { groups[table_file.table_id_] = table_schema; } + table_file.metric_type_ = groups[table_file.table_id_].metric_type_; + table_file.nlist_ = groups[table_file.table_id_].nlist_; table_file.dimension_ = groups[table_file.table_id_].dimension_; utils::GetTableFilePath(options_, table_file); @@ -856,16 +950,11 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, const DatesT &partition, DatePartionedTableFilesSchema &files) { - - files.clear(); try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -873,11 +962,10 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, return Status::Error("Failed to connect to database server"); } - if (partition.empty()) { Query filesToSearchQuery = connectionPtr->query(); - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, file_size, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "(file_type = " << std::to_string(TableFileSchema::RAW) << " OR " << @@ -899,7 +987,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, std::string partitionListStr = partitionListSS.str(); partitionListStr = partitionListStr.substr(0, partitionListStr.size() - 2); //remove the last ", " - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, file_size, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "date IN (" << partitionListStr << ") AND " << @@ -932,13 +1020,19 @@ 
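// --- Illustrative sketch (not part of this patch) ---------------------------
// After this change, CreateTableFile and the FilesTo* readers copy the
// table-level index parameters (engine_type_, nlist_, metric_type_) into each
// TableFileSchema, while the new file_size_/row_count_ fields start at zero.
// The structs below are trimmed stand-ins for meta::TableSchema and
// meta::TableFileSchema, only to show the copy direction; they are not the
// real definitions from MetaTypes.h.
#include <cstdint>
#include <cstddef>
#include <iostream>
#include <string>

struct MiniTableSchema {          // trimmed stand-in for meta::TableSchema
    std::string table_id_;
    uint16_t dimension_ = 0;
    int32_t engine_type_ = 1;
    int32_t nlist_ = 16384;
    int32_t metric_type_ = 1;
};

struct MiniTableFileSchema {      // trimmed stand-in for meta::TableFileSchema
    std::string table_id_;
    uint16_t dimension_ = 0;
    int32_t engine_type_ = 1;
    int32_t nlist_ = 16384;       // taken from the table, not persisted per file
    int32_t metric_type_ = 1;     // taken from the table, not persisted per file
    std::size_t file_size_ = 0;
    std::size_t row_count_ = 0;
};

// Mirrors the assignments CreateTableFile performs after DescribeTable().
MiniTableFileSchema NewFileFor(const MiniTableSchema &table) {
    MiniTableFileSchema file;
    file.table_id_ = table.table_id_;
    file.dimension_ = table.dimension_;
    file.engine_type_ = table.engine_type_;
    file.nlist_ = table.nlist_;
    file.metric_type_ = table.metric_type_;
    file.file_size_ = 0;
    file.row_count_ = 0;
    return file;
}

int main() {
    MiniTableSchema table;
    table.table_id_ = "demo_table";
    table.dimension_ = 128;
    table.nlist_ = 4096;
    const MiniTableFileSchema file = NewFileFor(table);
    std::cout << file.table_id_ << " nlist=" << file.nlist_
              << " rows=" << file.row_count_ << std::endl;
    return 0;
}
// ---------------------------------------------------------------------------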
Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, table_file.engine_type_ = resRow["engine_type"]; + table_file.metric_type_ = table_schema.metric_type_; + + table_file.nlist_ = table_schema.nlist_; + std::string file_id; resRow["file_id"].to_string(file_id); table_file.file_id_ = file_id; table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.file_size_ = resRow["file_size"]; + + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; @@ -970,16 +1064,11 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, const std::vector &ids, const DatesT &partition, DatePartionedTableFilesSchema &files) { - - files.clear(); try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -988,7 +1077,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, } Query filesToSearchQuery = connectionPtr->query(); - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, file_size, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id; @@ -1043,13 +1132,19 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, table_file.engine_type_ = resRow["engine_type"]; + table_file.metric_type_ = table_schema.metric_type_; + + table_file.nlist_ = table_schema.nlist_; + std::string file_id; resRow["file_id"].to_string(file_id); table_file.file_id_ = file_id; table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.file_size_ = resRow["file_size"]; + + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; @@ -1079,15 +1174,20 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, DatePartionedTableFilesSchema &files) { - - files.clear(); try { MetricCollector metric; - StoreQueryResult res; + //check table existence + TableSchema table_schema; + table_schema.table_id_ = table_id; + auto status = DescribeTable(table_schema); + if (!status.ok()) { + return status; + } + StoreQueryResult res; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1095,29 +1195,24 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, return Status::Error("Failed to connect to database server"); } - Query filesToMergeQuery = connectionPtr->query(); - filesToMergeQuery << "SELECT id, table_id, file_id, file_type, size, date " << + filesToMergeQuery << "SELECT id, table_id, file_id, file_type, file_size, row_count, date, engine_type, create_on " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "file_type = " << std::to_string(TableFileSchema::RAW) << " " << - "ORDER BY size DESC" << ";"; + "ORDER BY row_count DESC" << ";"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::FilesToMerge: " << filesToMergeQuery.str(); res = filesToMergeQuery.store(); } //Scoped Connection - TableSchema table_schema; - table_schema.table_id_ = table_id; - auto status = DescribeTable(table_schema); - - if (!status.ok()) { - return status; - } - - TableFileSchema table_file; for (auto &resRow : res) { + TableFileSchema table_file; + table_file.file_size_ = resRow["file_size"]; + if(table_file.file_size_ >= table_schema.index_file_size_) { + continue;//skip large file + } table_file.id_ = resRow["id"]; //implicit conversion @@ -1131,10 +1226,18 @@ Status 
MySQLMetaImpl::FilesToMerge(const std::string &table_id, table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; + table_file.engine_type_ = resRow["engine_type"]; + + table_file.metric_type_ = table_schema.metric_type_; + + table_file.nlist_ = table_schema.nlist_; + + table_file.created_on_ = resRow["created_on"]; + table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); @@ -1163,8 +1266,6 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, const std::vector &ids, TableFilesSchema &table_files) { - - if (ids.empty()) { return Status::OK(); } @@ -1177,9 +1278,7 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, idStr = idStr.substr(0, idStr.size() - 4); //remove the last " OR " try { - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1189,7 +1288,7 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, Query getTableFileQuery = connectionPtr->query(); - getTableFileQuery << "SELECT id, engine_type, file_id, file_type, size, date " << + getTableFileQuery << "SELECT id, engine_type, file_id, file_type, file_size, row_count, date, created_on " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "(" << idStr << ");"; @@ -1216,16 +1315,24 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, file_schema.engine_type_ = resRow["engine_type"]; + file_schema.metric_type_ = table_schema.metric_type_; + + file_schema.nlist_ = table_schema.nlist_; + std::string file_id; resRow["file_id"].to_string(file_id); file_schema.file_id_ = file_id; file_schema.file_type_ = resRow["file_type"]; - file_schema.size_ = resRow["size"]; + file_schema.file_size_ = resRow["file_size"]; + + file_schema.row_count_ = resRow["row_count"]; file_schema.date_ = resRow["date"]; + file_schema.created_on_ = resRow["created_on"]; + file_schema.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, file_schema); @@ -1247,8 +1354,6 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, // PXU TODO: Support Swap Status MySQLMetaImpl::Archive() { - - auto &criterias = options_.archive_conf.GetCriterias(); if (criterias.empty()) { return Status::OK(); @@ -1262,14 +1367,12 @@ Status MySQLMetaImpl::Archive() { long now = utils::GetMicroSecTimeStamp(); try { - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { return Status::Error("Failed to connect to database server"); } - Query archiveQuery = connectionPtr->query(); archiveQuery << "UPDATE TableFiles " << "SET file_type = " << std::to_string(TableFileSchema::TO_DELETE) << " " << @@ -1305,13 +1408,10 @@ Status MySQLMetaImpl::Archive() { } Status MySQLMetaImpl::Size(uint64_t &result) { - - result = 0; - try { + try { StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1321,7 +1421,7 @@ Status MySQLMetaImpl::Size(uint64_t &result) { Query getSizeQuery = connectionPtr->query(); - getSizeQuery << "SELECT IFNULL(SUM(size),0) AS sum " << + getSizeQuery << "SELECT IFNULL(SUM(file_size),0) AS sum " << "FROM TableFiles " << "WHERE file_type <> " << std::to_string(TableFileSchema::TO_DELETE) << ";"; @@ -1330,16 +1430,10 @@ Status MySQLMetaImpl::Size(uint64_t &result) { res = getSizeQuery.store(); } //Scoped Connection - -// - - if 
(res.empty()) { result = 0; - } else { result = res[0]["sum"]; - } } catch (const BadQuery &er) { @@ -1356,8 +1450,6 @@ Status MySQLMetaImpl::Size(uint64_t &result) { } Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { - - if (to_discard_size <= 0) { return Status::OK(); @@ -1365,11 +1457,8 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { ENGINE_LOG_DEBUG << "About to discard size=" << to_discard_size; try { - MetricCollector metric; - bool status; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1377,9 +1466,8 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { return Status::Error("Failed to connect to database server"); } - Query discardFilesQuery = connectionPtr->query(); - discardFilesQuery << "SELECT id, size " << + discardFilesQuery << "SELECT id, file_size " << "FROM TableFiles " << "WHERE file_type <> " << std::to_string(TableFileSchema::TO_DELETE) << " " << "ORDER BY id ASC " << @@ -1387,9 +1475,7 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { ENGINE_LOG_DEBUG << "MySQLMetaImpl::DiscardFiles: " << discardFilesQuery.str(); - StoreQueryResult res = discardFilesQuery.store(); - if (res.num_rows() == 0) { return Status::OK(); } @@ -1401,11 +1487,11 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { break; } table_file.id_ = resRow["id"]; - table_file.size_ = resRow["size"]; + table_file.file_size_ = resRow["file_size"]; idsToDiscardSS << "id = " << std::to_string(table_file.id_) << " OR "; ENGINE_LOG_DEBUG << "Discard table_file.id=" << table_file.file_id_ - << " table_file.size=" << table_file.size_; - to_discard_size -= table_file.size_; + << " table_file.size=" << table_file.file_size_; + to_discard_size -= table_file.file_size_; } std::string idsToDiscardStr = idsToDiscardSS.str(); @@ -1440,13 +1526,10 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { //ZR: this function assumes all fields in file_schema have value Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { - - file_schema.updated_time_ = utils::GetMicroSecTimeStamp(); - try { + try { MetricCollector metric; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1454,7 +1537,6 @@ Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { return Status::Error("Failed to connect to database server"); } - Query updateTableFileQuery = connectionPtr->query(); //if the table has been deleted, just mark the table file as TO_DELETE @@ -1480,7 +1562,8 @@ Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { std::string engine_type = std::to_string(file_schema.engine_type_); std::string file_id = file_schema.file_id_; std::string file_type = std::to_string(file_schema.file_type_); - std::string size = std::to_string(file_schema.size_); + std::string file_size = std::to_string(file_schema.file_size_); + std::string row_count = std::to_string(file_schema.row_count_); std::string updated_time = std::to_string(file_schema.updated_time_); std::string created_on = std::to_string(file_schema.created_on_); std::string date = std::to_string(file_schema.date_); @@ -1490,7 +1573,8 @@ Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { "engine_type = " << engine_type << ", " << "file_id = " << quote << file_id << ", " << "file_type = " << file_type << ", " << - "size = " << size << ", " << + "file_size = " << file_size << ", " << + "row_count = " << row_count << ", " << "updated_time = " << updated_time << ", " << "created_on = " << 
created_on << ", " << "date = " << date << " " << @@ -1558,11 +1642,8 @@ Status MySQLMetaImpl::UpdateTableFilesToIndex(const std::string &table_id) { } Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { - - try { MetricCollector metric; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1570,7 +1651,6 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { return Status::Error("Failed to connect to database server"); } - Query updateTableFilesQuery = connectionPtr->query(); std::map has_tables; @@ -1606,7 +1686,8 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { std::string engine_type = std::to_string(file_schema.engine_type_); std::string file_id = file_schema.file_id_; std::string file_type = std::to_string(file_schema.file_type_); - std::string size = std::to_string(file_schema.size_); + std::string file_size = std::to_string(file_schema.file_size_); + std::string row_count = std::to_string(file_schema.row_count_); std::string updated_time = std::to_string(file_schema.updated_time_); std::string created_on = std::to_string(file_schema.created_on_); std::string date = std::to_string(file_schema.date_); @@ -1616,7 +1697,8 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { "engine_type = " << engine_type << ", " << "file_id = " << quote << file_id << ", " << "file_type = " << file_type << ", " << - "size = " << size << ", " << + "file_size = " << file_size << ", " << + "row_count = " << row_count << ", " << "updated_time = " << updated_time << ", " << "created_on = " << created_on << ", " << "date = " << date << " " << @@ -1641,26 +1723,25 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { ENGINE_LOG_ERROR << "GENERAL ERROR WHEN UPDATING TABLE FILES" << ": " << er.what(); return Status::DBTransactionError("GENERAL ERROR WHEN UPDATING TABLE FILES", er.what()); } + return Status::OK(); } Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { - - auto now = utils::GetMicroSecTimeStamp(); + std::set table_ids; + + //remove to_delete files try { MetricCollector metric; { - - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { return Status::Error("Failed to connect to database server"); } - Query cleanUpFilesWithTTLQuery = connectionPtr->query(); cleanUpFilesWithTTLQuery << "SELECT id, table_id, file_id, date " << "FROM TableFiles " << @@ -1693,6 +1774,8 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { ENGINE_LOG_DEBUG << "Removing file id:" << table_file.id_ << " location:" << table_file.location_; idsToDelete.emplace_back(std::to_string(table_file.id_)); + + table_ids.insert(table_file.table_id_); } if (!idsToDelete.empty()) { @@ -1727,19 +1810,17 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); } + //remove to_delete tables try { MetricCollector metric; { - - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { return Status::Error("Failed to connect to database server"); } - Query cleanUpFilesWithTTLQuery = connectionPtr->query(); cleanUpFilesWithTTLQuery << "SELECT id, table_id " << "FROM Tables " << @@ -1749,7 +1830,6 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { StoreQueryResult res = cleanUpFilesWithTTLQuery.store(); - if (!res.empty()) { std::stringstream idsToDeleteSS; @@ -1758,7 +1838,7 @@ Status 
MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { std::string table_id; resRow["table_id"].to_string(table_id); - utils::DeleteTablePath(options_, table_id); + utils::DeleteTablePath(options_, table_id, false);//only delete empty folder idsToDeleteSS << "id = " << std::to_string(id) << " OR "; } @@ -1787,12 +1867,47 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); } + //remove deleted table folder + //don't remove table folder until all its files has been deleted + try { + MetricCollector metric; + + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + + for(auto& table_id : table_ids) { + Query cleanUpFilesWithTTLQuery = connectionPtr->query(); + cleanUpFilesWithTTLQuery << "SELECT file_id " << + "FROM TableFiles " << + "WHERE table_id = " << quote << table_id << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::CleanUpFilesWithTTL: " << cleanUpFilesWithTTLQuery.str(); + + StoreQueryResult res = cleanUpFilesWithTTLQuery.store(); + + if (res.empty()) { + utils::DeleteTablePath(options_, table_id); + } + } + } + } catch (const BadQuery &er) { + // Handle any query errors + ENGINE_LOG_ERROR << "QUERY ERROR WHEN CLEANING UP FILES WITH TTL" << ": " << er.what(); + return Status::DBTransactionError("QUERY ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL", er.what()); + } + return Status::OK(); } Status MySQLMetaImpl::CleanUp() { - - try { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1800,7 +1915,6 @@ Status MySQLMetaImpl::CleanUp() { return Status::Error("Failed to connect to database server"); } - Query cleanUpQuery = connectionPtr->query(); cleanUpQuery << "SELECT table_name " << "FROM information_schema.tables " << @@ -1840,8 +1954,6 @@ Status MySQLMetaImpl::CleanUp() { } Status MySQLMetaImpl::Count(const std::string &table_id, uint64_t &result) { - - try { MetricCollector metric; @@ -1854,7 +1966,6 @@ Status MySQLMetaImpl::Count(const std::string &table_id, uint64_t &result) { } StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1901,24 +2012,21 @@ Status MySQLMetaImpl::Count(const std::string &table_id, uint64_t &result) { ENGINE_LOG_ERROR << "GENERAL ERROR WHEN RETRIEVING COUNT" << ": " << er.what(); return Status::DBTransactionError("GENERAL ERROR WHEN RETRIEVING COUNT", er.what()); } + return Status::OK(); } Status MySQLMetaImpl::DropAll() { - - if (boost::filesystem::is_directory(options_.path)) { boost::filesystem::remove_all(options_.path); } try { - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { return Status::Error("Failed to connect to database server"); } - Query dropTableQuery = connectionPtr->query(); dropTableQuery << "DROP TABLE IF EXISTS Tables, TableFiles;"; @@ -1939,11 +2047,11 @@ Status MySQLMetaImpl::DropAll() { ENGINE_LOG_ERROR << "GENERAL ERROR WHEN DROPPING TABLE" << ": " << er.what(); return Status::DBTransactionError("GENERAL ERROR WHEN DROPPING TABLE", er.what()); } + return Status::OK(); } MySQLMetaImpl::~MySQLMetaImpl() { - if (mode_ != 
Options::MODE::READ_ONLY) { CleanUp(); } diff --git a/cpp/src/db/meta/MySQLMetaImpl.h b/cpp/src/db/meta/MySQLMetaImpl.h index 30695423ddd39a1ab4f5d63edacf43e5a22ad319..3fdd80beed37b7a7733fae214fc13f51918bcbe9 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.h +++ b/cpp/src/db/meta/MySQLMetaImpl.h @@ -43,6 +43,12 @@ class MySQLMetaImpl : public Meta { Status HasNonIndexFiles(const std::string &table_id, bool &has) override; + Status UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) override; + + Status DescribeTableIndex(const std::string &table_id, TableIndex& index) override; + + Status DropTableIndex(const std::string &table_id) override; + Status UpdateTableFile(TableFileSchema &file_schema) override; Status UpdateTableFilesToIndex(const std::string &table_id) override; diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index 9053139e0b6e2ec2c7b20091488f3b252ba9b532..b4859473ef306bb55cd960367271c703521c1ee4 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -62,16 +62,18 @@ inline auto StoragePrototype(const std::string &path) { make_column("state", &TableSchema::state_), make_column("dimension", &TableSchema::dimension_), make_column("created_on", &TableSchema::created_on_), - make_column("files_cnt", &TableSchema::files_cnt_, default_value(0)), make_column("engine_type", &TableSchema::engine_type_), - make_column("store_raw_data", &TableSchema::store_raw_data_)), + make_column("nlist", &TableSchema::nlist_), + make_column("index_file_size", &TableSchema::index_file_size_), + make_column("metric_type", &TableSchema::metric_type_)), make_table("TableFiles", make_column("id", &TableFileSchema::id_, primary_key()), make_column("table_id", &TableFileSchema::table_id_), make_column("engine_type", &TableFileSchema::engine_type_), make_column("file_id", &TableFileSchema::file_id_), make_column("file_type", &TableFileSchema::file_type_), - make_column("size", &TableFileSchema::size_, default_value(0)), + make_column("file_size", &TableFileSchema::file_size_, default_value(0)), + make_column("row_count", &TableFileSchema::row_count_, default_value(0)), make_column("updated_time", &TableFileSchema::updated_time_), make_column("created_on", &TableFileSchema::created_on_), make_column("date", &TableFileSchema::date_)) @@ -188,7 +190,6 @@ Status SqliteMetaImpl::CreateTable(TableSchema &table_schema) { } } - table_schema.files_cnt_ = 0; table_schema.id_ = -1; table_schema.created_on_ = utils::GetMicroSecTimeStamp(); @@ -217,26 +218,15 @@ Status SqliteMetaImpl::DeleteTable(const std::string& table_id) { std::lock_guard meta_lock(meta_mutex_); //soft delete table - auto tables = ConnectorPtr->select(columns(&TableSchema::id_, - &TableSchema::files_cnt_, - &TableSchema::dimension_, - &TableSchema::engine_type_, - &TableSchema::store_raw_data_, - &TableSchema::created_on_), - where(c(&TableSchema::table_id_) == table_id)); - for (auto &table : tables) { - TableSchema table_schema; - table_schema.table_id_ = table_id; - table_schema.state_ = (int)TableSchema::TO_DELETE; - table_schema.id_ = std::get<0>(table); - table_schema.files_cnt_ = std::get<1>(table); - table_schema.dimension_ = std::get<2>(table); - table_schema.engine_type_ = std::get<3>(table); - table_schema.store_raw_data_ = std::get<4>(table); - table_schema.created_on_ = std::get<5>(table); - - ConnectorPtr->update(table_schema); - } + ConnectorPtr->update_all( + set( + c(&TableSchema::state_) = (int) TableSchema::TO_DELETE + ), + where( + 
c(&TableSchema::table_id_) == table_id and + c(&TableSchema::state_) != (int) TableSchema::TO_DELETE + )); + } catch (std::exception &e) { return HandleException("Encounter exception when delete table", e); } @@ -274,20 +264,25 @@ Status SqliteMetaImpl::DescribeTable(TableSchema &table_schema) { MetricCollector metric; auto groups = ConnectorPtr->select(columns(&TableSchema::id_, - &TableSchema::table_id_, - &TableSchema::files_cnt_, + &TableSchema::state_, &TableSchema::dimension_, + &TableSchema::created_on_, &TableSchema::engine_type_, - &TableSchema::store_raw_data_), + &TableSchema::nlist_, + &TableSchema::index_file_size_, + &TableSchema::metric_type_), where(c(&TableSchema::table_id_) == table_schema.table_id_ and c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); if (groups.size() == 1) { table_schema.id_ = std::get<0>(groups[0]); - table_schema.files_cnt_ = std::get<2>(groups[0]); - table_schema.dimension_ = std::get<3>(groups[0]); + table_schema.state_ = std::get<1>(groups[0]); + table_schema.dimension_ = std::get<2>(groups[0]); + table_schema.created_on_ = std::get<3>(groups[0]); table_schema.engine_type_ = std::get<4>(groups[0]); - table_schema.store_raw_data_ = std::get<5>(groups[0]); + table_schema.nlist_ = std::get<5>(groups[0]); + table_schema.index_file_size_ = std::get<6>(groups[0]); + table_schema.metric_type_ = std::get<7>(groups[0]); } else { return Status::NotFound("Table " + table_schema.table_id_ + " not found"); } @@ -302,17 +297,16 @@ Status SqliteMetaImpl::DescribeTable(TableSchema &table_schema) { Status SqliteMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) { has = false; try { + std::vector file_types = { + (int) TableFileSchema::RAW, + (int) TableFileSchema::NEW, + (int) TableFileSchema::NEW_MERGE, + (int) TableFileSchema::NEW_INDEX, + (int) TableFileSchema::TO_INDEX, + }; auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::file_type_), - where((c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_MERGE - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_INDEX - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX) + where(in(&TableFileSchema::file_type_, file_types) and c(&TableFileSchema::table_id_) == table_id )); @@ -320,6 +314,7 @@ Status SqliteMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) has = true; int raw_count = 0, new_count = 0, new_merge_count = 0, new_index_count = 0, to_index_count = 0; + std::vector file_ids; for (auto &file : selected) { switch (std::get<1>(file)) { case (int) TableFileSchema::RAW: @@ -353,6 +348,118 @@ Status SqliteMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) return Status::OK(); } +Status SqliteMetaImpl::UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) { + try { + MetricCollector metric; + + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + + auto tables = ConnectorPtr->select(columns(&TableSchema::id_, + &TableSchema::state_, + &TableSchema::dimension_, + &TableSchema::created_on_), + where(c(&TableSchema::table_id_) == table_id + and c(&TableSchema::state_) != (int) TableSchema::TO_DELETE)); + + if(tables.size() > 0) { + meta::TableSchema table_schema; + table_schema.id_ = std::get<0>(tables[0]); + table_schema.table_id_ = 
table_id; + table_schema.state_ = std::get<1>(tables[0]); + table_schema.dimension_ = std::get<2>(tables[0]); + table_schema.created_on_ = std::get<3>(tables[0]); + table_schema.engine_type_ = index.engine_type_; + table_schema.nlist_ = index.nlist_; + table_schema.index_file_size_ = index.index_file_size_*ONE_MB; + table_schema.metric_type_ = index.metric_type_; + + ConnectorPtr->update(table_schema); + } else { + return Status::NotFound("Table " + table_id + " not found"); + } + + //set all backup file to raw + ConnectorPtr->update_all( + set( + c(&TableFileSchema::file_type_) = (int) TableFileSchema::RAW, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp() + ), + where( + c(&TableFileSchema::table_id_) == table_id and + c(&TableFileSchema::file_type_) == (int) TableFileSchema::BACKUP + )); + + } catch (std::exception &e) { + std::string msg = "Encounter exception when update table index: table_id = " + table_id; + return HandleException(msg, e); + } + return Status::OK(); +} + +Status SqliteMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex& index) { + try { + MetricCollector metric; + + auto groups = ConnectorPtr->select(columns(&TableSchema::engine_type_, + &TableSchema::nlist_, + &TableSchema::index_file_size_, + &TableSchema::metric_type_), + where(c(&TableSchema::table_id_) == table_id + and c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); + + if (groups.size() == 1) { + index.engine_type_ = std::get<0>(groups[0]); + index.nlist_ = std::get<1>(groups[0]); + index.index_file_size_ = std::get<2>(groups[0])/ONE_MB; + index.metric_type_ = std::get<3>(groups[0]); + } else { + return Status::NotFound("Table " + table_id + " not found"); + } + + } catch (std::exception &e) { + return HandleException("Encounter exception when describe index", e); + } + + return Status::OK(); +} + +Status SqliteMetaImpl::DropTableIndex(const std::string &table_id) { + try { + MetricCollector metric; + + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + + //soft delete index files + ConnectorPtr->update_all( + set( + c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp() + ), + where( + c(&TableFileSchema::table_id_) == table_id and + c(&TableFileSchema::file_type_) == (int) TableFileSchema::INDEX + )); + + //set all backup file to raw + ConnectorPtr->update_all( + set( + c(&TableFileSchema::file_type_) = (int) TableFileSchema::RAW, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp() + ), + where( + c(&TableFileSchema::table_id_) == table_id and + c(&TableFileSchema::file_type_) == (int) TableFileSchema::BACKUP + )); + + } catch (std::exception &e) { + return HandleException("Encounter exception when delete table index files", e); + } + + return Status::OK(); +} + Status SqliteMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { has_or_not = false; @@ -379,20 +486,24 @@ Status SqliteMetaImpl::AllTables(std::vector& table_schema_array) { MetricCollector metric; auto selected = ConnectorPtr->select(columns(&TableSchema::id_, - &TableSchema::table_id_, - &TableSchema::files_cnt_, - &TableSchema::dimension_, - &TableSchema::engine_type_, - &TableSchema::store_raw_data_), + &TableSchema::table_id_, + &TableSchema::dimension_, + &TableSchema::created_on_, + &TableSchema::engine_type_, + &TableSchema::nlist_, + &TableSchema::index_file_size_, + &TableSchema::metric_type_), 
where(c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); for (auto &table : selected) { TableSchema schema; schema.id_ = std::get<0>(table); schema.table_id_ = std::get<1>(table); - schema.files_cnt_ = std::get<2>(table); + schema.created_on_ = std::get<2>(table); schema.dimension_ = std::get<3>(table); schema.engine_type_ = std::get<4>(table); - schema.store_raw_data_ = std::get<5>(table); + schema.nlist_ = std::get<5>(table); + schema.index_file_size_ = std::get<6>(table); + schema.metric_type_ = std::get<7>(table); table_schema_array.emplace_back(schema); } @@ -420,10 +531,13 @@ Status SqliteMetaImpl::CreateTableFile(TableFileSchema &file_schema) { NextFileId(file_schema.file_id_); file_schema.dimension_ = table_schema.dimension_; - file_schema.size_ = 0; + file_schema.file_size_ = 0; + file_schema.row_count_ = 0; file_schema.created_on_ = utils::GetMicroSecTimeStamp(); file_schema.updated_time_ = file_schema.created_on_; file_schema.engine_type_ = table_schema.engine_type_; + file_schema.nlist_ = table_schema.nlist_; + file_schema.metric_type_ = table_schema.metric_type_; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -450,9 +564,11 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::file_size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, - &TableFileSchema::engine_type_), + &TableFileSchema::engine_type_, + &TableFileSchema::created_on_), where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX)); @@ -464,9 +580,11 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); - table_file.date_ = std::get<5>(file); - table_file.engine_type_ = std::get<6>(file); + table_file.file_size_ = std::get<4>(file); + table_file.row_count_ = std::get<5>(file); + table_file.date_ = std::get<6>(file); + table_file.engine_type_ = std::get<7>(file); + table_file.created_on_ = std::get<8>(file); utils::GetTableFilePath(options_, table_file); auto groupItr = groups.find(table_file.table_id_); @@ -479,6 +597,8 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { } groups[table_file.table_id_] = table_schema; } + table_file.metric_type_ = groups[table_file.table_id_].metric_type_; + table_file.nlist_ = groups[table_file.table_id_].nlist_; table_file.dimension_ = groups[table_file.table_id_].dimension_; files.push_back(table_file); } @@ -499,19 +619,17 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, MetricCollector metric; if (partition.empty()) { + std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::file_size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), where(c(&TableFileSchema::table_id_) == table_id and - (c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::TO_INDEX or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::INDEX))); + 
in(&TableFileSchema::file_type_, file_type))); TableSchema table_schema; table_schema.table_id_ = table_id; @@ -527,9 +645,12 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); - table_file.date_ = std::get<5>(file); - table_file.engine_type_ = std::get<6>(file); + table_file.file_size_ = std::get<4>(file); + table_file.row_count_ = std::get<5>(file); + table_file.date_ = std::get<6>(file); + table_file.engine_type_ = std::get<7>(file); + table_file.metric_type_ = table_schema.metric_type_; + table_file.nlist_ = table_schema.nlist_; table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); @@ -540,20 +661,18 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, } } else { + std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::file_size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), where(c(&TableFileSchema::table_id_) == table_id and - in(&TableFileSchema::date_, partition) and - (c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::TO_INDEX or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::INDEX))); + in(&TableFileSchema::date_, partition) and + in(&TableFileSchema::file_type_, file_type))); TableSchema table_schema; table_schema.table_id_ = table_id; @@ -569,9 +688,12 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); - table_file.date_ = std::get<5>(file); - table_file.engine_type_ = std::get<6>(file); + table_file.file_size_ = std::get<4>(file); + table_file.row_count_ = std::get<5>(file); + table_file.date_ = std::get<6>(file); + table_file.engine_type_ = std::get<7>(file); + table_file.metric_type_ = table_schema.metric_type_; + table_file.nlist_ = table_schema.nlist_; table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); @@ -601,7 +723,8 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::file_size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_); @@ -643,10 +766,13 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); - table_file.date_ = std::get<5>(file); - table_file.engine_type_ = std::get<6>(file); + table_file.file_size_ = std::get<4>(file); + table_file.row_count_ = std::get<5>(file); + table_file.date_ = std::get<6>(file); + table_file.engine_type_ = std::get<7>(file); table_file.dimension_ = table_schema.dimension_; + table_file.metric_type_ = table_schema.metric_type_; + 
table_file.nlist_ = table_schema.nlist_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); if (dateItr == files.end()) { @@ -669,33 +795,44 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, try { MetricCollector metric; - auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, - &TableFileSchema::table_id_, - &TableFileSchema::file_id_, - &TableFileSchema::file_type_, - &TableFileSchema::size_, - &TableFileSchema::date_), - where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW and - c(&TableFileSchema::table_id_) == table_id), - order_by(&TableFileSchema::size_).desc()); - + //check table existence TableSchema table_schema; table_schema.table_id_ = table_id; auto status = DescribeTable(table_schema); - if (!status.ok()) { return status; } - TableFileSchema table_file; + //get files to merge + auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, + &TableFileSchema::table_id_, + &TableFileSchema::file_id_, + &TableFileSchema::file_type_, + &TableFileSchema::file_size_, + &TableFileSchema::row_count_, + &TableFileSchema::date_, + &TableFileSchema::created_on_), + where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW and + c(&TableFileSchema::table_id_) == table_id), + order_by(&TableFileSchema::file_size_).desc()); + for (auto &file : selected) { + TableFileSchema table_file; + table_file.file_size_ = std::get<4>(file); + if(table_file.file_size_ >= table_schema.index_file_size_) { + continue;//skip large file + } + table_file.id_ = std::get<0>(file); table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); - table_file.date_ = std::get<5>(file); + table_file.row_count_ = std::get<5>(file); + table_file.date_ = std::get<6>(file); + table_file.created_on_ = std::get<7>(file); table_file.dimension_ = table_schema.dimension_; + table_file.metric_type_ = table_schema.metric_type_; + table_file.nlist_ = table_schema.nlist_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); if (dateItr == files.end()) { @@ -718,9 +855,11 @@ Status SqliteMetaImpl::GetTableFiles(const std::string& table_id, auto files = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::file_size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, - &TableFileSchema::engine_type_), + &TableFileSchema::engine_type_, + &TableFileSchema::created_on_), where(c(&TableFileSchema::table_id_) == table_id and in(&TableFileSchema::id_, ids) )); @@ -738,10 +877,15 @@ Status SqliteMetaImpl::GetTableFiles(const std::string& table_id, file_schema.id_ = std::get<0>(file); file_schema.file_id_ = std::get<1>(file); file_schema.file_type_ = std::get<2>(file); - file_schema.size_ = std::get<3>(file); - file_schema.date_ = std::get<4>(file); - file_schema.engine_type_ = std::get<5>(file); + file_schema.file_size_ = std::get<3>(file); + file_schema.row_count_ = std::get<4>(file); + file_schema.date_ = std::get<5>(file); + file_schema.engine_type_ = std::get<6>(file); + file_schema.metric_type_ = table_schema.metric_type_; + file_schema.nlist_ = table_schema.nlist_; + file_schema.created_on_ = std::get<7>(file); file_schema.dimension_ = table_schema.dimension_; + utils::GetTableFilePath(options_, file_schema); table_files.emplace_back(file_schema); @@ -797,23 +941,17 @@ Status 
SqliteMetaImpl::Archive() { Status SqliteMetaImpl::Size(uint64_t &result) { result = 0; try { - auto files = ConnectorPtr->select(columns(&TableFileSchema::size_, - &TableFileSchema::file_type_, - &TableFileSchema::engine_type_), + auto selected = ConnectorPtr->select(columns(sum(&TableFileSchema::file_size_)), where( c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE )); - - for (auto &file : files) { - auto file_size = std::get<0>(file); - auto file_type = std::get<1>(file); - auto engine_type = std::get<2>(file); - if(file_type == (int)TableFileSchema::INDEX && engine_type == (int)EngineType::FAISS_IVFSQ8) { - result += (uint64_t)file_size/4;//hardcode for sq8 - } else { - result += (uint64_t)file_size; + for (auto &total_size : selected) { + if (!std::get<0>(total_size)) { + continue; } + result += (uint64_t) (*std::get<0>(total_size)); } + } catch (std::exception &e) { return HandleException("Encounter exception when calculte db size", e); } @@ -836,7 +974,7 @@ Status SqliteMetaImpl::DiscardFiles(long to_discard_size) { auto commited = ConnectorPtr->transaction([&]() mutable { auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, - &TableFileSchema::size_), + &TableFileSchema::file_size_), where(c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE), order_by(&TableFileSchema::id_), @@ -848,11 +986,11 @@ Status SqliteMetaImpl::DiscardFiles(long to_discard_size) { for (auto &file : selected) { if (to_discard_size <= 0) break; table_file.id_ = std::get<0>(file); - table_file.size_ = std::get<1>(file); + table_file.file_size_ = std::get<1>(file); ids.push_back(table_file.id_); ENGINE_LOG_DEBUG << "Discard table_file.id=" << table_file.file_id_ - << " table_file.size=" << table_file.size_; - to_discard_size -= table_file.size_; + << " table_file.size=" << table_file.file_size_; + to_discard_size -= table_file.file_size_; } if (ids.size() == 0) { @@ -979,6 +1117,9 @@ Status SqliteMetaImpl::UpdateTableFiles(TableFilesSchema &files) { Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { auto now = utils::GetMicroSecTimeStamp(); + std::set table_ids; + + //remove to_delete files try { MetricCollector metric; @@ -1008,6 +1149,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { ENGINE_LOG_DEBUG << "Removing file id:" << table_file.file_id_ << " location:" << table_file.location_; ConnectorPtr->remove(table_file.id_); + table_ids.insert(table_file.table_id_); } return true; }); @@ -1021,6 +1163,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return HandleException("Encounter exception when clean table files", e); } + //remove to_delete tables try { MetricCollector metric; @@ -1033,7 +1176,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { auto commited = ConnectorPtr->transaction([&]() mutable { for (auto &table : tables) { - utils::DeleteTablePath(options_, std::get<1>(table)); + utils::DeleteTablePath(options_, std::get<1>(table), false);//only delete empty folder ConnectorPtr->remove(std::get<0>(table)); } @@ -1049,6 +1192,23 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return HandleException("Encounter exception when clean table files", e); } + //remove deleted table folder + //don't remove table folder until all its files has been deleted + try { + MetricCollector metric; + + for(auto& table_id : table_ids) { + auto selected = ConnectorPtr->select(columns(&TableFileSchema::file_id_), + where(c(&TableFileSchema::table_id_) == table_id)); + if(selected.size() == 0) 
{ + utils::DeleteTablePath(options_, table_id); + } + } + + } catch (std::exception &e) { + return HandleException("Encounter exception when delete table folder", e); + } + return Status::OK(); } @@ -1059,12 +1219,8 @@ Status SqliteMetaImpl::CleanUp() { //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); - auto files = ConnectorPtr->select(columns(&TableFileSchema::id_), - where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_INDEX - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_MERGE)); + std::vector file_type = {(int) TableFileSchema::NEW, (int) TableFileSchema::NEW_INDEX, (int) TableFileSchema::NEW_MERGE}; + auto files = ConnectorPtr->select(columns(&TableFileSchema::id_), where(in(&TableFileSchema::file_type_, file_type))); auto commited = ConnectorPtr->transaction([&]() mutable { for (auto &file : files) { @@ -1091,11 +1247,9 @@ Status SqliteMetaImpl::Count(const std::string &table_id, uint64_t &result) { try { MetricCollector metric; - auto selected = ConnectorPtr->select(columns(&TableFileSchema::size_), - where((c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX - or c(&TableFileSchema::file_type_) == (int) TableFileSchema::INDEX) + std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; + auto selected = ConnectorPtr->select(columns(&TableFileSchema::row_count_), + where(in(&TableFileSchema::file_type_, file_type) and c(&TableFileSchema::table_id_) == table_id)); TableSchema table_schema; @@ -1111,9 +1265,6 @@ Status SqliteMetaImpl::Count(const std::string &table_id, uint64_t &result) { result += std::get<0>(file); } - result /= table_schema.dimension_; - result /= sizeof(float); - } catch (std::exception &e) { return HandleException("Encounter exception when calculate table file size", e); } diff --git a/cpp/src/db/meta/SqliteMetaImpl.h b/cpp/src/db/meta/SqliteMetaImpl.h index 1525f27e4b776afc7757c893199b7575e2f96fda..34808f202f7619daf9d1916d8852b3e08e2810ab 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.h +++ b/cpp/src/db/meta/SqliteMetaImpl.h @@ -51,6 +51,15 @@ class SqliteMetaImpl : public Meta { Status HasNonIndexFiles(const std::string &table_id, bool &has) override; + Status + UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) override; + + Status + DescribeTableIndex(const std::string &table_id, TableIndex& index) override; + + Status + DropTableIndex(const std::string &table_id) override; + Status UpdateTableFilesToIndex(const std::string &table_id) override; diff --git a/cpp/src/db/scheduler/task/IndexLoadTask.cpp b/cpp/src/db/scheduler/task/IndexLoadTask.cpp index 4b242f230d3fd0dc7a9c3f04a2d5aaf6e0198c36..561bf07f13cf39aac6c9aaf1a9711dfdf03048af 100644 --- a/cpp/src/db/scheduler/task/IndexLoadTask.cpp +++ b/cpp/src/db/scheduler/task/IndexLoadTask.cpp @@ -45,7 +45,9 @@ std::shared_ptr IndexLoadTask::Execute() { //step 1: load index ExecutionEnginePtr index_ptr = EngineFactory::Build(file_->dimension_, file_->location_, - (EngineType)file_->engine_type_); + (EngineType)file_->engine_type_, + (MetricType)file_->metric_type_, + file_->nlist_); try { index_ptr->Load(); @@ -75,7 +77,7 @@ std::shared_ptr IndexLoadTask::Execute() { //step 2: return search task for later execution SearchTaskPtr task_ptr = std::make_shared(); 
task_ptr->index_id_ = file_->id_; - task_ptr->index_type_ = file_->file_type_; + task_ptr->file_type_ = file_->file_type_; task_ptr->index_engine_ = index_ptr; task_ptr->search_contexts_.swap(search_contexts_); return std::static_pointer_cast(task_ptr); diff --git a/cpp/src/db/scheduler/task/SearchTask.cpp b/cpp/src/db/scheduler/task/SearchTask.cpp index fd9d679d5e6ce2c761987f46ffb0f5cb1c1cd49c..4e7c0f461188945515c7e56d1d692ead3abeee1e 100644 --- a/cpp/src/db/scheduler/task/SearchTask.cpp +++ b/cpp/src/db/scheduler/task/SearchTask.cpp @@ -76,20 +76,10 @@ void CollectDurationMetrics(int index_type, double total_time) { } } -std::string GetMetricType() { - server::ServerConfig &config = server::ServerConfig::GetInstance(); - server::ConfigNode& engine_config = config.GetConfig(server::CONFIG_ENGINE); - return engine_config.GetValue(server::CONFIG_METRICTYPE, "L2"); -} - } SearchTask::SearchTask() : IScheduleTask(ScheduleTaskType::kSearch) { - std::string metric_type = GetMetricType(); - if(metric_type != "L2") { - metric_l2 = false; - } } std::shared_ptr SearchTask::Execute() { @@ -104,6 +94,8 @@ std::shared_ptr SearchTask::Execute() { auto start_time = METRICS_NOW_TIME; + bool metric_l2 = (index_engine_->IndexMetricType() == MetricType::L2); + std::vector output_ids; std::vector output_distence; for(auto& context : search_contexts_) { @@ -147,7 +139,7 @@ std::shared_ptr SearchTask::Execute() { auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - CollectDurationMetrics(index_type_, total_time); + CollectDurationMetrics(file_type_, total_time); rc.ElapseFromBegin("totally cost"); diff --git a/cpp/src/db/scheduler/task/SearchTask.h b/cpp/src/db/scheduler/task/SearchTask.h index 034b53d4dc3431514812b1a435286681c6bdfbff..60100464466127edfd732e6f28b17424fdb4a532 100644 --- a/cpp/src/db/scheduler/task/SearchTask.h +++ b/cpp/src/db/scheduler/task/SearchTask.h @@ -37,10 +37,9 @@ public: public: size_t index_id_ = 0; - int index_type_ = 0; //for metrics + int file_type_ = 0; //for metrics ExecutionEnginePtr index_engine_; std::vector search_contexts_; - bool metric_l2 = true; }; using SearchTaskPtr = std::shared_ptr; diff --git a/cpp/src/grpc/cpp_gen.sh b/cpp/src/grpc/cpp_gen.sh index c441783e7b853d923b1e18f45ddeebbf5c172038..62b9d95728a063ec1a91e2b353aa64b9c0e68138 100755 --- a/cpp/src/grpc/cpp_gen.sh +++ b/cpp/src/grpc/cpp_gen.sh @@ -1,9 +1,9 @@ #!/bin/bash -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-status --plugin=protoc-gen-grpc="/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" status.proto +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-status --plugin=protoc-gen-grpc="../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" status.proto -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-status status.proto +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-status status.proto -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-milvus --plugin=protoc-gen-grpc="/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" milvus.proto +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . 
--grpc_out=./gen-milvus --plugin=protoc-gen-grpc="../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" milvus.proto -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-milvus milvus.proto \ No newline at end of file +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-milvus milvus.proto \ No newline at end of file diff --git a/cpp/src/grpc/gen-milvus/milvus.pb.cc b/cpp/src/grpc/gen-milvus/milvus.pb.cc index 5ec8fddba07d2b11c4103d7800ebc9182e1989fd..25f115db18dec48bcad8d1d378e39db6cbbef007 100644 --- a/cpp/src/grpc/gen-milvus/milvus.pb.cc +++ b/cpp/src/grpc/gen-milvus/milvus.pb.cc @@ -365,9 +365,7 @@ const ::PROTOBUF_NAMESPACE_ID::uint32 TableStruct_milvus_2eproto::offsets[] PROT ~0u, // no _oneof_case_ ~0u, // no _weak_field_map_ PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, table_name_), - PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, index_type_), PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, dimension_), - PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, store_raw_vector_), ~0u, // no _has_bits_ PROTOBUF_FIELD_OFFSET(::milvus::grpc::Range, _internal_metadata_), ~0u, // no _extensions_ @@ -481,21 +479,21 @@ const ::PROTOBUF_NAMESPACE_ID::uint32 TableStruct_milvus_2eproto::offsets[] PROT static const ::PROTOBUF_NAMESPACE_ID::internal::MigrationSchema schemas[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = { { 0, -1, sizeof(::milvus::grpc::TableName)}, { 7, -1, sizeof(::milvus::grpc::TableSchema)}, - { 16, -1, sizeof(::milvus::grpc::Range)}, - { 23, -1, sizeof(::milvus::grpc::RowRecord)}, - { 29, -1, sizeof(::milvus::grpc::InsertParam)}, - { 37, -1, sizeof(::milvus::grpc::VectorIds)}, - { 44, -1, sizeof(::milvus::grpc::SearchParam)}, - { 54, -1, sizeof(::milvus::grpc::SearchInFilesParam)}, - { 61, -1, sizeof(::milvus::grpc::QueryResult)}, - { 68, -1, sizeof(::milvus::grpc::TopKQueryResult)}, - { 75, -1, sizeof(::milvus::grpc::StringReply)}, - { 82, -1, sizeof(::milvus::grpc::BoolReply)}, - { 89, -1, sizeof(::milvus::grpc::TableRowCount)}, - { 96, -1, sizeof(::milvus::grpc::Command)}, - { 102, -1, sizeof(::milvus::grpc::Index)}, - { 111, -1, sizeof(::milvus::grpc::IndexParam)}, - { 118, -1, sizeof(::milvus::grpc::DeleteByRangeParam)}, + { 14, -1, sizeof(::milvus::grpc::Range)}, + { 21, -1, sizeof(::milvus::grpc::RowRecord)}, + { 27, -1, sizeof(::milvus::grpc::InsertParam)}, + { 35, -1, sizeof(::milvus::grpc::VectorIds)}, + { 42, -1, sizeof(::milvus::grpc::SearchParam)}, + { 52, -1, sizeof(::milvus::grpc::SearchInFilesParam)}, + { 59, -1, sizeof(::milvus::grpc::QueryResult)}, + { 66, -1, sizeof(::milvus::grpc::TopKQueryResult)}, + { 73, -1, sizeof(::milvus::grpc::StringReply)}, + { 80, -1, sizeof(::milvus::grpc::BoolReply)}, + { 87, -1, sizeof(::milvus::grpc::TableRowCount)}, + { 94, -1, sizeof(::milvus::grpc::Command)}, + { 100, -1, sizeof(::milvus::grpc::Index)}, + { 109, -1, sizeof(::milvus::grpc::IndexParam)}, + { 116, -1, sizeof(::milvus::grpc::DeleteByRangeParam)}, }; static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] = { @@ -521,65 +519,64 @@ static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] = const char descriptor_table_protodef_milvus_2eproto[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = "\n\014milvus.proto\022\013milvus.grpc\032\014status.prot" "o\"D\n\tTableName\022#\n\006status\030\001 \001(\0132\023.milvus." 
- "grpc.Status\022\022\n\ntable_name\030\002 \001(\t\"z\n\013Table" + "grpc.Status\022\022\n\ntable_name\030\002 \001(\t\"L\n\013Table" "Schema\022*\n\ntable_name\030\001 \001(\0132\026.milvus.grpc" - ".TableName\022\022\n\nindex_type\030\002 \001(\005\022\021\n\tdimens" - "ion\030\003 \001(\003\022\030\n\020store_raw_vector\030\004 \001(\010\"/\n\005R" - "ange\022\023\n\013start_value\030\001 \001(\t\022\021\n\tend_value\030\002" - " \001(\t\" \n\tRowRecord\022\023\n\013vector_data\030\001 \003(\002\"i" - "\n\013InsertParam\022\022\n\ntable_name\030\001 \001(\t\0220\n\020row" - "_record_array\030\002 \003(\0132\026.milvus.grpc.RowRec" - "ord\022\024\n\014row_id_array\030\003 \003(\003\"I\n\tVectorIds\022#" - "\n\006status\030\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017v" - "ector_id_array\030\002 \003(\003\"\242\001\n\013SearchParam\022\022\n\n" - "table_name\030\001 \001(\t\0222\n\022query_record_array\030\002" - " \003(\0132\026.milvus.grpc.RowRecord\022-\n\021query_ra" - "nge_array\030\003 \003(\0132\022.milvus.grpc.Range\022\014\n\004t" - "opk\030\004 \001(\003\022\016\n\006nprobe\030\005 \001(\003\"[\n\022SearchInFil" - "esParam\022\025\n\rfile_id_array\030\001 \003(\t\022.\n\014search" - "_param\030\002 \001(\0132\030.milvus.grpc.SearchParam\"+" - "\n\013QueryResult\022\n\n\002id\030\001 \001(\003\022\020\n\010distance\030\002 " - "\001(\001\"m\n\017TopKQueryResult\022#\n\006status\030\001 \001(\0132\023" - ".milvus.grpc.Status\0225\n\023query_result_arra" - "ys\030\002 \003(\0132\030.milvus.grpc.QueryResult\"H\n\013St" - "ringReply\022#\n\006status\030\001 \001(\0132\023.milvus.grpc." - "Status\022\024\n\014string_reply\030\002 \001(\t\"D\n\tBoolRepl" - "y\022#\n\006status\030\001 \001(\0132\023.milvus.grpc.Status\022\022" - "\n\nbool_reply\030\002 \001(\010\"M\n\rTableRowCount\022#\n\006s" - "tatus\030\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017tabl" - "e_row_count\030\002 \001(\003\"\026\n\007Command\022\013\n\003cmd\030\001 \001(" - "\t\"X\n\005Index\022\022\n\nindex_type\030\001 \001(\005\022\r\n\005nlist\030" - "\002 \001(\003\022\027\n\017index_file_size\030\003 \001(\005\022\023\n\013metric" - "_type\030\004 \001(\005\"[\n\nIndexParam\022*\n\ntable_name\030" - "\001 \001(\0132\026.milvus.grpc.TableName\022!\n\005index\030\002" - " \001(\0132\022.milvus.grpc.Index\"K\n\022DeleteByRang" - "eParam\022!\n\005range\030\001 \001(\0132\022.milvus.grpc.Rang" - "e\022\022\n\ntable_name\030\002 \001(\t2\352\007\n\rMilvusService\022" - ">\n\013CreateTable\022\030.milvus.grpc.TableSchema" - "\032\023.milvus.grpc.Status\"\000\022<\n\010HasTable\022\026.mi" - "lvus.grpc.TableName\032\026.milvus.grpc.BoolRe" - "ply\"\000\022:\n\tDropTable\022\026.milvus.grpc.TableNa" - "me\032\023.milvus.grpc.Status\"\000\022=\n\013CreateIndex" - "\022\027.milvus.grpc.IndexParam\032\023.milvus.grpc." - "Status\"\000\022<\n\006Insert\022\030.milvus.grpc.InsertP" - "aram\032\026.milvus.grpc.VectorIds\"\000\022D\n\006Search" - "\022\030.milvus.grpc.SearchParam\032\034.milvus.grpc" - ".TopKQueryResult\"\0000\001\022R\n\rSearchInFiles\022\037." - "milvus.grpc.SearchInFilesParam\032\034.milvus." - "grpc.TopKQueryResult\"\0000\001\022C\n\rDescribeTabl" - "e\022\026.milvus.grpc.TableName\032\030.milvus.grpc." - "TableSchema\"\000\022B\n\nCountTable\022\026.milvus.grp" - "c.TableName\032\032.milvus.grpc.TableRowCount\"" - "\000\022>\n\nShowTables\022\024.milvus.grpc.Command\032\026." 
- "milvus.grpc.TableName\"\0000\001\0227\n\003Cmd\022\024.milvu" - "s.grpc.Command\032\030.milvus.grpc.StringReply" - "\"\000\022G\n\rDeleteByRange\022\037.milvus.grpc.Delete" - "ByRangeParam\032\023.milvus.grpc.Status\"\000\022=\n\014P" - "reloadTable\022\026.milvus.grpc.TableName\032\023.mi" - "lvus.grpc.Status\"\000\022B\n\rDescribeIndex\022\026.mi" - "lvus.grpc.TableName\032\027.milvus.grpc.IndexP" - "aram\"\000\022:\n\tDropIndex\022\026.milvus.grpc.TableN" - "ame\032\023.milvus.grpc.Status\"\000b\006proto3" + ".TableName\022\021\n\tdimension\030\002 \001(\003\"/\n\005Range\022\023" + "\n\013start_value\030\001 \001(\t\022\021\n\tend_value\030\002 \001(\t\" " + "\n\tRowRecord\022\023\n\013vector_data\030\001 \003(\002\"i\n\013Inse" + "rtParam\022\022\n\ntable_name\030\001 \001(\t\0220\n\020row_recor" + "d_array\030\002 \003(\0132\026.milvus.grpc.RowRecord\022\024\n" + "\014row_id_array\030\003 \003(\003\"I\n\tVectorIds\022#\n\006stat" + "us\030\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017vector_" + "id_array\030\002 \003(\003\"\242\001\n\013SearchParam\022\022\n\ntable_" + "name\030\001 \001(\t\0222\n\022query_record_array\030\002 \003(\0132\026" + ".milvus.grpc.RowRecord\022-\n\021query_range_ar" + "ray\030\003 \003(\0132\022.milvus.grpc.Range\022\014\n\004topk\030\004 " + "\001(\003\022\016\n\006nprobe\030\005 \001(\003\"[\n\022SearchInFilesPara" + "m\022\025\n\rfile_id_array\030\001 \003(\t\022.\n\014search_param" + "\030\002 \001(\0132\030.milvus.grpc.SearchParam\"+\n\013Quer" + "yResult\022\n\n\002id\030\001 \001(\003\022\020\n\010distance\030\002 \001(\001\"m\n" + "\017TopKQueryResult\022#\n\006status\030\001 \001(\0132\023.milvu" + "s.grpc.Status\0225\n\023query_result_arrays\030\002 \003" + "(\0132\030.milvus.grpc.QueryResult\"H\n\013StringRe" + "ply\022#\n\006status\030\001 \001(\0132\023.milvus.grpc.Status" + "\022\024\n\014string_reply\030\002 \001(\t\"D\n\tBoolReply\022#\n\006s" + "tatus\030\001 \001(\0132\023.milvus.grpc.Status\022\022\n\nbool" + "_reply\030\002 \001(\010\"M\n\rTableRowCount\022#\n\006status\030" + "\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017table_row_" + "count\030\002 \001(\003\"\026\n\007Command\022\013\n\003cmd\030\001 \001(\t\"X\n\005I" + "ndex\022\022\n\nindex_type\030\001 \001(\005\022\r\n\005nlist\030\002 \001(\005\022" + "\027\n\017index_file_size\030\003 \001(\005\022\023\n\013metric_type\030" + "\004 \001(\005\"[\n\nIndexParam\022*\n\ntable_name\030\001 \001(\0132" + "\026.milvus.grpc.TableName\022!\n\005index\030\002 \001(\0132\022" + ".milvus.grpc.Index\"K\n\022DeleteByRangeParam" + "\022!\n\005range\030\001 \001(\0132\022.milvus.grpc.Range\022\022\n\nt" + "able_name\030\002 \001(\t2\352\007\n\rMilvusService\022>\n\013Cre" + "ateTable\022\030.milvus.grpc.TableSchema\032\023.mil" + "vus.grpc.Status\"\000\022<\n\010HasTable\022\026.milvus.g" + "rpc.TableName\032\026.milvus.grpc.BoolReply\"\000\022" + ":\n\tDropTable\022\026.milvus.grpc.TableName\032\023.m" + "ilvus.grpc.Status\"\000\022=\n\013CreateIndex\022\027.mil" + "vus.grpc.IndexParam\032\023.milvus.grpc.Status" + "\"\000\022<\n\006Insert\022\030.milvus.grpc.InsertParam\032\026" + ".milvus.grpc.VectorIds\"\000\022D\n\006Search\022\030.mil" + "vus.grpc.SearchParam\032\034.milvus.grpc.TopKQ" + "ueryResult\"\0000\001\022R\n\rSearchInFiles\022\037.milvus" + ".grpc.SearchInFilesParam\032\034.milvus.grpc.T" + "opKQueryResult\"\0000\001\022C\n\rDescribeTable\022\026.mi" + "lvus.grpc.TableName\032\030.milvus.grpc.TableS" + "chema\"\000\022B\n\nCountTable\022\026.milvus.grpc.Tabl" + 
"eName\032\032.milvus.grpc.TableRowCount\"\000\022>\n\nS" + "howTables\022\024.milvus.grpc.Command\032\026.milvus" + ".grpc.TableName\"\0000\001\0227\n\003Cmd\022\024.milvus.grpc" + ".Command\032\030.milvus.grpc.StringReply\"\000\022G\n\r" + "DeleteByRange\022\037.milvus.grpc.DeleteByRang" + "eParam\032\023.milvus.grpc.Status\"\000\022=\n\014Preload" + "Table\022\026.milvus.grpc.TableName\032\023.milvus.g" + "rpc.Status\"\000\022B\n\rDescribeIndex\022\026.milvus.g" + "rpc.TableName\032\027.milvus.grpc.IndexParam\"\000" + "\022:\n\tDropIndex\022\026.milvus.grpc.TableName\032\023." + "milvus.grpc.Status\"\000b\006proto3" ; static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_milvus_2eproto_deps[1] = { &::descriptor_table_status_2eproto, @@ -606,7 +603,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_mil static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_milvus_2eproto_once; static bool descriptor_table_milvus_2eproto_initialized = false; const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_milvus_2eproto = { - &descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2434, + &descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2388, &descriptor_table_milvus_2eproto_once, descriptor_table_milvus_2eproto_sccs, descriptor_table_milvus_2eproto_deps, 17, 1, schemas, file_default_instances, TableStruct_milvus_2eproto::offsets, file_level_metadata_milvus_2eproto, 17, file_level_enum_descriptors_milvus_2eproto, file_level_service_descriptors_milvus_2eproto, @@ -981,17 +978,15 @@ TableSchema::TableSchema(const TableSchema& from) } else { table_name_ = nullptr; } - ::memcpy(&dimension_, &from.dimension_, - static_cast(reinterpret_cast(&store_raw_vector_) - - reinterpret_cast(&dimension_)) + sizeof(store_raw_vector_)); + dimension_ = from.dimension_; // @@protoc_insertion_point(copy_constructor:milvus.grpc.TableSchema) } void TableSchema::SharedCtor() { ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_TableSchema_milvus_2eproto.base); ::memset(&table_name_, 0, static_cast( - reinterpret_cast(&store_raw_vector_) - - reinterpret_cast(&table_name_)) + sizeof(store_raw_vector_)); + reinterpret_cast(&dimension_) - + reinterpret_cast(&table_name_)) + sizeof(dimension_)); } TableSchema::~TableSchema() { @@ -1022,9 +1017,7 @@ void TableSchema::Clear() { delete table_name_; } table_name_ = nullptr; - ::memset(&dimension_, 0, static_cast( - reinterpret_cast(&store_raw_vector_) - - reinterpret_cast(&dimension_)) + sizeof(store_raw_vector_)); + dimension_ = PROTOBUF_LONGLONG(0); _internal_metadata_.Clear(); } @@ -1043,27 +1036,13 @@ const char* TableSchema::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID CHK_(ptr); } else goto handle_unusual; continue; - // int32 index_type = 2; + // int64 dimension = 2; case 2: if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 16)) { - index_type_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); - CHK_(ptr); - } else goto handle_unusual; - continue; - // int64 dimension = 3; - case 3: - if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 24)) { dimension_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); CHK_(ptr); } else goto handle_unusual; continue; - // bool store_raw_vector = 4; - case 4: - if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 32)) { - store_raw_vector_ = 
::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); - CHK_(ptr); - } else goto handle_unusual; - continue; default: { handle_unusual: if ((tag & 7) == 4 || tag == 0) { @@ -1105,23 +1084,10 @@ bool TableSchema::MergePartialFromCodedStream( break; } - // int32 index_type = 2; + // int64 dimension = 2; case 2: { if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (16 & 0xFF)) { - DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< - ::PROTOBUF_NAMESPACE_ID::int32, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT32>( - input, &index_type_))); - } else { - goto handle_unusual; - } - break; - } - - // int64 dimension = 3; - case 3: { - if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (24 & 0xFF)) { - DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< ::PROTOBUF_NAMESPACE_ID::int64, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT64>( input, &dimension_))); @@ -1131,19 +1097,6 @@ bool TableSchema::MergePartialFromCodedStream( break; } - // bool store_raw_vector = 4; - case 4: { - if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (32 & 0xFF)) { - - DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< - bool, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_BOOL>( - input, &store_raw_vector_))); - } else { - goto handle_unusual; - } - break; - } - default: { handle_unusual: if (tag == 0) { @@ -1177,19 +1130,9 @@ void TableSchema::SerializeWithCachedSizes( 1, _Internal::table_name(this), output); } - // int32 index_type = 2; - if (this->index_type() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32(2, this->index_type(), output); - } - - // int64 dimension = 3; + // int64 dimension = 2; if (this->dimension() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64(3, this->dimension(), output); - } - - // bool store_raw_vector = 4; - if (this->store_raw_vector() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBool(4, this->store_raw_vector(), output); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64(2, this->dimension(), output); } if (_internal_metadata_.have_unknown_fields()) { @@ -1212,19 +1155,9 @@ void TableSchema::SerializeWithCachedSizes( 1, _Internal::table_name(this), target); } - // int32 index_type = 2; - if (this->index_type() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(2, this->index_type(), target); - } - - // int64 dimension = 3; + // int64 dimension = 2; if (this->dimension() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(3, this->dimension(), target); - } - - // bool store_raw_vector = 4; - if (this->store_raw_vector() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(4, this->store_raw_vector(), target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(2, this->dimension(), target); } if (_internal_metadata_.have_unknown_fields()) { @@ -1255,25 +1188,13 @@ size_t TableSchema::ByteSizeLong() const { *table_name_); } - // int64 dimension = 3; + // int64 dimension = 2; if (this->dimension() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int64Size( this->dimension()); } - // int32 index_type = 2; - if (this->index_type() != 0) { - total_size += 1 + - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( - this->index_type()); - } - - // bool store_raw_vector = 4; - if (this->store_raw_vector() != 0) { - total_size += 1 + 1; 
- } - int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; @@ -1307,12 +1228,6 @@ void TableSchema::MergeFrom(const TableSchema& from) { if (from.dimension() != 0) { set_dimension(from.dimension()); } - if (from.index_type() != 0) { - set_index_type(from.index_type()); - } - if (from.store_raw_vector() != 0) { - set_store_raw_vector(from.store_raw_vector()); - } } void TableSchema::CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) { @@ -1338,8 +1253,6 @@ void TableSchema::InternalSwap(TableSchema* other) { _internal_metadata_.Swap(&other->_internal_metadata_); swap(table_name_, other->table_name_); swap(dimension_, other->dimension_); - swap(index_type_, other->index_type_); - swap(store_raw_vector_, other->store_raw_vector_); } ::PROTOBUF_NAMESPACE_ID::Metadata TableSchema::GetMetadata() const { @@ -5355,16 +5268,16 @@ Index::Index(const Index& from) : ::PROTOBUF_NAMESPACE_ID::Message(), _internal_metadata_(nullptr) { _internal_metadata_.MergeFrom(from._internal_metadata_); - ::memcpy(&nlist_, &from.nlist_, + ::memcpy(&index_type_, &from.index_type_, static_cast(reinterpret_cast(&metric_type_) - - reinterpret_cast(&nlist_)) + sizeof(metric_type_)); + reinterpret_cast(&index_type_)) + sizeof(metric_type_)); // @@protoc_insertion_point(copy_constructor:milvus.grpc.Index) } void Index::SharedCtor() { - ::memset(&nlist_, 0, static_cast( + ::memset(&index_type_, 0, static_cast( reinterpret_cast(&metric_type_) - - reinterpret_cast(&nlist_)) + sizeof(metric_type_)); + reinterpret_cast(&index_type_)) + sizeof(metric_type_)); } Index::~Index() { @@ -5390,9 +5303,9 @@ void Index::Clear() { // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; - ::memset(&nlist_, 0, static_cast( + ::memset(&index_type_, 0, static_cast( reinterpret_cast(&metric_type_) - - reinterpret_cast(&nlist_)) + sizeof(metric_type_)); + reinterpret_cast(&index_type_)) + sizeof(metric_type_)); _internal_metadata_.Clear(); } @@ -5411,7 +5324,7 @@ const char* Index::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::inte CHK_(ptr); } else goto handle_unusual; continue; - // int64 nlist = 2; + // int32 nlist = 2; case 2: if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 16)) { nlist_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); @@ -5475,12 +5388,12 @@ bool Index::MergePartialFromCodedStream( break; } - // int64 nlist = 2; + // int32 nlist = 2; case 2: { if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (16 & 0xFF)) { DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< - ::PROTOBUF_NAMESPACE_ID::int64, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT64>( + ::PROTOBUF_NAMESPACE_ID::int32, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT32>( input, &nlist_))); } else { goto handle_unusual; @@ -5546,9 +5459,9 @@ void Index::SerializeWithCachedSizes( ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32(1, this->index_type(), output); } - // int64 nlist = 2; + // int32 nlist = 2; if (this->nlist() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64(2, this->nlist(), output); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32(2, this->nlist(), output); } // int32 index_file_size = 3; @@ -5579,9 +5492,9 @@ void Index::SerializeWithCachedSizes( target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(1, this->index_type(), target); } - // int64 nlist = 2; + // int32 nlist = 2; if 
(this->nlist() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(2, this->nlist(), target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(2, this->nlist(), target); } // int32 index_file_size = 3; @@ -5615,13 +5528,6 @@ size_t Index::ByteSizeLong() const { // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; - // int64 nlist = 2; - if (this->nlist() != 0) { - total_size += 1 + - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int64Size( - this->nlist()); - } - // int32 index_type = 1; if (this->index_type() != 0) { total_size += 1 + @@ -5629,6 +5535,13 @@ size_t Index::ByteSizeLong() const { this->index_type()); } + // int32 nlist = 2; + if (this->nlist() != 0) { + total_size += 1 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->nlist()); + } + // int32 index_file_size = 3; if (this->index_file_size() != 0) { total_size += 1 + @@ -5670,12 +5583,12 @@ void Index::MergeFrom(const Index& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.nlist() != 0) { - set_nlist(from.nlist()); - } if (from.index_type() != 0) { set_index_type(from.index_type()); } + if (from.nlist() != 0) { + set_nlist(from.nlist()); + } if (from.index_file_size() != 0) { set_index_file_size(from.index_file_size()); } @@ -5705,8 +5618,8 @@ bool Index::IsInitialized() const { void Index::InternalSwap(Index* other) { using std::swap; _internal_metadata_.Swap(&other->_internal_metadata_); - swap(nlist_, other->nlist_); swap(index_type_, other->index_type_); + swap(nlist_, other->nlist_); swap(index_file_size_, other->index_file_size_); swap(metric_type_, other->metric_type_); } diff --git a/cpp/src/grpc/gen-milvus/milvus.pb.h b/cpp/src/grpc/gen-milvus/milvus.pb.h index d4c33b848a4ad5c81ef1a8d0ed36ba830197296b..2c26bc4e4c7531b09032dcebe20bb0d38ffe400b 100644 --- a/cpp/src/grpc/gen-milvus/milvus.pb.h +++ b/cpp/src/grpc/gen-milvus/milvus.pb.h @@ -395,9 +395,7 @@ class TableSchema : enum : int { kTableNameFieldNumber = 1, - kDimensionFieldNumber = 3, - kIndexTypeFieldNumber = 2, - kStoreRawVectorFieldNumber = 4, + kDimensionFieldNumber = 2, }; // .milvus.grpc.TableName table_name = 1; bool has_table_name() const; @@ -407,21 +405,11 @@ class TableSchema : ::milvus::grpc::TableName* mutable_table_name(); void set_allocated_table_name(::milvus::grpc::TableName* table_name); - // int64 dimension = 3; + // int64 dimension = 2; void clear_dimension(); ::PROTOBUF_NAMESPACE_ID::int64 dimension() const; void set_dimension(::PROTOBUF_NAMESPACE_ID::int64 value); - // int32 index_type = 2; - void clear_index_type(); - ::PROTOBUF_NAMESPACE_ID::int32 index_type() const; - void set_index_type(::PROTOBUF_NAMESPACE_ID::int32 value); - - // bool store_raw_vector = 4; - void clear_store_raw_vector(); - bool store_raw_vector() const; - void set_store_raw_vector(bool value); - // @@protoc_insertion_point(class_scope:milvus.grpc.TableSchema) private: class _Internal; @@ -429,8 +417,6 @@ class TableSchema : ::PROTOBUF_NAMESPACE_ID::internal::InternalMetadataWithArena _internal_metadata_; ::milvus::grpc::TableName* table_name_; ::PROTOBUF_NAMESPACE_ID::int64 dimension_; - ::PROTOBUF_NAMESPACE_ID::int32 index_type_; - bool store_raw_vector_; mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; friend struct ::TableStruct_milvus_2eproto; }; @@ -2330,21 +2316,21 @@ class Index : // accessors ------------------------------------------------------- enum : int { - 
kNlistFieldNumber = 2, kIndexTypeFieldNumber = 1, + kNlistFieldNumber = 2, kIndexFileSizeFieldNumber = 3, kMetricTypeFieldNumber = 4, }; - // int64 nlist = 2; - void clear_nlist(); - ::PROTOBUF_NAMESPACE_ID::int64 nlist() const; - void set_nlist(::PROTOBUF_NAMESPACE_ID::int64 value); - // int32 index_type = 1; void clear_index_type(); ::PROTOBUF_NAMESPACE_ID::int32 index_type() const; void set_index_type(::PROTOBUF_NAMESPACE_ID::int32 value); + // int32 nlist = 2; + void clear_nlist(); + ::PROTOBUF_NAMESPACE_ID::int32 nlist() const; + void set_nlist(::PROTOBUF_NAMESPACE_ID::int32 value); + // int32 index_file_size = 3; void clear_index_file_size(); ::PROTOBUF_NAMESPACE_ID::int32 index_file_size() const; @@ -2360,8 +2346,8 @@ class Index : class _Internal; ::PROTOBUF_NAMESPACE_ID::internal::InternalMetadataWithArena _internal_metadata_; - ::PROTOBUF_NAMESPACE_ID::int64 nlist_; ::PROTOBUF_NAMESPACE_ID::int32 index_type_; + ::PROTOBUF_NAMESPACE_ID::int32 nlist_; ::PROTOBUF_NAMESPACE_ID::int32 index_file_size_; ::PROTOBUF_NAMESPACE_ID::int32 metric_type_; mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; @@ -2820,21 +2806,7 @@ inline void TableSchema::set_allocated_table_name(::milvus::grpc::TableName* tab // @@protoc_insertion_point(field_set_allocated:milvus.grpc.TableSchema.table_name) } -// int32 index_type = 2; -inline void TableSchema::clear_index_type() { - index_type_ = 0; -} -inline ::PROTOBUF_NAMESPACE_ID::int32 TableSchema::index_type() const { - // @@protoc_insertion_point(field_get:milvus.grpc.TableSchema.index_type) - return index_type_; -} -inline void TableSchema::set_index_type(::PROTOBUF_NAMESPACE_ID::int32 value) { - - index_type_ = value; - // @@protoc_insertion_point(field_set:milvus.grpc.TableSchema.index_type) -} - -// int64 dimension = 3; +// int64 dimension = 2; inline void TableSchema::clear_dimension() { dimension_ = PROTOBUF_LONGLONG(0); } @@ -2848,20 +2820,6 @@ inline void TableSchema::set_dimension(::PROTOBUF_NAMESPACE_ID::int64 value) { // @@protoc_insertion_point(field_set:milvus.grpc.TableSchema.dimension) } -// bool store_raw_vector = 4; -inline void TableSchema::clear_store_raw_vector() { - store_raw_vector_ = false; -} -inline bool TableSchema::store_raw_vector() const { - // @@protoc_insertion_point(field_get:milvus.grpc.TableSchema.store_raw_vector) - return store_raw_vector_; -} -inline void TableSchema::set_store_raw_vector(bool value) { - - store_raw_vector_ = value; - // @@protoc_insertion_point(field_set:milvus.grpc.TableSchema.store_raw_vector) -} - // ------------------------------------------------------------------- // Range @@ -3869,15 +3827,15 @@ inline void Index::set_index_type(::PROTOBUF_NAMESPACE_ID::int32 value) { // @@protoc_insertion_point(field_set:milvus.grpc.Index.index_type) } -// int64 nlist = 2; +// int32 nlist = 2; inline void Index::clear_nlist() { - nlist_ = PROTOBUF_LONGLONG(0); + nlist_ = 0; } -inline ::PROTOBUF_NAMESPACE_ID::int64 Index::nlist() const { +inline ::PROTOBUF_NAMESPACE_ID::int32 Index::nlist() const { // @@protoc_insertion_point(field_get:milvus.grpc.Index.nlist) return nlist_; } -inline void Index::set_nlist(::PROTOBUF_NAMESPACE_ID::int64 value) { +inline void Index::set_nlist(::PROTOBUF_NAMESPACE_ID::int32 value) { nlist_ = value; // @@protoc_insertion_point(field_set:milvus.grpc.Index.nlist) diff --git a/cpp/src/grpc/gen-status/status.pb.cc b/cpp/src/grpc/gen-status/status.pb.cc index 79c4127020b5da94efb98e7b1f4c9a8b4e267ffb..e6eea519b7049736fcd76c57c0976854c1f9bbdf 100644 --- 
a/cpp/src/grpc/gen-status/status.pb.cc +++ b/cpp/src/grpc/gen-status/status.pb.cc @@ -61,7 +61,7 @@ static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] = const char descriptor_table_protodef_status_2eproto[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = "\n\014status.proto\022\013milvus.grpc\"D\n\006Status\022*\n" "\nerror_code\030\001 \001(\0162\026.milvus.grpc.ErrorCod" - "e\022\016\n\006reason\030\002 \001(\t*\354\003\n\tErrorCode\022\013\n\007SUCCE" + "e\022\016\n\006reason\030\002 \001(\t*\230\004\n\tErrorCode\022\013\n\007SUCCE" "SS\020\000\022\024\n\020UNEXPECTED_ERROR\020\001\022\022\n\016CONNECT_FA" "ILED\020\002\022\025\n\021PERMISSION_DENIED\020\003\022\024\n\020TABLE_N" "OT_EXISTS\020\004\022\024\n\020ILLEGAL_ARGUMENT\020\005\022\021\n\rILL" @@ -73,7 +73,9 @@ const char descriptor_table_protodef_status_2eproto[] PROTOBUF_SECTION_VARIABLE( "TA_FAILED\020\017\022\020\n\014CACHE_FAILED\020\020\022\030\n\024CANNOT_" "CREATE_FOLDER\020\021\022\026\n\022CANNOT_CREATE_FILE\020\022\022" "\030\n\024CANNOT_DELETE_FOLDER\020\023\022\026\n\022CANNOT_DELE" - "TE_FILE\020\024\022\025\n\021BUILD_INDEX_ERROR\020\025b\006proto3" + "TE_FILE\020\024\022\025\n\021BUILD_INDEX_ERROR\020\025\022\021\n\rILLE" + "GAL_NLIST\020\026\022\027\n\023ILLEGAL_METRIC_TYPE\020\027b\006pr" + "oto3" ; static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_status_2eproto_deps[1] = { }; @@ -83,7 +85,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_sta static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_status_2eproto_once; static bool descriptor_table_status_2eproto_initialized = false; const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_status_2eproto = { - &descriptor_table_status_2eproto_initialized, descriptor_table_protodef_status_2eproto, "status.proto", 600, + &descriptor_table_status_2eproto_initialized, descriptor_table_protodef_status_2eproto, "status.proto", 644, &descriptor_table_status_2eproto_once, descriptor_table_status_2eproto_sccs, descriptor_table_status_2eproto_deps, 1, 0, schemas, file_default_instances, TableStruct_status_2eproto::offsets, file_level_metadata_status_2eproto, 1, file_level_enum_descriptors_status_2eproto, file_level_service_descriptors_status_2eproto, @@ -121,6 +123,8 @@ bool ErrorCode_IsValid(int value) { case 19: case 20: case 21: + case 22: + case 23: return true; default: return false; diff --git a/cpp/src/grpc/gen-status/status.pb.h b/cpp/src/grpc/gen-status/status.pb.h index fe200e09fbaa43adce9f5f67b3621b82ad286d6d..5654256c7f8559c75f9b2621fc4543d68f713c76 100644 --- a/cpp/src/grpc/gen-status/status.pb.h +++ b/cpp/src/grpc/gen-status/status.pb.h @@ -91,12 +91,14 @@ enum ErrorCode : int { CANNOT_DELETE_FOLDER = 19, CANNOT_DELETE_FILE = 20, BUILD_INDEX_ERROR = 21, + ILLEGAL_NLIST = 22, + ILLEGAL_METRIC_TYPE = 23, ErrorCode_INT_MIN_SENTINEL_DO_NOT_USE_ = std::numeric_limits<::PROTOBUF_NAMESPACE_ID::int32>::min(), ErrorCode_INT_MAX_SENTINEL_DO_NOT_USE_ = std::numeric_limits<::PROTOBUF_NAMESPACE_ID::int32>::max() }; bool ErrorCode_IsValid(int value); constexpr ErrorCode ErrorCode_MIN = SUCCESS; -constexpr ErrorCode ErrorCode_MAX = BUILD_INDEX_ERROR; +constexpr ErrorCode ErrorCode_MAX = ILLEGAL_METRIC_TYPE; constexpr int ErrorCode_ARRAYSIZE = ErrorCode_MAX + 1; const ::PROTOBUF_NAMESPACE_ID::EnumDescriptor* ErrorCode_descriptor(); diff --git a/cpp/src/grpc/milvus.proto b/cpp/src/grpc/milvus.proto index 
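status.proto and the regenerated status.pb.* above gain two error codes, ILLEGAL_NLIST = 22 and ILLEGAL_METRIC_TYPE = 23, and ErrorCode_MAX moves up to ILLEGAL_METRIC_TYPE. A hypothetical server-side check, not part of this patch, could map invalid index parameters onto the new codes roughly like this (metric_type values follow the 1-L2, 2-IP convention documented in milvus.proto below):

    ::milvus::grpc::ErrorCode
    ValidateIndexParam(const ::milvus::grpc::Index &index) {
        if (index.nlist() <= 0) {
            return ::milvus::grpc::ILLEGAL_NLIST;          // code 22
        }
        if (index.metric_type() != 1 && index.metric_type() != 2) {
            return ::milvus::grpc::ILLEGAL_METRIC_TYPE;    // code 23
        }
        return ::milvus::grpc::SUCCESS;
    }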
59d74813d1ef9f197592924e71e71939ccd7a804..f8058c1fe480266282cdc97544ce3c9ebb6d0dcf 100644 --- a/cpp/src/grpc/milvus.proto +++ b/cpp/src/grpc/milvus.proto @@ -17,9 +17,7 @@ message TableName { */ message TableSchema { TableName table_name = 1; - int32 index_type = 2; - int64 dimension = 3; - bool store_raw_vector = 4; + int64 dimension = 2; } /** @@ -122,10 +120,12 @@ message Command { /** * @brief Index + * @index_type: 0-invalid, 1-idmap, 2-ivflat, 3-ivfsq8, 4-nsgmix + * @metric_type: 1-L2, 2-IP */ message Index { int32 index_type = 1; - int64 nlist = 2; + int32 nlist = 2; int32 index_file_size = 3; int32 metric_type = 4; } diff --git a/cpp/src/grpc/status.proto b/cpp/src/grpc/status.proto index 4e8dc15b74320e6d2bd1137a10c240b80f5fb5c4..f5b65c5f77236bdeea4c60b7fd6d6e225ba7cd85 100644 --- a/cpp/src/grpc/status.proto +++ b/cpp/src/grpc/status.proto @@ -25,6 +25,8 @@ enum ErrorCode { CANNOT_DELETE_FOLDER = 19; CANNOT_DELETE_FILE = 20; BUILD_INDEX_ERROR = 21; + ILLEGAL_NLIST = 22; + ILLEGAL_METRIC_TYPE = 23; } message Status { diff --git a/cpp/src/metrics/MetricBase.h b/cpp/src/metrics/MetricBase.h index 23a2427b3537f66a0747c1d6ba25c172014bc922..a11bf14179aa6576f71f33a7724a9f7ca72c9f5d 100644 --- a/cpp/src/metrics/MetricBase.h +++ b/cpp/src/metrics/MetricBase.h @@ -31,7 +31,8 @@ class MetricsBase{ virtual void IndexFileSizeHistogramObserve(double value) {}; virtual void BuildIndexDurationSecondsHistogramObserve(double value) {}; - virtual void CacheUsageGaugeSet(double value) {}; + virtual void CpuCacheUsageGaugeSet(double value) {}; + virtual void GpuCacheUsageGaugeSet(double value) {}; virtual void MetaAccessTotalIncrement(double value = 1) {}; virtual void MetaAccessDurationSecondsHistogramObserve(double value) {}; diff --git a/cpp/src/metrics/Metrics.cpp b/cpp/src/metrics/Metrics.cpp index 925bb4cd5c89de9a3c43ef7c185c7d4543f0d8c6..23fb0a15b867019629a6d82a16bf368a438688a5 100644 --- a/cpp/src/metrics/Metrics.cpp +++ b/cpp/src/metrics/Metrics.cpp @@ -3,36 +3,29 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. 
******************************************************************************/ - - #include "Metrics.h" #include "PrometheusMetrics.h" + namespace zilliz { namespace milvus { namespace server { MetricsBase & -Metrics::CreateMetricsCollector(MetricCollectorType collector_type) { - switch (collector_type) { - case MetricCollectorType::PROMETHEUS: - static PrometheusMetrics instance = PrometheusMetrics::GetInstance(); - return instance; - default:return MetricsBase::GetInstance(); - } +Metrics::GetInstance() { + static MetricsBase &instance = CreateMetricsCollector(); + return instance; } MetricsBase & -Metrics::GetInstance() { +Metrics::CreateMetricsCollector() { ConfigNode &config = ServerConfig::GetInstance().GetConfig(CONFIG_METRIC); - std::string collector_typr_str = config.GetValue(CONFIG_METRIC_COLLECTOR); + std::string collector_type_str = config.GetValue(CONFIG_METRIC_COLLECTOR); - if (collector_typr_str == "prometheus") { - return CreateMetricsCollector(MetricCollectorType::PROMETHEUS); - } else if (collector_typr_str == "zabbix") { - return CreateMetricsCollector(MetricCollectorType::ZABBIX); + if (collector_type_str == "prometheus") { + return PrometheusMetrics::GetInstance(); } else { - return CreateMetricsCollector(MetricCollectorType::INVALID); + return MetricsBase::GetInstance(); } } diff --git a/cpp/src/metrics/Metrics.h b/cpp/src/metrics/Metrics.h index be796eb9c4518d845e23259c1b35ddf0884d9988..65df7140cc6d4e8259a8b61d92fd278a8cdfa62c 100644 --- a/cpp/src/metrics/Metrics.h +++ b/cpp/src/metrics/Metrics.h @@ -5,22 +5,14 @@ ******************************************************************************/ #pragma once -#include "utils/Error.h" -#include -#include - - -#pragma once - #include "MetricBase.h" -//#include "PrometheusMetrics.h" + namespace zilliz { namespace milvus { namespace server { #define METRICS_NOW_TIME std::chrono::system_clock::now() -//#define server::Metrics::GetInstance() server::Metrics::GetInstance() #define METRICS_MICROSECONDS(a, b) (std::chrono::duration_cast (b-a)).count(); enum class MetricCollectorType { @@ -31,15 +23,13 @@ enum class MetricCollectorType { class Metrics { public: - static MetricsBase & - CreateMetricsCollector(MetricCollectorType collector_type); + static MetricsBase &GetInstance(); - static MetricsBase & - GetInstance(); + private: + static MetricsBase &CreateMetricsCollector(); }; - } } } diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index c7729ffdbca62408ffad4ecb8379266a690b03ca..08dad64724bf55be408c821f6b12b792bf522f57 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -4,6 +4,7 @@ * Proprietary and confidential. 
******************************************************************************/ +#include #include "PrometheusMetrics.h" #include "utils/Log.h" #include "SystemInfo.h" @@ -166,6 +167,18 @@ void PrometheusMetrics::CPUTemperature() { } } +void PrometheusMetrics::GpuCacheUsageGaugeSet(double value) { + if(!startup_) return; + int64_t num_processors = server::SystemInfo::GetInstance().num_processor(); + + for (auto i = 0; i < num_processors; ++i) { +// int gpu_cache_usage = cache::GpuCacheMgr::GetInstance(i)->CacheUsage(); +// int gpu_cache_total = cache::GpuCacheMgr::GetInstance(i)->CacheCapacity(); +// prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}}); +// gpu_cache.Set(gpu_cache_usage * 100 / gpu_cache_total); + } +} + } } } diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index 282c58800ca5bf08ecdd9f0af123ee943dbf2904..ab37195583b87cc666d1a18725d540538be220d2 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -54,7 +54,8 @@ class PrometheusMetrics: public MetricsBase { void RawFileSizeHistogramObserve(double value) override { if(startup_) raw_files_size_histogram_.Observe(value);}; void IndexFileSizeHistogramObserve(double value) override { if(startup_) index_files_size_histogram_.Observe(value);}; void BuildIndexDurationSecondsHistogramObserve(double value) override { if(startup_) build_index_duration_seconds_histogram_.Observe(value);}; - void CacheUsageGaugeSet(double value) override { if(startup_) cache_usage_gauge_.Set(value);}; + void CpuCacheUsageGaugeSet(double value) override { if(startup_) cpu_cache_usage_gauge_.Set(value);}; + void GpuCacheUsageGaugeSet(double value) override; void MetaAccessTotalIncrement(double value = 1) override { if(startup_) meta_access_total_.Increment(value);}; void MetaAccessDurationSecondsHistogramObserve(double value) override { if(startup_) meta_access_duration_seconds_histogram_.Observe(value);}; @@ -336,12 +337,18 @@ class PrometheusMetrics: public MetricsBase { .Register(*registry_); prometheus::Counter &cache_access_total_ = cache_access_.Add({}); - // record cache usage and % - prometheus::Family &cache_usage_ = prometheus::BuildGauge() + // record CPU cache usage and % + prometheus::Family &cpu_cache_usage_ = prometheus::BuildGauge() .Name("cache_usage_bytes") .Help("current cache usage by bytes") .Register(*registry_); - prometheus::Gauge &cache_usage_gauge_ = cache_usage_.Add({}); + prometheus::Gauge &cpu_cache_usage_gauge_ = cpu_cache_usage_.Add({}); + + //record GPU cache usage and % + prometheus::Family &gpu_cache_usage_ = prometheus::BuildGauge() + .Name("gpu_cache_usage_bytes") + .Help("current gpu cache usage by bytes") + .Register(*registry_); // record query response using Quantiles = std::vector; @@ -360,8 +367,7 @@ class PrometheusMetrics: public MetricsBase { prometheus::Family &query_vector_response_per_second_ = prometheus::BuildGauge() .Name("query_vector_response_per_microsecond") .Help("the number of vectors can be queried every second ") - .Register(*registry_); - prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({}); + .Register(*registry_); prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({}); prometheus::Family &query_response_per_second_ = prometheus::BuildGauge() .Name("query_response_per_microsecond") diff --git a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp 
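PrometheusMetrics::GpuCacheUsageGaugeSet is added above with its body still commented out, pending the GpuCacheMgr work (MS-403). Filling in those commented lines would give roughly the following, one labelled gauge per device, using the gpu_cache_usage_ family declared in PrometheusMetrics.h above; this is only a sketch of the commented-out draft (the GpuCacheMgr header and its CacheUsage/CacheCapacity accessors are assumed), not code from the patch:

    void PrometheusMetrics::GpuCacheUsageGaugeSet(double value) {
        if (!startup_) return;
        int64_t num_processors = server::SystemInfo::GetInstance().num_processor();
        for (auto i = 0; i < num_processors; ++i) {
            int64_t usage = cache::GpuCacheMgr::GetInstance(i)->CacheUsage();
            int64_t capacity = cache::GpuCacheMgr::GetInstance(i)->CacheCapacity();
            prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}});
            gpu_cache.Set(usage * 100.0 / capacity);   // percentage, as in the commented-out draft
        }
        (void) value;   // the argument is unused in that draft
    }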
b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp index 583a91789768d0f36ab00da1a4175ff6734651ee..f30a23e174d3568797453f3f65602fd56d73a23c 100644 --- a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp +++ b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp @@ -15,6 +15,8 @@ using namespace milvus; +//#define SET_VECTOR_IDS; + namespace { std::string GetTableName(); @@ -24,7 +26,7 @@ namespace { constexpr int64_t NQ = 10; constexpr int64_t TOP_K = 10; constexpr int64_t SEARCH_TARGET = 5000; //change this value, result is different - constexpr int64_t ADD_VECTOR_LOOP = 1; + constexpr int64_t ADD_VECTOR_LOOP = 10; constexpr int64_t SECONDS_EACH_HOUR = 3600; #define BLOCK_SPLITER std::cout << "===========================================" << std::endl; @@ -32,9 +34,7 @@ namespace { void PrintTableSchema(const TableSchema& tb_schema) { BLOCK_SPLITER std::cout << "Table name: " << tb_schema.table_name << std::endl; - std::cout << "Table index type: " << (int)tb_schema.index_type << std::endl; std::cout << "Table dimension: " << tb_schema.dimension << std::endl; - std::cout << "Table store raw data: " << (tb_schema.store_raw_vector ? "true" : "false") << std::endl; BLOCK_SPLITER } @@ -93,9 +93,7 @@ namespace { TableSchema BuildTableSchema() { TableSchema tb_schema; tb_schema.table_name = TABLE_NAME; - tb_schema.index_type = IndexType::gpu_ivfflat; tb_schema.dimension = TABLE_DIMENSION; - tb_schema.store_raw_vector = true; return tb_schema; } @@ -235,59 +233,21 @@ ClientTest::Test(const std::string& address, const std::string& port) { std::cout << "DescribeTable function call status: " << stat.ToString() << std::endl; PrintTableSchema(tb_schema); } -// -// Connection::Destroy(conn); - -// pid_t pid; -// for (int i = 0; i < 5; ++i) { -// pid = fork(); -// if (pid == 0 || pid == -1) { -// break; -// } -// } -// if (pid == -1) { -// std::cout << "fail to fork!\n"; -// exit(1); -// } else if (pid == 0) { -// std::shared_ptr conn = Connection::Create(); -// -// {//connect server -// ConnectParam param = {address, port}; -// Status stat = conn->Connect(param); -// std::cout << "Connect function call status: " << stat.ToString() << std::endl; -// } -// -// {//server version -// std::string version = conn->ServerVersion(); -// std::cout << "Server version: " << version << std::endl; -// } -// Connection::Destroy(conn); -// exit(0); -// } else { -// std::shared_ptr conn = Connection::Create(); -// -// {//connect server -// ConnectParam param = {address, port}; -// Status stat = conn->Connect(param); -// std::cout << "Connect function call status: " << stat.ToString() << std::endl; -// } -// -// {//server version -// std::string version = conn->ServerVersion(); -// std::cout << "Server version: " << version << std::endl; -// } -// Connection::Destroy(conn); -// std::cout << "in main process\n"; -// exit(0); -// } std::vector> search_record_array; {//insert vectors + std::vector record_ids; for (int i = 0; i < ADD_VECTOR_LOOP; i++) {//add vectors std::vector record_array; int64_t begin_index = i * BATCH_ROW_COUNT; BuildVectors(begin_index, begin_index + BATCH_ROW_COUNT, record_array); - std::vector record_ids; + +#ifdef SET_VECTOR_IDS + record_ids.resize(ADD_VECTOR_LOOP * BATCH_ROW_COUNT); + for (auto j = begin_index; j CountTable(TABLE_NAME, row_count); + std::cout << TABLE_NAME << "(" << row_count << " rows)" << std::endl; DoSearch(conn, search_record_array, "Search without index"); } {//wait unit build index finish -// std::cout << "Wait until build all index done" << std::endl; -// Status stat = 
conn->CreateIndex(); -// std::cout << "BuildIndex function call status: " << stat.ToString() << std::endl; + std::cout << "Wait until create all index done" << std::endl; + IndexParam index; + index.table_name = TABLE_NAME; + index.index_type = IndexType::gpu_ivfflat; + index.nlist = 1000; + index.index_file_size = 1024; + index.metric_type = 1; + Status stat = conn->CreateIndex(index); + std::cout << "CreateIndex function call status: " << stat.ToString() << std::endl; + + IndexParam index2; + stat = conn->DescribeIndex(TABLE_NAME, index2); + std::cout << "DescribeIndex function call status: " << stat.ToString() << std::endl; } {//preload table @@ -326,6 +300,24 @@ ClientTest::Test(const std::string& address, const std::string& port) { DoSearch(conn, search_record_array, "Search after build index finish"); } + {//delete index + Status stat = conn->DropIndex(TABLE_NAME); + std::cout << "DropIndex function call status: " << stat.ToString() << std::endl; + + int64_t row_count = 0; + stat = conn->CountTable(TABLE_NAME, row_count); + std::cout << TABLE_NAME << "(" << row_count << " rows)" << std::endl; + } + + {//delete by range + Range rg; + rg.start_value = CurrentTmDate(-2); + rg.end_value = CurrentTmDate(-3); + + Status stat = conn->DeleteByRange(rg, TABLE_NAME); + std::cout << "DeleteByRange function call status: " << stat.ToString() << std::endl; + } + {//delete table Status stat = conn->DropTable(TABLE_NAME); std::cout << "DeleteTable function call status: " << stat.ToString() << std::endl; diff --git a/cpp/src/sdk/grpc/ClientProxy.cpp b/cpp/src/sdk/grpc/ClientProxy.cpp index 898f1c952885cdea57688a88ae5e0aba86ba6a32..6421a34d198af568e56043ffe827ddf9db24f60f 100644 --- a/cpp/src/sdk/grpc/ClientProxy.cpp +++ b/cpp/src/sdk/grpc/ClientProxy.cpp @@ -82,9 +82,7 @@ ClientProxy::CreateTable(const TableSchema ¶m) { try { ::milvus::grpc::TableSchema schema; schema.mutable_table_name()->set_table_name(param.table_name); - schema.set_index_type((int) param.index_type); schema.set_dimension(param.dimension); - schema.set_store_raw_vector(param.store_raw_vector); return client_ptr_->CreateTable(schema); } catch (std::exception &ex) { @@ -119,6 +117,10 @@ ClientProxy::CreateIndex(const IndexParam &index_param) { ::milvus::grpc::IndexParam grpc_index_param; grpc_index_param.mutable_table_name()->set_table_name( index_param.table_name); + grpc_index_param.mutable_index()->set_index_type((int32_t)index_param.index_type); + grpc_index_param.mutable_index()->set_nlist(index_param.nlist); + grpc_index_param.mutable_index()->set_index_file_size(index_param.index_file_size); + grpc_index_param.mutable_index()->set_metric_type(index_param.metric_type); return client_ptr_->CreateIndex(grpc_index_param); } catch (std::exception &ex) { @@ -187,15 +189,20 @@ ClientProxy::Insert(const std::string &table_name, } } - ::milvus::grpc::VectorIds vector_ids; - //Single thread - client_ptr_->Insert(vector_ids, insert_param, status); - auto finish = std::chrono::high_resolution_clock::now(); - - for (size_t i = 0; i < vector_ids.vector_id_array_size(); i++) { - id_array.push_back(vector_ids.vector_id_array(i)); + ::milvus::grpc::VectorIds vector_ids; + if (!id_array.empty()) { + for (auto i = 0; i < id_array.size(); i++) { + insert_param.add_row_id_array(id_array[i]); + } + client_ptr_->Insert(vector_ids, insert_param, status); + } else { + client_ptr_->Insert(vector_ids, insert_param, status); + for (size_t i = 0; i < vector_ids.vector_id_array_size(); i++) { + id_array.push_back(vector_ids.vector_id_array(i)); + } } + 
#endif } catch (std::exception &ex) { @@ -264,9 +271,7 @@ ClientProxy::DescribeTable(const std::string &table_name, TableSchema &table_sch Status status = client_ptr_->DescribeTable(grpc_schema, table_name); table_schema.table_name = grpc_schema.table_name().table_name(); - table_schema.index_type = (IndexType) grpc_schema.index_type(); table_schema.dimension = grpc_schema.dimension(); - table_schema.store_raw_vector = grpc_schema.store_raw_vector(); return status; } catch (std::exception &ex) { @@ -325,7 +330,15 @@ ClientProxy::ServerStatus() const { Status ClientProxy::DeleteByRange(milvus::Range &range, const std::string &table_name) { - + try { + ::milvus::grpc::DeleteByRangeParam delete_by_range_param; + delete_by_range_param.set_table_name(table_name); + delete_by_range_param.mutable_range()->set_start_value(range.start_value); + delete_by_range_param.mutable_range()->set_end_value(range.end_value); + return client_ptr_->DeleteByRange(delete_by_range_param); + } catch (std::exception &ex) { + return Status(StatusCode::UnknownError, "fail to delete by range: " + std::string(ex.what())); + } } Status @@ -336,18 +349,39 @@ ClientProxy::PreloadTable(const std::string &table_name) const { Status status = client_ptr_->PreloadTable(grpc_table_name); return status; } catch (std::exception &ex) { - return Status(StatusCode::UnknownError, "fail to show tables: " + std::string(ex.what())); + return Status(StatusCode::UnknownError, "fail to preload tables: " + std::string(ex.what())); } } -IndexParam -ClientProxy::DescribeIndex(const std::string &table_name) const { +Status +ClientProxy::DescribeIndex(const std::string &table_name, IndexParam &index_param) const { + try { + ::milvus::grpc::TableName grpc_table_name; + grpc_table_name.set_table_name(table_name); + ::milvus::grpc::IndexParam grpc_index_param; + Status status = client_ptr_->DescribeIndex(grpc_table_name, grpc_index_param); + index_param.index_type = (IndexType)(grpc_index_param.mutable_index()->index_type()); + index_param.nlist = grpc_index_param.mutable_index()->nlist(); + index_param.index_file_size = grpc_index_param.mutable_index()->index_file_size(); + index_param.metric_type = grpc_index_param.mutable_index()->metric_type(); + return status; + + } catch (std::exception &ex) { + return Status(StatusCode::UnknownError, "fail to describe index: " + std::string(ex.what())); + } } Status ClientProxy::DropIndex(const std::string &table_name) const { - + try { + ::milvus::grpc::TableName grpc_table_name; + grpc_table_name.set_table_name(table_name); + Status status = client_ptr_->DropIndex(grpc_table_name); + return status; + } catch (std::exception &ex) { + return Status(StatusCode::UnknownError, "fail to drop index: " + std::string(ex.what())); + } } } diff --git a/cpp/src/sdk/grpc/ClientProxy.h b/cpp/src/sdk/grpc/ClientProxy.h index f6a39f04450718c31a526c1571f461027ddea0d6..8a1d34d8e29efcf2d0f17e7cf9cac23c3dd8a8e0 100644 --- a/cpp/src/sdk/grpc/ClientProxy.h +++ b/cpp/src/sdk/grpc/ClientProxy.h @@ -75,8 +75,8 @@ public: virtual Status PreloadTable(const std::string &table_name) const override; - virtual IndexParam - DescribeIndex(const std::string &table_name) const override; + virtual Status + DescribeIndex(const std::string &table_name, IndexParam &index_param) const override; virtual Status DropIndex(const std::string &table_name) const override; diff --git a/cpp/src/sdk/grpc/GrpcClient.cpp b/cpp/src/sdk/grpc/GrpcClient.cpp index 00894ea5291e7ebf6d3f4b8880a8c18adcd11e50..49ce3556567cc94fc86bd36311e12249de45f168 100644 --- 
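
Taken together, the SDK changes above move index handling out of TableSchema and into a dedicated IndexParam, with DescribeIndex now filling an out-parameter and returning Status. Below is a minimal, illustrative sketch of how a client could drive the reworked API end to end; it assumes a connected `conn` as in ClientTest.cpp, the header path may differ per build, and the nlist/index_file_size values are placeholders rather than recommendations.

    #include "MilvusApi.h"   // IndexParam, IndexType, Status, Connection (path assumed)
    #include <iostream>
    #include <memory>
    #include <string>

    // Sketch only: `conn` is assumed to be an already-connected milvus::Connection.
    void IndexWorkflowSketch(const std::shared_ptr<milvus::Connection>& conn,
                             const std::string& table_name) {
        milvus::IndexParam index;
        index.table_name = table_name;
        index.index_type = milvus::IndexType::gpu_ivfflat;  // checked server-side by ValidateTableIndexType
        index.nlist = 1000;             // must be > 0 (ValidateTableIndexNlist)
        index.index_file_size = 1024;   // MB, must lie in (0, 4096] (ValidateTableIndexFileSize)
        index.metric_type = 1;          // 1 == L2 as used in ClientTest.cpp; only L2/IP pass validation

        milvus::Status stat = conn->CreateIndex(index);
        std::cout << "CreateIndex: " << stat.ToString() << std::endl;

        // DescribeIndex now returns Status and fills the IndexParam out-parameter.
        milvus::IndexParam described;
        stat = conn->DescribeIndex(table_name, described);
        std::cout << "DescribeIndex: " << stat.ToString() << std::endl;

        stat = conn->DropIndex(table_name);
        std::cout << "DropIndex: " << stat.ToString() << std::endl;
    }
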
a/cpp/src/sdk/grpc/GrpcClient.cpp +++ b/cpp/src/sdk/grpc/GrpcClient.cpp @@ -264,10 +264,63 @@ GrpcClient::PreloadTable(milvus::grpc::TableName &table_name) { return Status::OK(); } +Status +GrpcClient::DeleteByRange(grpc::DeleteByRangeParam &delete_by_range_param) { + ClientContext context; + ::milvus::grpc::Status response; + ::grpc::Status grpc_status = stub_->DeleteByRange(&context, delete_by_range_param, &response); + + if (!grpc_status.ok()) { + std::cerr << "DeleteByRange gRPC failed!" << std::endl; + return Status(StatusCode::RPCFailed, grpc_status.error_message()); + } + + if (response.error_code() != grpc::SUCCESS) { + std::cerr << response.reason() << std::endl; + return Status(StatusCode::ServerFailed, response.reason()); + } + return Status::OK(); +} + Status GrpcClient::Disconnect() { stub_.release(); return Status::OK(); } +Status +GrpcClient::DescribeIndex(grpc::TableName &table_name, grpc::IndexParam &index_param) { + ClientContext context; + ::grpc::Status grpc_status = stub_->DescribeIndex(&context, table_name, &index_param); + + if (!grpc_status.ok()) { + std::cerr << "DescribeIndex rpc failed!" << std::endl; + return Status(StatusCode::RPCFailed, grpc_status.error_message()); + } + if (index_param.mutable_table_name()->status().error_code() != grpc::SUCCESS) { + std::cerr << index_param.mutable_table_name()->status().reason() << std::endl; + return Status(StatusCode::ServerFailed, index_param.mutable_table_name()->status().reason()); + } + + return Status::OK(); +} + +Status +GrpcClient::DropIndex(grpc::TableName &table_name) { + ClientContext context; + ::milvus::grpc::Status response; + ::grpc::Status grpc_status = stub_->DropIndex(&context, table_name, &response); + + if (!grpc_status.ok()) { + std::cerr << "DropIndex gRPC failed!" << std::endl; + return Status(StatusCode::RPCFailed, grpc_status.error_message()); + } + + if (response.error_code() != grpc::SUCCESS) { + std::cerr << response.reason() << std::endl; + return Status(StatusCode::ServerFailed, response.reason()); + } + return Status::OK(); +} + } \ No newline at end of file diff --git a/cpp/src/sdk/include/MilvusApi.h b/cpp/src/sdk/include/MilvusApi.h index cb261743e136692ebf2bdf8e85e45b84fe8db8c0..766bcbad1d04b3021cd4ec15acbdf937fb332e00 100644 --- a/cpp/src/sdk/include/MilvusApi.h +++ b/cpp/src/sdk/include/MilvusApi.h @@ -76,9 +76,10 @@ struct TopKQueryResult { */ struct IndexParam { std::string table_name; - int32_t index_type; - int64_t nlist; + IndexType index_type; + int32_t nlist; int32_t index_file_size; + int32_t metric_type; }; /** @@ -354,8 +355,8 @@ class Connection { * * @return index informations and indicate if this operation is successful. 
*/ - virtual IndexParam - DescribeIndex(const std::string &table_name) const = 0; + virtual Status + DescribeIndex(const std::string &table_name, IndexParam &index_param) const = 0; /** * @brief drop index diff --git a/cpp/src/sdk/interface/ConnectionImpl.cpp b/cpp/src/sdk/interface/ConnectionImpl.cpp index b496d1c1049c4fb968c8aa59bb90e78ca15e12a1..0f3080574fba174483e85ad9122742c04e759f11 100644 --- a/cpp/src/sdk/interface/ConnectionImpl.cpp +++ b/cpp/src/sdk/interface/ConnectionImpl.cpp @@ -117,7 +117,7 @@ ConnectionImpl::ServerStatus() const { Status ConnectionImpl::DeleteByRange(Range &range, const std::string &table_name) { - + return client_proxy_->DeleteByRange(range, table_name); } Status @@ -125,14 +125,14 @@ ConnectionImpl::PreloadTable(const std::string &table_name) const { return client_proxy_->PreloadTable(table_name); } -IndexParam -ConnectionImpl::DescribeIndex(const std::string &table_name) const { - +Status +ConnectionImpl::DescribeIndex(const std::string &table_name, IndexParam& index_param) const { + return client_proxy_->DescribeIndex(table_name, index_param); } Status ConnectionImpl::DropIndex(const std::string &table_name) const { - + return client_proxy_->DropIndex(table_name); } } diff --git a/cpp/src/sdk/interface/ConnectionImpl.h b/cpp/src/sdk/interface/ConnectionImpl.h index 24be6060baa933b9861a4501687c327e31b01109..d304736b6598181b62ed7f307164971fe72bb5ab 100644 --- a/cpp/src/sdk/interface/ConnectionImpl.h +++ b/cpp/src/sdk/interface/ConnectionImpl.h @@ -81,8 +81,8 @@ public: virtual Status PreloadTable(const std::string &table_name) const override; - virtual IndexParam - DescribeIndex(const std::string &table_name) const override; + virtual Status + DescribeIndex(const std::string &table_name, IndexParam& index_param) const override; virtual Status DropIndex(const std::string &table_name) const override; diff --git a/cpp/src/sdk/thrift/ClientProxy.cpp b/cpp/src/sdk/thrift/ClientProxy.cpp index dff5b98eb6fbff545e74154632c668634a647dd6..c258aeefad21af40ff75546fb1d8a78d50d0ed4a 100644 --- a/cpp/src/sdk/thrift/ClientProxy.cpp +++ b/cpp/src/sdk/thrift/ClientProxy.cpp @@ -334,8 +334,7 @@ Status ClientProxy::PreloadTable(const std::string &table_name) const { return Status::OK(); } -IndexParam ClientProxy::DescribeIndex(const std::string &table_name) const { - IndexParam index_param; +Status ClientProxy::DescribeIndex(const std::string &table_name, IndexParam &index_param) const { index_param.table_name = table_name; return index_param; } diff --git a/cpp/src/sdk/thrift/ClientProxy.h b/cpp/src/sdk/thrift/ClientProxy.h index 60e070ba8cd4de443f0f8d8cc611c08becc3e9e0..27dafc92a61a2ce8321a326e973ccbead3a5c928 100644 --- a/cpp/src/sdk/thrift/ClientProxy.h +++ b/cpp/src/sdk/thrift/ClientProxy.h @@ -55,7 +55,7 @@ public: virtual Status PreloadTable(const std::string &table_name) const override; - virtual IndexParam DescribeIndex(const std::string &table_name) const override; + virtual Status DescribeIndex(const std::string &table_name, IndexParam &index_param) const override; virtual Status DropIndex(const std::string &table_name) const override; diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index 3236ea5cdc86979524cde48a0fd3b8a9614477a9..6a76399d42c92e856f353ddf4cb04e9cd8414cc9 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -36,8 +36,10 @@ static const char* CONFIG_LOG = "log_config"; static const char* CONFIG_CACHE = "cache_config"; static const char* CONFIG_CPU_CACHE_CAPACITY = "cpu_cache_capacity"; static 
const char* CONFIG_GPU_CACHE_CAPACITY = "gpu_cache_capacity"; -static const char* CACHE_FREE_PERCENT = "cache_free_percent"; +static const char* CACHE_FREE_PERCENT = "cpu_cache_free_percent"; static const char* CONFIG_INSERT_CACHE_IMMEDIATELY = "insert_cache_immediately"; +static const char* CONFIG_GPU_IDS = "gpu_ids"; +static const char *GPU_CACHE_FREE_PERCENT = "gpu_cache_free_percent"; static const char* CONFIG_LICENSE = "license_config"; static const char* CONFIG_LICENSE_PATH = "license_path"; @@ -48,12 +50,14 @@ static const char* CONFIG_METRIC_COLLECTOR = "collector"; static const char* CONFIG_PROMETHEUS = "prometheus_config"; static const char* CONFIG_METRIC_PROMETHEUS_PORT = "port"; -static const char* CONFIG_ENGINE = "engine_config"; -static const char* CONFIG_NPROBE = "nprobe"; -static const char* CONFIG_NLIST = "nlist"; -static const char* CONFIG_DCBT = "use_blas_threshold"; -static const char* CONFIG_METRICTYPE = "metric_type"; -static const char* CONFIG_OMP_THREAD_NUM = "omp_thread_num"; +static const std::string CONFIG_ENGINE = "engine_config"; +static const std::string CONFIG_NPROBE = "nprobe"; +static const std::string CONFIG_NLIST = "nlist"; +static const std::string CONFIG_DCBT = "use_blas_threshold"; +static const std::string CONFIG_METRICTYPE = "metric_type"; +static const std::string CONFIG_OMP_THREAD_NUM = "omp_thread_num"; +static const std::string CONFIG_USE_HYBRID_INDEX = "use_hybrid_index"; +static const std::string CONFIG_HYBRID_INDEX_GPU = "hybrid_index_gpu"; class ServerConfig { public: diff --git a/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp b/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp index 584023e4fc888a515da080d3acd5506f2cd2db07..be0c6adebb094dd621704c0c3d7c9c08002ea5b2 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp @@ -42,7 +42,6 @@ GrpcRequestHandler::HasTable(::grpc::ServerContext *context, GrpcRequestHandler::DropTable(::grpc::ServerContext *context, const ::milvus::grpc::TableName *request, ::milvus::grpc::Status *response) { - BaseTaskPtr task_ptr = DropTableTask::Create(request->table_name()); GrpcRequestScheduler::ExecTask(task_ptr, response); return ::grpc::Status::OK; @@ -168,7 +167,12 @@ GrpcRequestHandler::Cmd(::grpc::ServerContext *context, GrpcRequestHandler::DeleteByRange(::grpc::ServerContext *context, const ::milvus::grpc::DeleteByRangeParam *request, ::milvus::grpc::Status *response) { - + BaseTaskPtr task_ptr = DeleteByRangeTask::Create(*request); + ::milvus::grpc::Status grpc_status; + GrpcRequestScheduler::ExecTask(task_ptr, &grpc_status); + response->set_error_code(grpc_status.error_code()); + response->set_reason(grpc_status.reason()); + return ::grpc::Status::OK; } ::grpc::Status @@ -187,14 +191,24 @@ GrpcRequestHandler::PreloadTable(::grpc::ServerContext *context, GrpcRequestHandler::DescribeIndex(::grpc::ServerContext *context, const ::milvus::grpc::TableName *request, ::milvus::grpc::IndexParam *response) { - + BaseTaskPtr task_ptr = DescribeIndexTask::Create(request->table_name(), *response); + ::milvus::grpc::Status grpc_status; + GrpcRequestScheduler::ExecTask(task_ptr, &grpc_status); + response->mutable_table_name()->mutable_status()->set_reason(grpc_status.reason()); + response->mutable_table_name()->mutable_status()->set_error_code(grpc_status.error_code()); + return ::grpc::Status::OK; } ::grpc::Status GrpcRequestHandler::DropIndex(::grpc::ServerContext *context, const ::milvus::grpc::TableName *request, ::milvus::grpc::Status *response) { - + 
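
As an illustration of how the new engine_config keys might be consumed, here is a hedged sketch using only accessors already visible in this repository (ServerConfig::GetInstance, GetConfig, GetInt32Value). The default values and the choice of an integer accessor for hybrid_index_gpu are assumptions, not taken from this patch.

    #include "server/ServerConfig.h"

    using namespace zilliz::milvus::server;

    // Sketch: read engine settings, including the new hybrid-index keys.
    void ReadEngineConfigSketch() {
        ServerConfig& config = ServerConfig::GetInstance();
        ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE);

        // GetInt32Value(key, default) follows the pattern used elsewhere in the codebase;
        // defaults here are illustrative only.
        int32_t nlist       = engine_config.GetInt32Value(CONFIG_NLIST, 16384);
        int32_t omp_threads = engine_config.GetInt32Value(CONFIG_OMP_THREAD_NUM, 0);
        int32_t hybrid_gpu  = engine_config.GetInt32Value(CONFIG_HYBRID_INDEX_GPU, 0); // assumed to be a GPU id
        (void)nlist; (void)omp_threads; (void)hybrid_gpu;
    }
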
BaseTaskPtr task_ptr = DropIndexTask::Create(request->table_name()); + ::milvus::grpc::Status grpc_status; + GrpcRequestScheduler::ExecTask(task_ptr, &grpc_status); + response->set_reason(grpc_status.reason()); + response->set_error_code(grpc_status.error_code()); + return ::grpc::Status::OK; } diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp index 6d2842a34ba4db731be0c314f08a8ff6ebf7da60..95ceaffd42e8396961b639e4af533e307a028692 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp @@ -130,17 +130,10 @@ CreateTableTask::OnExecute() { return SetError(res, "Invalid table dimension: " + std::to_string(schema_.dimension())); } - res = ValidationUtil::ValidateTableIndexType(schema_.index_type()); - if (res != SERVER_SUCCESS) { - return SetError(res, "Invalid index type: " + std::to_string(schema_.index_type())); - } - //step 2: construct table schema engine::meta::TableSchema table_info; table_info.dimension_ = (uint16_t) schema_.dimension(); table_info.table_id_ = schema_.table_name().table_name(); - table_info.engine_type_ = (int) EngineType(schema_.index_type()); - table_info.store_raw_data_ = schema_.store_raw_vector(); //step 3: create table engine::Status stat = DBWrapper::DB()->CreateTable(table_info); @@ -190,10 +183,7 @@ DescribeTableTask::OnExecute() { } schema_.mutable_table_name()->set_table_name(table_info.table_id_); - - schema_.set_index_type(IndexType((engine::EngineType) table_info.engine_type_)); schema_.set_dimension(table_info.dimension_); - schema_.set_store_raw_vector(table_info.store_raw_data_); } catch (std::exception &ex) { return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); @@ -237,8 +227,33 @@ CreateIndexTask::OnExecute() { return SetError(SERVER_TABLE_NOT_EXIST, "Table " + table_name_ + " not exists"); } + res = ValidationUtil::ValidateTableIndexType(index_param_.mutable_index()->index_type()); + if(res != SERVER_SUCCESS) { + return SetError(res, "Invalid index type: " + std::to_string(index_param_.mutable_index()->index_type())); + } + + res = ValidationUtil::ValidateTableIndexNlist(index_param_.mutable_index()->nlist()); + if(res != SERVER_SUCCESS) { + return SetError(res, "Invalid index nlist: " + std::to_string(index_param_.mutable_index()->nlist())); + } + + res = ValidationUtil::ValidateTableIndexMetricType(index_param_.mutable_index()->metric_type()); + if(res != SERVER_SUCCESS) { + return SetError(res, "Invalid index metric type: " + std::to_string(index_param_.mutable_index()->metric_type())); + } + + res = ValidationUtil::ValidateTableIndexFileSize(index_param_.mutable_index()->index_file_size()); + if(res != SERVER_SUCCESS) { + return SetError(res, "Invalid index file size: " + std::to_string(index_param_.mutable_index()->index_file_size())); + } + //step 2: check table existence - stat = DBWrapper::DB()->BuildIndex(table_name_); + engine::TableIndex index; + index.engine_type_ = index_param_.mutable_index()->index_type(); + index.nlist_ = index_param_.mutable_index()->nlist(); + index.index_file_size_ = index_param_.mutable_index()->index_file_size(); + index.metric_type_ = index_param_.mutable_index()->metric_type(); + stat = DBWrapper::DB()->CreateIndex(table_name_, index); if (!stat.ok()) { return SetError(SERVER_BUILD_INDEX_ERROR, "Engine failed: " + stat.ToString()); } @@ -381,9 +396,9 @@ InsertTask::InsertTask(const ::milvus::grpc::InsertParam &insert_param, } BaseTaskPtr -InsertTask::Create(const ::milvus::grpc::InsertParam 
&insert_infos, +InsertTask::Create(const ::milvus::grpc::InsertParam &insert_param, ::milvus::grpc::VectorIds &record_ids) { - return std::shared_ptr(new InsertTask(insert_infos, record_ids)); + return std::shared_ptr(new InsertTask(insert_param, record_ids)); } ServerError @@ -400,6 +415,13 @@ InsertTask::OnExecute() { return SetError(SERVER_INVALID_ROWRECORD_ARRAY, "Row record array is empty"); } + if (!record_ids_.vector_id_array().empty()) { + if (record_ids_.vector_id_array().size() != insert_param_.row_record_array_size()) { + return SetError(SERVER_ILLEGAL_VECTOR_ID, + "Size of vector ids is not equal to row record array size"); + } + } + //step 2: check table existence engine::meta::TableSchema table_info; table_info.table_id_ = insert_param_.table_name(); @@ -426,30 +448,32 @@ InsertTask::OnExecute() { // TODO: change to one dimension array in protobuf or use multiple-thread to copy the data for (size_t i = 0; i < insert_param_.row_record_array_size(); i++) { - for (size_t j = 0; j < table_info.dimension_; j++) { - if (insert_param_.row_record_array(i).vector_data().empty()) { - return SetError(SERVER_INVALID_ROWRECORD_ARRAY, "Row record float array is empty"); - } - uint64_t vec_dim = insert_param_.row_record_array(i).vector_data().size(); - if (vec_dim != table_info.dimension_) { - ServerError error_code = SERVER_INVALID_VECTOR_DIMENSION; - std::string error_msg = "Invalid rowrecord dimension: " + std::to_string(vec_dim) - + " vs. table dimension:" + - std::to_string(table_info.dimension_); - return SetError(error_code, error_msg); - } - vec_f[i * table_info.dimension_ + j] = insert_param_.row_record_array(i).vector_data(j); + if (insert_param_.row_record_array(i).vector_data().empty()) { + return SetError(SERVER_INVALID_ROWRECORD_ARRAY, "Row record float array is empty"); + } + uint64_t vec_dim = insert_param_.row_record_array(i).vector_data().size(); + if (vec_dim != table_info.dimension_) { + ServerError error_code = SERVER_INVALID_VECTOR_DIMENSION; + std::string error_msg = "Invalid rowrecord dimension: " + std::to_string(vec_dim) + + " vs. 
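
The insert path now supports caller-supplied IDs: when id_array is non-empty the proxy forwards it via row_id_array and the server requires its size to match the record count (otherwise SERVER_ILLEGAL_VECTOR_ID); when it is empty the server assigns IDs and returns them. A client-side sketch of both modes follows; the Insert signature and RowRecord type are assumed from their usage in ClientTest.cpp and may differ slightly in the actual header.

    #include "MilvusApi.h"   // Connection, RowRecord, Status (path assumed)
    #include <memory>
    #include <string>
    #include <vector>

    // Sketch: the two insert modes introduced by this change.
    void InsertModesSketch(const std::shared_ptr<milvus::Connection>& conn,
                           const std::string& table_name,
                           std::vector<milvus::RowRecord>& records) {
        // Mode 1: server-assigned IDs. Pass an empty vector; it is filled on return,
        // one id per inserted record.
        std::vector<int64_t> ids;
        milvus::Status stat = conn->Insert(table_name, records, ids);

        // Mode 2: caller-assigned IDs. The vector size must equal the record count,
        // otherwise the server rejects the request with SERVER_ILLEGAL_VECTOR_ID.
        std::vector<int64_t> my_ids(records.size());
        for (size_t i = 0; i < my_ids.size(); ++i) {
            my_ids[i] = static_cast<int64_t>(i);
        }
        stat = conn->Insert(table_name, records, my_ids);
        (void)stat;
    }
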
table dimension:" + + std::to_string(table_info.dimension_); + return SetError(error_code, error_msg); } + memcpy(&vec_f[i * table_info.dimension_], + insert_param_.row_record_array(i).vector_data().data(), + table_info.dimension_ * sizeof(float)); } rc.ElapseFromBegin("prepare vectors data"); //step 4: insert vectors auto vec_count = (uint64_t) insert_param_.row_record_array_size(); - std::vector vec_ids(record_ids_.vector_id_array_size(), 0); + std::vector vec_ids(insert_param_.row_id_array_size(), 0); + for (auto i = 0; i < insert_param_.row_id_array_size(); i++) { + vec_ids[i] = insert_param_.row_id_array(i); + } - stat = DBWrapper::DB()->InsertVectors(insert_param_.table_name(), vec_count, vec_f.data(), - vec_ids); + stat = DBWrapper::DB()->InsertVectors(insert_param_.table_name(), vec_count, vec_f.data(), vec_ids); rc.ElapseFromBegin("add vectors to engine"); if (!stat.ok()) { return SetError(SERVER_CACHE_ERROR, "Cache error: " + stat.ToString()); @@ -711,6 +735,73 @@ CmdTask::OnExecute() { return SERVER_SUCCESS; } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DeleteByRangeTask::DeleteByRangeTask(const ::milvus::grpc::DeleteByRangeParam &delete_by_range_param) + : GrpcBaseTask(DDL_DML_TASK_GROUP), + delete_by_range_param_(delete_by_range_param){ +} + +BaseTaskPtr +DeleteByRangeTask::Create(const ::milvus::grpc::DeleteByRangeParam &delete_by_range_param) { + return std::shared_ptr(new DeleteByRangeTask(delete_by_range_param)); +} + +ServerError +DeleteByRangeTask::OnExecute() { + try { + TimeRecorder rc("DeleteByRangeTask"); + + //step 1: check arguments + std::string table_name = delete_by_range_param_.table_name(); + ServerError res = ValidationUtil::ValidateTableName(table_name); + if (res != SERVER_SUCCESS) { + return SetError(res, "Invalid table name: " + table_name); + } + + //step 2: check table existence + engine::meta::TableSchema table_info; + table_info.table_id_ = table_name; + engine::Status stat = DBWrapper::DB()->DescribeTable(table_info); + if (!stat.ok()) { + if (stat.IsNotFound()) { + return SetError(SERVER_TABLE_NOT_EXIST, "Table " + table_name + " not exists"); + } else { + return SetError(DB_META_TRANSACTION_FAILED, "Engine failed: " + stat.ToString()); + } + } + + rc.ElapseFromBegin("check validation"); + + //step 3: check date range, and convert to db dates + std::vector dates; + ServerError error_code = SERVER_SUCCESS; + std::string error_msg; + + std::vector<::milvus::grpc::Range> range_array; + range_array.emplace_back(delete_by_range_param_.range()); + ConvertTimeRangeToDBDates(range_array, dates, error_code, error_msg); + if (error_code != SERVER_SUCCESS) { + return SetError(error_code, error_msg); + } + +#ifdef MILVUS_ENABLE_PROFILING + std::string fname = "/tmp/search_nq_" + std::to_string(this->record_array_.size()) + + "_top_" + std::to_string(this->top_k_) + "_" + + GetCurrTimeStr() + ".profiling"; + ProfilerStart(fname.c_str()); +#endif + engine::Status status = DBWrapper::DB()->DeleteTable(table_name, dates); + if (!status.ok()) { + return SetError(DB_META_TRANSACTION_FAILED, "Engine failed: " + stat.ToString()); + } + + } catch (std::exception &ex) { + return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); + } + + return SERVER_SUCCESS; +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// PreloadTableTask::PreloadTableTask(const std::string &table_name) : GrpcBaseTask(DDL_DML_TASK_GROUP), @@ -748,7 
+839,89 @@ PreloadTableTask::OnExecute() { return SERVER_SUCCESS; } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DescribeIndexTask::DescribeIndexTask(const std::string &table_name, + ::milvus::grpc::IndexParam &index_param) + : GrpcBaseTask(DDL_DML_TASK_GROUP), + table_name_(table_name), + index_param_(index_param) { + +} + +BaseTaskPtr +DescribeIndexTask::Create(const std::string &table_name, + ::milvus::grpc::IndexParam &index_param){ + return std::shared_ptr(new DescribeIndexTask(table_name, index_param)); +} + +ServerError +DescribeIndexTask::OnExecute() { + try { + TimeRecorder rc("DescribeIndexTask"); + //step 1: check arguments + ServerError res = ValidationUtil::ValidateTableName(table_name_); + if (res != SERVER_SUCCESS) { + return SetError(res, "Invalid table name: " + table_name_); + } + + //step 2: check table existence + engine::TableIndex index; + engine::Status stat = DBWrapper::DB()->DescribeIndex(table_name_, index); + if (!stat.ok()) { + return SetError(DB_META_TRANSACTION_FAILED, "Engine failed: " + stat.ToString()); + } + + index_param_.mutable_table_name()->set_table_name(table_name_); + index_param_.mutable_index()->set_index_type(index.engine_type_); + index_param_.mutable_index()->set_nlist(index.nlist_); + index_param_.mutable_index()->set_index_file_size(index.index_file_size_); + index_param_.mutable_index()->set_metric_type(index.metric_type_); + + rc.ElapseFromBegin("totally cost"); + } catch (std::exception &ex) { + return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); + } + + return SERVER_SUCCESS; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DropIndexTask::DropIndexTask(const std::string &table_name) + : GrpcBaseTask(DDL_DML_TASK_GROUP), + table_name_(table_name) { + +} + +BaseTaskPtr +DropIndexTask::Create(const std::string &table_name){ + return std::shared_ptr(new DropIndexTask(table_name)); +} + +ServerError +DropIndexTask::OnExecute() { + try { + TimeRecorder rc("DropIndexTask"); + + //step 1: check arguments + ServerError res = ValidationUtil::ValidateTableName(table_name_); + if (res != SERVER_SUCCESS) { + return SetError(res, "Invalid table name: " + table_name_); + } + + //step 2: check table existence + engine::Status stat = DBWrapper::DB()->DropIndex(table_name_); + if (!stat.ok()) { + return SetError(DB_META_TRANSACTION_FAILED, "Engine failed: " + stat.ToString()); + } + + rc.ElapseFromBegin("totally cost"); + } catch (std::exception &ex) { + return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); + } + + return SERVER_SUCCESS; +} } } diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.h b/cpp/src/server/grpc_impl/GrpcRequestTask.h index 9a60064e49653e62fcd826f4435a2a0eec7c0e4e..e43b9fba6056f2ed4faec659f093ad2b4886448a 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.h +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.h @@ -260,6 +260,9 @@ public: protected: DropIndexTask(const std::string &table_name); + ServerError + OnExecute() override; + private: std::string table_name_; diff --git a/cpp/src/utils/Error.h b/cpp/src/utils/Error.h index c264e60b08d184ea12c3c071c095a54accf48846..e62e4d3271531ea7905989208c12b35f3ca8e857 100644 --- a/cpp/src/utils/Error.h +++ b/cpp/src/utils/Error.h @@ -51,6 +51,9 @@ constexpr ServerError SERVER_ILLEGAL_SEARCH_RESULT = ToGlobalServerErrorCode(110 constexpr ServerError SERVER_CACHE_ERROR = ToGlobalServerErrorCode(111); constexpr ServerError 
SERVER_WRITE_ERROR = ToGlobalServerErrorCode(112); constexpr ServerError SERVER_INVALID_NPROBE = ToGlobalServerErrorCode(113); +constexpr ServerError SERVER_INVALID_INDEX_NLIST = ToGlobalServerErrorCode(114); +constexpr ServerError SERVER_INVALID_INDEX_METRIC_TYPE = ToGlobalServerErrorCode(115); +constexpr ServerError SERVER_INVALID_INDEX_FILE_SIZE = ToGlobalServerErrorCode(116); constexpr ServerError SERVER_LICENSE_FILE_NOT_EXIST = ToGlobalServerErrorCode(500); diff --git a/cpp/src/utils/ValidationUtil.cpp b/cpp/src/utils/ValidationUtil.cpp index fa0cd8397d6f2b6f486f5d5a2f12f0dbeaaa4b3b..1ac997f1ab4706b01240596ad5db029d63217c08 100644 --- a/cpp/src/utils/ValidationUtil.cpp +++ b/cpp/src/utils/ValidationUtil.cpp @@ -10,6 +10,7 @@ namespace server { constexpr size_t table_name_size_limit = 255; constexpr int64_t table_dimension_limit = 16384; +constexpr int32_t index_file_size_limit = 4096; //index trigger size max = 4096 MB ServerError ValidationUtil::ValidateTableName(const std::string &table_name) { @@ -65,6 +66,32 @@ ValidationUtil::ValidateTableIndexType(int32_t index_type) { return SERVER_SUCCESS; } +ServerError +ValidationUtil::ValidateTableIndexNlist(int32_t nlist) { + if(nlist <= 0) { + return SERVER_INVALID_INDEX_NLIST; + } + + return SERVER_SUCCESS; +} + +ServerError +ValidationUtil::ValidateTableIndexFileSize(int32_t index_file_size) { + if(index_file_size <= 0 || index_file_size > index_file_size_limit) { + return SERVER_INVALID_INDEX_FILE_SIZE; + } + + return SERVER_SUCCESS; +} + +ServerError +ValidationUtil::ValidateTableIndexMetricType(int32_t metric_type) { + if(metric_type != (int32_t)engine::MetricType::L2 && metric_type != (int32_t)engine::MetricType::IP) { + return SERVER_INVALID_INDEX_METRIC_TYPE; + } + return SERVER_SUCCESS; +} + ServerError ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { int num_devices = 0; diff --git a/cpp/src/utils/ValidationUtil.h b/cpp/src/utils/ValidationUtil.h index 1f90fac273acbc1f47f9e13de969eab7108b0eee..2c90d99dd4d1bfdb5e6304f92592a3177f8636a7 100644 --- a/cpp/src/utils/ValidationUtil.h +++ b/cpp/src/utils/ValidationUtil.h @@ -17,6 +17,15 @@ public: static ServerError ValidateTableIndexType(int32_t index_type); + static ServerError + ValidateTableIndexNlist(int32_t nlist); + + static ServerError + ValidateTableIndexFileSize(int32_t index_file_size); + + static ServerError + ValidateTableIndexMetricType(int32_t metric_type); + static ServerError ValidateGpuIndex(uint32_t gpu_index); diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp deleted file mode 100644 index 4b10c1e6860885fecf0583d9d3d0cf6baeeaf691..0000000000000000000000000000000000000000 --- a/cpp/src/wrapper/Index.cpp +++ /dev/null @@ -1,84 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. 
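
A short sketch of how the new index-parameter validators compose; the helper name and the way errors are folded into a single ServerError are illustrative, while the four Validate* calls and their error codes come directly from this patch.

    #include "utils/ValidationUtil.h"
    #include "utils/Error.h"

    using namespace zilliz::milvus::server;

    // Sketch: validate index parameters before handing them to the engine.
    ServerError ValidateIndexParamsSketch(int32_t index_type, int32_t nlist,
                                          int32_t index_file_size, int32_t metric_type) {
        ServerError err = ValidationUtil::ValidateTableIndexType(index_type);
        if (err != SERVER_SUCCESS) return err;   // unknown engine type

        err = ValidationUtil::ValidateTableIndexNlist(nlist);
        if (err != SERVER_SUCCESS) return err;   // SERVER_INVALID_INDEX_NLIST when nlist <= 0

        err = ValidationUtil::ValidateTableIndexFileSize(index_file_size);
        if (err != SERVER_SUCCESS) return err;   // SERVER_INVALID_INDEX_FILE_SIZE outside (0, 4096] MB

        return ValidationUtil::ValidateTableIndexMetricType(metric_type);  // only L2 or IP pass
    }
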
-//////////////////////////////////////////////////////////////////////////////// - -#if 0 -// TODO: maybe support static search -#ifdef GPU_VERSION -#include "faiss/gpu/GpuAutoTune.h" -#include "faiss/gpu/StandardGpuResources.h" -#include "faiss/gpu/utils/DeviceUtils.h" -#endif - -#include "Index.h" -#include "faiss/index_io.h" -#include "faiss/IndexIVF.h" -#include "faiss/IVFlib.h" -#include "server/ServerConfig.h" - -namespace zilliz { -namespace milvus { -namespace engine { - -using std::string; -using std::unordered_map; -using std::vector; - -Index::Index(const std::shared_ptr &raw_index) { - index_ = raw_index; - dim = index_->d; - ntotal = index_->ntotal; - store_on_gpu = false; -} - -bool Index::reset() { - try { - index_->reset(); - ntotal = index_->ntotal; - } - catch (std::exception &e) { -// LOG(ERROR) << e.what(); - return false; - } - return true; -} - -bool Index::add_with_ids(idx_t n, const float *xdata, const long *xids) { - try { - index_->add_with_ids(n, xdata, xids); - ntotal += n; - } - catch (std::exception &e) { -// LOG(ERROR) << e.what(); - return false; - } - return true; -} - -bool Index::search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const { - try { - index_->search(n, data, k, distances, labels); - } - catch (std::exception &e) { -// LOG(ERROR) << e.what(); - return false; - } - return true; -} - -void write_index(const Index_ptr &index, const std::string &file_name) { - write_index(index->index_.get(), file_name.c_str()); -} - -Index_ptr read_index(const std::string &file_name) { - std::shared_ptr raw_index = nullptr; - raw_index.reset(faiss::read_index(file_name.c_str())); - return std::make_shared(raw_index); -} - -} -} -} -#endif diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h deleted file mode 100644 index 1668059d11c89b17984f1c6454ee80de89198527..0000000000000000000000000000000000000000 --- a/cpp/src/wrapper/Index.h +++ /dev/null @@ -1,89 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#pragma once - -//#include -//#include -//#include -//#include -//#include -// -//#include "faiss/AutoTune.h" -//#include "faiss/index_io.h" -// -//#include "Operand.h" - -#include "knowhere/vec_index.h" - - -namespace zilliz { -namespace milvus { -namespace engine { - -using Index_ptr = VecIndexPtr; - -#if 0 -//class Index; -//using Index_ptr = std::shared_ptr; - -class Index { - typedef long idx_t; - -public: - int dim; ///< std::vector dimension - idx_t ntotal; ///< total nb of indexed std::vectors - bool store_on_gpu; - - explicit Index(const std::shared_ptr &raw_index); - - virtual bool reset(); - - /** - * @brief Same as add, but stores xids instead of sequential ids. 
- * - * @param data input matrix, size n * d - * @param if ids is not empty ids for the std::vectors - */ - virtual bool add_with_ids(idx_t n, const float *xdata, const long *xids); - - /** - * @brief for each query std::vector, find its k nearest neighbors in the database - * - * @param n queries size - * @param data query std::vectors - * @param k top k nearest neighbors - * @param distances top k nearest distances - * @param labels neighbors of the queries - */ - virtual bool search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const; - - //virtual bool search(idx_t n, const std::vector &data, idx_t k, - // std::vector &distances, std::vector &labels) const; - - //virtual bool remove_ids(const faiss::IDSelector &sel, long &nremove, long &location); - //virtual bool remove_ids_range(const faiss::IDSelector &sel, long &nremove); - //virtual bool index_display(); - - virtual std::shared_ptr data() { return index_; } - - virtual const std::shared_ptr& data() const { return index_; } - -private: - friend void write_index(const Index_ptr &index, const std::string &file_name); - std::shared_ptr index_ = nullptr; -}; - - -void write_index(const Index_ptr &index, const std::string &file_name); - -extern Index_ptr read_index(const std::string &file_name); -#endif - - -} -} -} diff --git a/cpp/src/wrapper/IndexBuilder.cpp b/cpp/src/wrapper/IndexBuilder.cpp deleted file mode 100644 index 095341ecc70c5db6c24c5bcf8e8888de2102726f..0000000000000000000000000000000000000000 --- a/cpp/src/wrapper/IndexBuilder.cpp +++ /dev/null @@ -1,147 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#if 0 -#include "mutex" - - -#ifdef GPU_VERSION -#include -#include -#include -#endif - - -#include -#include - - -#include "server/ServerConfig.h" -#include "IndexBuilder.h" - - -namespace zilliz { -namespace milvus { -namespace engine { - -class GpuResources { - public: - static GpuResources &GetInstance() { - static GpuResources instance; - return instance; - } - - void SelectGpu() { - using namespace zilliz::milvus::server; - ServerConfig &config = ServerConfig::GetInstance(); - ConfigNode server_config = config.GetConfig(CONFIG_SERVER); - gpu_num = server_config.GetInt32Value(server::CONFIG_GPU_INDEX, 0); - } - - int32_t GetGpu() { - return gpu_num; - } - - private: - GpuResources() : gpu_num(0) { SelectGpu(); } - - private: - int32_t gpu_num; -}; - -using std::vector; - -static std::mutex gpu_resource; -static std::mutex cpu_resource; - -IndexBuilder::IndexBuilder(const Operand_ptr &opd) { - opd_ = opd; -} - -// Default: build use gpu -Index_ptr IndexBuilder::build_all(const long &nb, - const float *xb, - const long *ids, - const long &nt, - const float *xt) { - std::shared_ptr host_index = nullptr; -#ifdef GPU_VERSION - { - LOG(DEBUG) << "Build index by GPU"; - // TODO: list support index-type. - faiss::MetricType metric_type = opd_->metric_type == "L2" ? 
faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT; - faiss::Index *ori_index = faiss::index_factory(opd_->d, opd_->get_index_type(nb).c_str(), metric_type); - - std::lock_guard lk(gpu_resource); - faiss::gpu::StandardGpuResources res; - auto device_index = faiss::gpu::index_cpu_to_gpu(&res, GpuResources::GetInstance().GetGpu(), ori_index); - if (!device_index->is_trained) { - nt == 0 || xt == nullptr ? device_index->train(nb, xb) - : device_index->train(nt, xt); - } - device_index->add_with_ids(nb, xb, ids); // TODO: support with add_with_IDMAP - - host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index)); - - delete device_index; - delete ori_index; - } -#else - { - LOG(DEBUG) << "Build index by CPU"; - faiss::MetricType metric_type = opd_->metric_type == "L2" ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT; - faiss::Index *index = faiss::index_factory(opd_->d, opd_->get_index_type(nb).c_str(), metric_type); - if (!index->is_trained) { - nt == 0 || xt == nullptr ? index->train(nb, xb) - : index->train(nt, xt); - } - index->add_with_ids(nb, xb, ids); - host_index.reset(index); - } -#endif - - return std::make_shared(host_index); -} - -Index_ptr IndexBuilder::build_all(const long &nb, const vector &xb, - const vector &ids, - const long &nt, const vector &xt) { - return build_all(nb, xb.data(), ids.data(), nt, xt.data()); -} - -BgCpuBuilder::BgCpuBuilder(const zilliz::milvus::engine::Operand_ptr &opd) : IndexBuilder(opd) {}; - -Index_ptr BgCpuBuilder::build_all(const long &nb, const float *xb, const long *ids, const long &nt, const float *xt) { - std::shared_ptr index = nullptr; - faiss::MetricType metric_type = opd_->metric_type == "L2" ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT; - index.reset(faiss::index_factory(opd_->d, opd_->get_index_type(nb).c_str(), metric_type)); - - LOG(DEBUG) << "Build index by CPU"; - { - std::lock_guard lk(cpu_resource); - if (!index->is_trained) { - nt == 0 || xt == nullptr ? index->train(nb, xb) - : index->train(nt, xt); - } - index->add_with_ids(nb, xb, ids); - } - - return std::make_shared(index); -} - -IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) { - if (opd->index_type == "IDMap") { - IndexBuilderPtr index = nullptr; - return std::make_shared(opd); - } - - return std::make_shared(opd); -} - -} -} -} -#endif diff --git a/cpp/src/wrapper/IndexBuilder.h b/cpp/src/wrapper/IndexBuilder.h deleted file mode 100644 index 4cb6de814b0ad03d6e00fd27b3f1c9601b1dd2b6..0000000000000000000000000000000000000000 --- a/cpp/src/wrapper/IndexBuilder.h +++ /dev/null @@ -1,68 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. 
-//////////////////////////////////////////////////////////////////////////////// - -#if 0 -#pragma once - -#include "faiss/Index.h" - -#include "Operand.h" -#include "Index.h" - - -namespace zilliz { -namespace milvus { -namespace engine { - -class IndexBuilder { - public: - explicit IndexBuilder(const Operand_ptr &opd); - - virtual Index_ptr build_all(const long &nb, - const float *xb, - const long *ids, - const long &nt = 0, - const float *xt = nullptr); - - virtual Index_ptr build_all(const long &nb, - const std::vector &xb, - const std::vector &ids, - const long &nt = 0, - const std::vector &xt = std::vector()); - - //void train(const long &nt, - // const std::vector &xt); - // - //Index_ptr add(const long &nb, - // const std::vector &xb, - // const std::vector &ids); - // - //void set_build_option(const Operand_ptr &opd); - - - protected: - Operand_ptr opd_ = nullptr; -}; - -class BgCpuBuilder : public IndexBuilder { - public: - BgCpuBuilder(const Operand_ptr &opd); - - virtual Index_ptr build_all(const long &nb, - const float *xb, - const long *ids, - const long &nt = 0, - const float *xt = nullptr) override; -}; - -using IndexBuilderPtr = std::shared_ptr; - -extern IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd); - -} -} -} -#endif diff --git a/cpp/src/wrapper/Operand.cpp b/cpp/src/wrapper/Operand.cpp deleted file mode 100644 index 8bc708eb72544db51c564cf742fea669e41cf5ce..0000000000000000000000000000000000000000 --- a/cpp/src/wrapper/Operand.cpp +++ /dev/null @@ -1,123 +0,0 @@ - -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#if 0 -#include "Operand.h" - - -namespace zilliz { -namespace milvus { -namespace engine { - -using std::string; - -enum IndexType { - Invalid_Option = 0, - IVF = 1, - IDMAP = 2, - IVFSQ8 = 3, -}; - -IndexType resolveIndexType(const string &index_type) { - if (index_type == "IVF") { return IndexType::IVF; } - if (index_type == "IDMap") { return IndexType::IDMAP; } - if (index_type == "IVFSQ8") { return IndexType::IVFSQ8; } - return IndexType::Invalid_Option; -} - -int CalcBacketCount(int nb, size_t nlist) { - int backet_count = int(nb / 1000000.0 * nlist); - if(backet_count == 0) { - backet_count = 1; //avoid faiss rash - } - - return backet_count; -} - -// nb at least 100 -string Operand::get_index_type(const int &nb) { - if (!index_str.empty()) { return index_str; } - - switch (resolveIndexType(index_type)) { - case Invalid_Option: { - // TODO: add exception - break; - } - case IVF: { - - using namespace zilliz::milvus::server; - ServerConfig &config = ServerConfig::GetInstance(); - ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); - size_t nlist = engine_config.GetInt32Value(CONFIG_NLIST, 16384); - - index_str += (ncent != 0 ? 
index_type + std::to_string(ncent) : - index_type + std::to_string(CalcBacketCount(nb, nlist))); -// std::cout<<"nlist = "<>(std::istream &is, Operand &obj) { - is >> obj.d - >> obj.index_type - >> obj.metric_type - >> obj.preproc - >> obj.postproc - >> obj.ncent; - return is; -} - -std::string operand_to_str(const Operand_ptr &opd) { - std::ostringstream ss; - ss << *opd; - return ss.str(); -} - -Operand_ptr str_to_operand(const std::string &input) { - std::istringstream is(input); - auto opd = std::make_shared(); - is >> *(opd.get()); - - return opd; -} - -} -} -} -#endif diff --git a/cpp/src/wrapper/Operand.h b/cpp/src/wrapper/Operand.h deleted file mode 100644 index 0e675f6a1b4b79e7205d8bc31559b2b4ce070d89..0000000000000000000000000000000000000000 --- a/cpp/src/wrapper/Operand.h +++ /dev/null @@ -1,46 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#if 0 -#pragma once - -#include -#include -#include -#include - - -namespace zilliz { -namespace milvus { -namespace engine { - -struct Operand { - friend std::ostream &operator<<(std::ostream &os, const Operand &obj); - - friend std::istream &operator>>(std::istream &is, Operand &obj); - - int d; - std::string index_type = "IVF"; - std::string metric_type = "L2"; //> L2 / IP(Inner Product) - std::string preproc; - std::string postproc = "Flat"; - std::string index_str; - int ncent = 0; - - std::string get_index_type(const int &nb); -}; - -using Operand_ptr = std::shared_ptr; - -extern std::string operand_to_str(const Operand_ptr &opd); - -extern Operand_ptr str_to_operand(const std::string &input); - - -} -} -} -#endif diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 40ca9378e4f7037210d24b555bee2fb2ad48d640..e15f66c365bc6c640ec38a449d743f4517782df7 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -2,7 +2,7 @@ ARROW_VERSION=zilliz BOOST_VERSION=1.70.0 BZIP2_VERSION=1.0.6 EASYLOGGINGPP_VERSION=v9.96.7 -FAISS_VERSION=v1.5.3 +FAISS_VERSION=branch-0.1.0 MKL_VERSION=2019.4.243 GTEST_VERSION=1.8.1 JSONCONS_VERSION=0.126.0 diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 8b36d2efbdecf911cdeafa3926df44cbb23ee43e..2426846c15c3ffe9321e56915d91ea232372df30 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -9,6 +9,7 @@ #include "db/meta/MetaConsts.h" #include "db/Factories.h" #include "cache/CpuCacheMgr.h" +#include "utils/CommonUtil.h" #include #include @@ -26,6 +27,8 @@ namespace { static constexpr int64_t TABLE_DIM = 256; static constexpr int64_t VECTOR_COUNT = 250000; static constexpr int64_t INSERT_LOOP = 10000; + static constexpr int64_t SECONDS_EACH_HOUR = 3600; + static constexpr int64_t DAY_SECONDS = 24 * 60 * 60; engine::meta::TableSchema BuildTableSchema() { engine::meta::TableSchema table_info; @@ -45,6 +48,52 @@ namespace { } } + std::string CurrentTmDate(int64_t offset_day = 0) { + time_t tt; + time( &tt ); + tt = tt + 8*SECONDS_EACH_HOUR; + tt = tt + 24*SECONDS_EACH_HOUR*offset_day; + tm* t= gmtime( &tt ); + + std::string str = std::to_string(t->tm_year + 1900) + "-" + std::to_string(t->tm_mon + 1) + + "-" + std::to_string(t->tm_mday); + + return str; + } + + void + ConvertTimeRangeToDBDates(const std::string &start_value, + const 
std::string &end_value, + std::vector &dates) { + dates.clear(); + + time_t tt_start, tt_end; + tm tm_start, tm_end; + if (!zilliz::milvus::server::CommonUtil::TimeStrToTime(start_value, tt_start, tm_start)) { + return; + } + + if (!zilliz::milvus::server::CommonUtil::TimeStrToTime(end_value, tt_end, tm_end)) { + return; + } + + long days = (tt_end > tt_start) ? (tt_end - tt_start) / DAY_SECONDS : (tt_start - tt_end) / + DAY_SECONDS; + if (days == 0) { + return; + } + + for (long i = 0; i < days; i++) { + time_t tt_day = tt_start + DAY_SECONDS * i; + tm tm_day; + zilliz::milvus::server::CommonUtil::ConvertTime(tt_day, tm_day); + + long date = tm_day.tm_year * 10000 + tm_day.tm_mon * 100 + + tm_day.tm_mday;//according to db logic + dates.push_back(date); + } + } + } TEST_F(DBTest, CONFIG_TEST) { @@ -93,6 +142,7 @@ TEST_F(DBTest, CONFIG_TEST) { TEST_F(DBTest, DB_TEST) { + db_->Open(GetOptions(), &db_); engine::meta::TableSchema table_info = BuildTableSchema(); engine::Status stat = db_->CreateTable(table_info); @@ -161,6 +211,11 @@ TEST_F(DBTest, DB_TEST) { } search.join(); + + uint64_t count; + stat = db_->GetTableRowCount(TABLE_NAME, count); + ASSERT_STATS(stat); + ASSERT_TRUE(count > 0); }; TEST_F(DBTest, SEARCH_TEST) { @@ -307,8 +362,6 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { }; TEST_F(DBTest2, DELETE_TEST) { - - engine::meta::TableSchema table_info = BuildTableSchema(); engine::Status stat = db_->CreateTable(table_info); @@ -343,4 +396,45 @@ TEST_F(DBTest2, DELETE_TEST) { db_->HasTable(TABLE_NAME, has_table); ASSERT_FALSE(has_table); -}; \ No newline at end of file +}; + +TEST_F(DBTest2, DELETE_BY_RANGE_TEST) { + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + + bool has_table = false; + db_->HasTable(TABLE_NAME, has_table); + ASSERT_TRUE(has_table); + + engine::IDNumbers vector_ids; + + uint64_t size; + db_->Size(size); + + int64_t nb = INSERT_LOOP; + std::vector xb; + BuildVectors(nb, xb); + + int loop = 20; + for (auto i=0; iInsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + std::vector dates; + std::string start_value = CurrentTmDate(-3); + std::string end_value = CurrentTmDate(-2); + ConvertTimeRangeToDBDates(start_value, end_value, dates); + + db_->DeleteTable(TABLE_NAME, dates); +} \ No newline at end of file diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 1976822e761d20fc538d9cc6c0baecbe8e40fa56..77a83abc4e7e81b67f2e4ca865a618252500fa18 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -65,18 +65,22 @@ TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { engine::VectorSource source(n, vectors.data()); size_t num_vectors_added; - engine::ExecutionEnginePtr execution_engine_ = engine::EngineFactory::Build(table_file_schema.dimension_, - table_file_schema.location_, - (engine::EngineType) table_file_schema.engine_type_); - status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added); - ASSERT_TRUE(status.ok()); - - ASSERT_EQ(num_vectors_added, 50); + engine::ExecutionEnginePtr execution_engine_ = + 
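
The test helper above converts a day range into the integer date encoding the meta layer expects: tm_year * 10000 + tm_mon * 100 + tm_mday, where tm_year counts from 1900 and tm_mon is zero-based. A tiny worked example of that encoding, inferred from the helper itself:

    // Sketch: 2019-09-02 encoded the way ConvertTimeRangeToDBDates does it.
    // tm_year = 2019 - 1900 = 119, tm_mon = 9 - 1 = 8, tm_mday = 2
    // date    = 119 * 10000 + 8 * 100 + 2 = 1190802
    long EncodeDateSketch(int year, int month, int day) {
        return (year - 1900) * 10000L + (month - 1) * 100L + day;  // "according to db logic"
    }
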
engine::EngineFactory::Build(table_file_schema.dimension_, + table_file_schema.location_, + (engine::EngineType) table_file_schema.engine_type_, + (engine::MetricType)table_file_schema.metric_type_, + table_schema.nlist_); - engine::IDNumbers vector_ids = source.GetVectorIds(); + engine::IDNumbers vector_ids; + status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added, vector_ids); + ASSERT_TRUE(status.ok()); + vector_ids = source.GetVectorIds(); ASSERT_EQ(vector_ids.size(), 50); + ASSERT_EQ(num_vectors_added, 50); - status = source.Add(execution_engine_, table_file_schema, 60, num_vectors_added); + vector_ids.clear(); + status = source.Add(execution_engine_, table_file_schema, 60, num_vectors_added, vector_ids); ASSERT_TRUE(status.ok()); ASSERT_EQ(num_vectors_added, 50); @@ -84,6 +88,7 @@ TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { vector_ids = source.GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); + status = impl_->DropAll(); ASSERT_TRUE(status.ok()); } @@ -105,12 +110,13 @@ TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) { engine::VectorSource::Ptr source = std::make_shared(n_100, vectors_100.data()); - status = mem_table_file.Add(source); + engine::IDNumbers vector_ids; + status = mem_table_file.Add(source, vector_ids); ASSERT_TRUE(status.ok()); // std::cout << mem_table_file.GetCurrentMem() << " " << mem_table_file.GetMemLeft() << std::endl; - engine::IDNumbers vector_ids = source->GetVectorIds(); + vector_ids = source->GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); size_t singleVectorMem = sizeof(float) * TABLE_DIM; @@ -121,7 +127,8 @@ TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) { BuildVectors(n_max, vectors_128M); engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); - status = mem_table_file.Add(source_128M); + vector_ids.clear(); + status = mem_table_file.Add(source_128M, vector_ids); vector_ids = source_128M->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_max - n_100); @@ -149,9 +156,10 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { engine::MemTable mem_table(TABLE_NAME, impl_, options); - status = mem_table.Add(source_100); + engine::IDNumbers vector_ids; + status = mem_table.Add(source_100, vector_ids); ASSERT_TRUE(status.ok()); - engine::IDNumbers vector_ids = source_100->GetVectorIds(); + vector_ids = source_100->GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); engine::MemTableFile::Ptr mem_table_file; @@ -163,8 +171,9 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { std::vector vectors_128M; BuildVectors(n_max, vectors_128M); + vector_ids.clear(); engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); - status = mem_table.Add(source_128M); + status = mem_table.Add(source_128M, vector_ids); ASSERT_TRUE(status.ok()); vector_ids = source_128M->GetVectorIds(); @@ -181,7 +190,8 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { engine::VectorSource::Ptr source_1G = std::make_shared(n_1G, vectors_1G.data()); - status = mem_table.Add(source_1G); + vector_ids.clear(); + status = mem_table.Add(source_1G, vector_ids); ASSERT_TRUE(status.ok()); vector_ids = source_1G->GetVectorIds(); @@ -193,6 +203,8 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { status = mem_table.Serialize(); ASSERT_TRUE(status.ok()); + + status = impl_->DropAll(); ASSERT_TRUE(status.ok()); } @@ -367,6 +379,113 @@ TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { delete db_; boost::filesystem::remove_all(options.meta.path); - }; +TEST_F(DBTest, VECTOR_IDS_TEST) +{ + engine::meta::TableSchema table_info = 
BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + engine::IDNumbers vector_ids; + + + int64_t nb = 100000; + std::vector xb; + BuildVectors(nb, xb); + + vector_ids.resize(nb); + for (auto i = 0; i < nb; i++) { + vector_ids[i] = i; + } + + stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_EQ(vector_ids[0], 0); + ASSERT_STATS(stat); + + nb = 25000; + xb.clear(); + BuildVectors(nb, xb); + vector_ids.clear(); + vector_ids.resize(nb); + for (auto i = 0; i < nb; i++) { + vector_ids[i] = i + nb; + } + stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_EQ(vector_ids[0], nb); + ASSERT_STATS(stat); + + nb = 262144; //512M + xb.clear(); + BuildVectors(nb, xb); + vector_ids.clear(); + vector_ids.resize(nb); + for (auto i = 0; i < nb; i++) { + vector_ids[i] = i + nb / 2; + } + stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_EQ(vector_ids[0], nb/2); + ASSERT_STATS(stat); + + nb = 65536; //128M + xb.clear(); + BuildVectors(nb, xb); + vector_ids.clear(); + stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_STATS(stat); + + nb = 100; + xb.clear(); + BuildVectors(nb, xb); + vector_ids.clear(); + vector_ids.resize(nb); + for (auto i = 0; i < nb; i++) { + vector_ids[i] = i + nb; + } + stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + for (auto i = 0; i < nb; i++) { + ASSERT_EQ(vector_ids[i], i + nb); + } +} + +TEST_F(NewMemManagerTest, MEMMANAGER_TEST) { + int setenv_res = setenv("MILVUS_USE_OLD_MEM_MANAGER", "ON", 1); + ASSERT_TRUE(setenv_res == 0); + + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + auto start_time = METRICS_NOW_TIME; + + int insert_loop = 20; + for (int i = 0; i < insert_loop; ++i) { + int64_t nb = 40960; + std::vector xb; + BuildVectors(nb, xb); + engine::IDNumbers vector_ids; + engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_TRUE(status.ok()); + } + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + LOG(DEBUG) << "total_time spent in INSERT_TEST (ms) : " << total_time; + + delete db_; + boost::filesystem::remove_all(options.meta.path); +} diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 0e981f6ae420f395d233161263b430051cf16c0d..4dffeb6db05f55afda11553d02a41fdb02765487 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -180,7 +180,7 @@ TEST_F(MetaTest, ARCHIVE_TEST_DISK) { for (auto i=0; itype(), engine::ScheduleTaskType::kIndexLoad); + load_task->Execute(); } TEST(DBSchedulerTest, SEARCH_SCHEDULER_TEST) { diff --git a/cpp/unittest/db/search_test.cpp b/cpp/unittest/db/search_test.cpp index 340fa82f207c64c42a9409d9a03ea6c90a678176..64e03f65ae2e07eabd39035e5867350f72772f9f 100644 --- a/cpp/unittest/db/search_test.cpp +++ 
b/cpp/unittest/db/search_test.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// #include "db/scheduler/task/SearchTask.h" +#include "server/ServerConfig.h" #include "utils/TimeRecorder.h" #include @@ -213,6 +214,10 @@ TEST(DBSearchTest, MERGE_TEST) { } TEST(DBSearchTest, PARALLEL_CLUSTER_TEST) { + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode& db_config = config.GetConfig(server::CONFIG_DB); + db_config.SetValue(server::CONFIG_DB_PARALLEL_REDUCE, "true"); + bool ascending = true; std::vector target_ids; std::vector target_distence; @@ -245,6 +250,10 @@ TEST(DBSearchTest, PARALLEL_CLUSTER_TEST) { } TEST(DBSearchTest, PARALLEL_TOPK_TEST) { + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode& db_config = config.GetConfig(server::CONFIG_DB); + db_config.SetValue(server::CONFIG_DB_PARALLEL_REDUCE, "true"); + std::vector target_ids; std::vector target_distence; engine::SearchContext::ResultSet src_result; diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index 405b48a602dc59885fe4241cce58f867b961bf61..cfac3ea6e7f2367766f50e01184c1c32947f6432 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -91,9 +91,10 @@ zilliz::milvus::engine::DBMetaOptions DISABLED_MySQLTest::getDBMetaOptions() { zilliz::milvus::engine::DBMetaOptions options; options.path = "/tmp/milvus_test"; options.backend_uri = DBTestEnvironment::getURI(); - + if(options.backend_uri.empty()) { - throw std::exception(); +// throw std::exception(); + options.backend_uri = "mysql://root:Fantast1c@192.168.1.194:3306/"; } return options; @@ -123,6 +124,10 @@ int main(int argc, char **argv) { if (argc > 1) { uri = argv[1]; } + +// if(uri.empty()) { +// uri = "mysql://root:Fantast1c@192.168.1.194:3306/"; +// } // std::cout << uri << std::endl; ::testing::AddGlobalTestEnvironment(new DBTestEnvironment); return RUN_ALL_TESTS(); diff --git a/cpp/unittest/metrics/prometheus_test.cpp b/cpp/unittest/metrics/prometheus_test.cpp index 521e00fc5c91401b90cde3115ca7f11bd59803bf..004e58a5fc5ff26e66fe9699c453062355b0b8e1 100644 --- a/cpp/unittest/metrics/prometheus_test.cpp +++ b/cpp/unittest/metrics/prometheus_test.cpp @@ -54,4 +54,7 @@ TEST(PrometheusTest, PROMETHEUS_TEST){ instance.ConnectionGaugeDecrement(); instance.KeepingAliveCounterIncrement(); instance.OctetsSet(); + instance.CPUCoreUsagePercentSet(); + instance.GPUTemperature(); + instance.CPUTemperature(); } \ No newline at end of file diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp index 4f1d1db4efc3977ccbad934961968430d29f4399..d52e34143a3405d79b1bcde390b1860f445b4fea 100644 --- a/cpp/unittest/server/cache_test.cpp +++ b/cpp/unittest/server/cache_test.cpp @@ -8,7 +8,6 @@ #include "cache/GpuCacheMgr.h" #include "utils/Error.h" -#include "wrapper/Index.h" #include "wrapper/knowhere/vec_index.h" using namespace zilliz::milvus; @@ -112,7 +111,7 @@ TEST(CacheTest, CPU_CACHE_TEST) { for (int i = 0; i < 20; i++) { MockVecIndex* mock_index = new MockVecIndex(); mock_index->ntotal_ = 1000000;//less 1G per index - engine::Index_ptr index(mock_index); + engine::VecIndexPtr index(mock_index); cpu_mgr->InsertItem("index_" + std::to_string(i), index); } @@ -137,7 +136,7 @@ TEST(CacheTest, CPU_CACHE_TEST) { MockVecIndex* mock_index = new MockVecIndex(); mock_index->ntotal_ = 6000000;//6G less - engine::Index_ptr index(mock_index); + engine::VecIndexPtr 
     cpu_mgr->InsertItem("index_6g", index);
 
     ASSERT_EQ(cpu_mgr->ItemCount(), 0);//data greater than capacity can not be inserted sucessfully
@@ -147,14 +146,14 @@
 }
 
 TEST(CacheTest, GPU_CACHE_TEST) {
-    cache::CacheMgr* gpu_mgr = cache::GpuCacheMgr::GetInstance();
+    cache::CacheMgr* gpu_mgr = cache::GpuCacheMgr::GetInstance(0);
 
     const int dim = 256;
 
     for(int i = 0; i < 20; i++) {
         MockVecIndex* mock_index = new MockVecIndex();
         mock_index->ntotal_ = 1000;
-        engine::Index_ptr index(mock_index);
+        engine::VecIndexPtr index(mock_index);
 
         cache::DataObjPtr obj = std::make_shared<cache::DataObj>(index);
@@ -165,6 +164,25 @@
     gpu_mgr->ClearCache();
     ASSERT_EQ(gpu_mgr->ItemCount(), 0);
+
+    for (auto i = 0; i < 3; i++) {
+        // TODO: use gpu index to mock
+        MockVecIndex *mock_index = new MockVecIndex();
+        mock_index->ntotal_ = 1000000; //2G
+        engine::VecIndexPtr index(mock_index);
+        cache::DataObjPtr data_obj = std::make_shared<cache::DataObj>(index);
+        std::cout << data_obj->size() << std::endl;
+        gpu_mgr->InsertItem("index_" + std::to_string(i), data_obj);
+    }
+
+//    ASSERT_EQ(gpu_mgr->ItemCount(), 2);
+//    auto obj0 = gpu_mgr->GetItem("index_0");
+//    ASSERT_EQ(obj0, nullptr);
+//    auto obj1 = gpu_mgr->GetItem("index_1");
+//    auto obj2 = gpu_mgr->GetItem("index_2");
+    gpu_mgr->ClearCache();
+    ASSERT_EQ(gpu_mgr->ItemCount(), 0);
+
 }
 
 TEST(CacheTest, INVALID_TEST) {
@@ -175,7 +193,7 @@
     ASSERT_EQ(mgr.GetItem("test"), nullptr);
 
     mgr.InsertItem("test", cache::DataObjPtr());
-    mgr.InsertItem("test", engine::Index_ptr(nullptr));
+    mgr.InsertItem("test", engine::VecIndexPtr(nullptr));
     mgr.EraseItem("test");
     mgr.PrintInfo();
     mgr.ClearCache();
@@ -189,7 +207,7 @@
     for(int i = 0; i < 20; i++) {
         MockVecIndex* mock_index = new MockVecIndex();
         mock_index->ntotal_ = 2;
-        engine::Index_ptr index(mock_index);
+        engine::VecIndexPtr index(mock_index);
 
         cache::DataObjPtr obj = std::make_shared<cache::DataObj>(index);
         mgr.InsertItem("index_" + std::to_string(i), obj);
diff --git a/cpp/unittest/server/config_test.cpp b/cpp/unittest/server/config_test.cpp
index 2172bdd977e48cb42bf85cadb9a0815a1e6ae66e..462b813f26c0443cc0e01c344974f0f89bb02f9c 100644
--- a/cpp/unittest/server/config_test.cpp
+++ b/cpp/unittest/server/config_test.cpp
@@ -4,9 +4,12 @@
 // Proprietary and confidential.
 ////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
+#include 
 
 #include "config/IConfigMgr.h"
 #include "server/ServerConfig.h"
+#include "utils/CommonUtil.h"
+#include "utils/ValidationUtil.h"
 
 using namespace zilliz::milvus;
@@ -15,6 +18,10 @@ namespace {
 
 static const char* CONFIG_FILE_PATH = "./milvus/conf/server_config.yaml";
 static const char* LOG_FILE_PATH = "./milvus/conf/log_config.conf";
 
+static constexpr uint64_t KB = 1024;
+static constexpr uint64_t MB = KB*1024;
+static constexpr uint64_t GB = MB*1024;
+
 }
 
 TEST(ConfigTest, CONFIG_TEST) {
@@ -87,6 +94,9 @@ TEST(ConfigTest, SERVER_CONFIG_TEST) {
     server::ServerError err = config.LoadConfigFile(CONFIG_FILE_PATH);
     ASSERT_EQ(err, server::SERVER_SUCCESS);
 
+    err = server::ServerConfig::GetInstance().ValidateConfig();
+    ASSERT_EQ(err, server::SERVER_SUCCESS);
+
     server::ConfigNode node1 = config.GetConfig("server_config");
     server::ConfigNode& node2 = config.GetConfig("cache_config");
     node1.Combine(node2);
@@ -100,6 +110,43 @@ TEST(ConfigTest, SERVER_CONFIG_TEST) {
 
     config.PrintAll();
 
-    const server::ServerConfig const_config = config;
-    server::ConfigNode node = const_config.GetConfig("aaa");
+    unsigned long total_mem = 0, free_mem = 0;
+    server::CommonUtil::GetSystemMemInfo(total_mem, free_mem);
+
+    size_t gpu_mem = 0;
+    server::ValidationUtil::GetGpuMemory(0, gpu_mem);
+
+    server::ConfigNode& server_config = config.GetConfig("server_config");
+    server::ConfigNode& db_config = config.GetConfig("db_config");
+    server::ConfigNode& cache_config = config.GetConfig(server::CONFIG_CACHE);
+    cache_config.SetValue(server::CACHE_FREE_PERCENT, "2.0");
+    err = config.ValidateConfig();
+    ASSERT_NE(err, server::SERVER_SUCCESS);
+
+    size_t cache_cap = 16;
+    size_t insert_buffer_size = (total_mem - cache_cap*GB + 1*GB)/GB;
+    db_config.SetValue(server::CONFIG_DB_INSERT_BUFFER_SIZE, std::to_string(insert_buffer_size));
+    cache_config.SetValue(server::CONFIG_CPU_CACHE_CAPACITY, std::to_string(cache_cap));
+    err = config.ValidateConfig();
+    ASSERT_NE(err, server::SERVER_SUCCESS);
+
+    cache_cap = total_mem/GB + 2;
+    cache_config.SetValue(server::CONFIG_CPU_CACHE_CAPACITY, std::to_string(cache_cap));
+    err = config.ValidateConfig();
+    ASSERT_NE(err, server::SERVER_SUCCESS);
+
+    size_t index_building_threshold = (gpu_mem + 1*MB)/MB;
+    db_config.SetValue(server::CONFIG_DB_INDEX_TRIGGER_SIZE,
+                       std::to_string(index_building_threshold));
+    err = config.ValidateConfig();
+    ASSERT_NE(err, server::SERVER_SUCCESS);
+
+    insert_buffer_size = total_mem/GB + 2;
+    db_config.SetValue(server::CONFIG_DB_INSERT_BUFFER_SIZE, std::to_string(insert_buffer_size));
+    err = config.ValidateConfig();
+    ASSERT_NE(err, server::SERVER_SUCCESS);
+
+    server_config.SetValue(server::CONFIG_GPU_INDEX, "9999");
+    err = config.ValidateConfig();
+    ASSERT_NE(err, server::SERVER_SUCCESS);
 }
\ No newline at end of file
diff --git a/cpp/unittest/server/util_test.cpp b/cpp/unittest/server/util_test.cpp
index c3a47182eed4ed8622aadfedda6552c988ec1f84..c0b1c83cf8a505db9d62d4c11b6519e49e86ada3 100644
--- a/cpp/unittest/server/util_test.cpp
+++ b/cpp/unittest/server/util_test.cpp
@@ -178,12 +178,15 @@ TEST(UtilTest, VALIDATE_TABLENAME_TEST) {
     res = server::ValidationUtil::ValidateTableName(table_name);
     ASSERT_EQ(res, server::SERVER_INVALID_TABLE_NAME);
 
-    table_name = "中文";
+    table_name = "_!@#!@";
     res = server::ValidationUtil::ValidateTableName(table_name);
     ASSERT_EQ(res, server::SERVER_INVALID_TABLE_NAME);
 
+    table_name = "中文";
+    res = server::ValidationUtil::ValidateTableName(table_name);
+    ASSERT_EQ(res, server::SERVER_INVALID_TABLE_NAME);
 
-    table_name = std::string('a', 32768);
+    table_name = std::string(10000, 'a');
     res = server::ValidationUtil::ValidateTableName(table_name);
     ASSERT_EQ(res, server::SERVER_INVALID_TABLE_NAME);
 }
@@ -196,11 +199,39 @@ TEST(UtilTest, VALIDATE_DIMENSIONTEST) {
     ASSERT_EQ(server::ValidationUtil::ValidateTableDimension(1), server::SERVER_SUCCESS);
 }
 
-TEST(UtilTest, VALIDATE_INDEXTYPE_TEST) {
+TEST(UtilTest, VALIDATE_INDEX_TEST) {
     ASSERT_EQ(server::ValidationUtil::ValidateTableIndexType((int)engine::EngineType::INVALID), server::SERVER_INVALID_INDEX_TYPE);
     for(int i = 1; i <= (int)engine::EngineType::MAX_VALUE; i++) {
         ASSERT_EQ(server::ValidationUtil::ValidateTableIndexType(i), server::SERVER_SUCCESS);
     }
     ASSERT_EQ(server::ValidationUtil::ValidateTableIndexType((int)engine::EngineType::MAX_VALUE + 1), server::SERVER_INVALID_INDEX_TYPE);
+
+    ASSERT_EQ(server::ValidationUtil::ValidateTableIndexNlist(0), server::SERVER_INVALID_INDEX_NLIST);
+    ASSERT_EQ(server::ValidationUtil::ValidateTableIndexNlist(100), server::SERVER_SUCCESS);
+
+    ASSERT_EQ(server::ValidationUtil::ValidateTableIndexFileSize(0), server::SERVER_INVALID_INDEX_FILE_SIZE);
+    ASSERT_EQ(server::ValidationUtil::ValidateTableIndexFileSize(100), server::SERVER_SUCCESS);
+
+    ASSERT_EQ(server::ValidationUtil::ValidateTableIndexMetricType(0), server::SERVER_INVALID_INDEX_METRIC_TYPE);
+    ASSERT_EQ(server::ValidationUtil::ValidateTableIndexMetricType(1), server::SERVER_SUCCESS);
+    ASSERT_EQ(server::ValidationUtil::ValidateTableIndexMetricType(2), server::SERVER_SUCCESS);
+}
+
+TEST(ValidationUtilTest, ValidateGpuTest) {
+    ASSERT_EQ(server::ValidationUtil::ValidateGpuIndex(0), server::SERVER_SUCCESS);
+    ASSERT_NE(server::ValidationUtil::ValidateGpuIndex(100), server::SERVER_SUCCESS);
+
+    size_t memory = 0;
+    ASSERT_EQ(server::ValidationUtil::GetGpuMemory(0, memory), server::SERVER_SUCCESS);
+    ASSERT_NE(server::ValidationUtil::GetGpuMemory(100, memory), server::SERVER_SUCCESS);
 }
 
+TEST(UtilTest, TIMERECORDER_TEST) {
+    for(int64_t log_level = 0; log_level <= 6; log_level++) {
+        if(log_level == 5) {
+            continue; //skip fatal
+        }
+        server::TimeRecorder rc("time", log_level);
+        rc.RecordSection("end");
+    }
+}