From a88f5b6cbb84772dedb04de2947aab930f89406d Mon Sep 17 00:00:00 2001 From: "shengjun.li" Date: Tue, 21 Jul 2020 14:26:08 +0800 Subject: [PATCH] Fix index size (#2941) * add virtual interface UpdateIndexSize Signed-off-by: shengjun.li * update ivf index size Signed-off-by: sahuang * fix PQ logic Signed-off-by: sahuang * fix index size of index hnsw, annoy and nsg Signed-off-by: cmli * add GetSize() interface for SPTAG Signed-off-by: cmli * fix binary ivf Signed-off-by: shengjun.li Co-authored-by: sahuang Co-authored-by: cmli --- CHANGELOG.md | 7 +++--- .../default/DefaultVectorIndexFormat.cpp | 3 ++- core/src/db/engine/ExecutionEngineImpl.cpp | 2 +- .../index/vector_index/ConfAdapter.cpp | 1 - .../index/vector_index/IndexAnnoy.cpp | 8 +++++++ .../knowhere/index/vector_index/IndexAnnoy.h | 3 +++ .../index/vector_index/IndexBinaryIVF.cpp | 14 +++++++++++ .../index/vector_index/IndexBinaryIVF.h | 3 +++ .../knowhere/index/vector_index/IndexHNSW.cpp | 8 +++++++ .../knowhere/index/vector_index/IndexHNSW.h | 3 +++ .../knowhere/index/vector_index/IndexIVF.cpp | 13 +++++++++++ .../knowhere/index/vector_index/IndexIVF.h | 3 +++ .../index/vector_index/IndexIVFPQ.cpp | 23 +++++++++++++++++++ .../knowhere/index/vector_index/IndexIVFPQ.h | 3 +++ .../index/vector_index/IndexIVFSQ.cpp | 15 ++++++++++++ .../knowhere/index/vector_index/IndexIVFSQ.h | 3 +++ .../knowhere/index/vector_index/IndexNSG.cpp | 8 +++++++ .../knowhere/index/vector_index/IndexNSG.h | 3 +++ .../index/vector_index/IndexSPTAG.cpp | 8 +++++++ .../knowhere/index/vector_index/IndexSPTAG.h | 3 +++ .../knowhere/index/vector_index/VecIndex.h | 4 ++++ .../vector_index/gpu/IndexIVFSQHybrid.cpp | 15 ++++++++++++ .../index/vector_index/gpu/IndexIVFSQHybrid.h | 3 +++ .../index/vector_index/impl/nsg/NSG.cpp | 16 +++++++++++++ .../index/vector_index/impl/nsg/NSG.h | 3 +++ .../SPTAG/AnnService/inc/Core/BKT/Index.h | 1 + .../SPTAG/AnnService/inc/Core/KDT/Index.h | 1 + .../SPTAG/AnnService/inc/Core/VectorIndex.h | 1 + 
.../src/index/thirdparty/annoy/src/annoylib.h | 9 ++++++++ core/src/index/thirdparty/hnswlib/hnswalg.h | 20 ++++++++++++---- .../thirdparty/hnswlib/visited_list_pool.h | 7 ++++++ 31 files changed, 204 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 076710d4..40596c0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Please mark all change in change log and use the issue from GitHub # Milvus 0.10.2 (TBD) ## Bug +- \#2890 Fix the wrong index size ## Feature @@ -16,9 +17,10 @@ Please mark all change in change log and use the issue from GitHub ## Bug - \#2487 Enlarge timeout value for creating collection +- \#2487 HotFix release lock failed on NAS - \#2557 Fix random crash of INSERT_DUPLICATE_ID case - \#2578 Result count doesn't match target vectors count -- \#2585 IVF_PQ on GPU with using metric_type IP +- \#2585 Support IVF_PQ IP on GPU - \#2598 Fix Milvus docker image report illegal instruction - \#2617 Fix HNSW and RNSG index files size - \#2637 Suit the range of HNSW parameters @@ -29,11 +31,10 @@ Please mark all change in change log and use the issue from GitHub - \#2739 Fix mishards start failed - \#2752 Milvus formats vectors data to double-precision and return to http client - \#2767 Fix a bug of getting wrong nprobe limitation in knowhere on GPU version -- \#2768 After building the index,the number of vectors increases +- \#2768 After building the index, the number of vectors increases - \#2774 Server down during loading data - \#2776 Fix too many data copies during creating IVF index - \#2813 To implemente RNSG IP -- \#2487 HotFix release lock failed on NAS ## Feature diff --git a/core/src/codecs/default/DefaultVectorIndexFormat.cpp b/core/src/codecs/default/DefaultVectorIndexFormat.cpp index d543192d..e513d2ab 100644 --- a/core/src/codecs/default/DefaultVectorIndexFormat.cpp +++ b/core/src/codecs/default/DefaultVectorIndexFormat.cpp @@ -92,7 +92,8 @@ DefaultVectorIndexFormat::read_internal(const 
storage::FSHandlerPtr& fs_ptr, con vec_index_factory.CreateVecIndex(knowhere::OldIndexTypeToStr(current_type), knowhere::IndexMode::MODE_CPU); if (index != nullptr) { index->Load(load_data_list); - index->SetIndexSize(length); + index->UpdateIndexSize(); + LOG_ENGINE_DEBUG_ << "index file size " << length << " index size " << index->IndexSize(); } else { LOG_ENGINE_ERROR_ << "Fail to create vector index: " << path; } diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 519792f4..4970b094 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -367,7 +367,7 @@ ExecutionEngineImpl::Serialize() { // here we reset index size by file size, // since some index type(such as SQ8) data size become smaller after serialized - index_->SetIndexSize(server::CommonUtil::GetFileSize(location_)); + index_->UpdateIndexSize(); LOG_ENGINE_DEBUG_ << "Finish serialize index file: " << location_ << " size: " << index_->Size(); if (index_->Size() == 0) { diff --git a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp index 79afbc7b..682ff507 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp @@ -118,7 +118,6 @@ IVFConfAdapter::CheckSearch(Config& oricfg, const IndexType type, const IndexMod } else { CheckIntByRange(knowhere::IndexParams::nprobe, MIN_NPROBE, MAX_NPROBE); } - CheckIntByRange(knowhere::IndexParams::nprobe, MIN_NPROBE, MAX_NPROBE); return ConfAdapter::CheckSearch(oricfg, type, mode); } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.cpp index 990f8ebf..d344106f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.cpp +++ 
b/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.cpp @@ -162,5 +162,13 @@ IndexAnnoy::Dim() { return index_->get_dim(); } +void +IndexAnnoy::UpdateIndexSize() { + if (!index_) { + KNOWHERE_THROW_MSG("index not initialize"); + } + index_size_ = index_->cal_size(); +} + } // namespace knowhere } // namespace milvus diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.h index fa78743e..7b86dc53 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.h @@ -62,6 +62,9 @@ class IndexAnnoy : public VecIndex { int64_t Dim() override; + void + UpdateIndexSize() override; + private: MetricType metric_type_; std::shared_ptr> index_ = nullptr; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexBinaryIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexBinaryIVF.cpp index 8036a428..bc06346e 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexBinaryIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexBinaryIVF.cpp @@ -145,6 +145,20 @@ BinaryIVF::Dim() { return index_->d; } +void +BinaryIVF::UpdateIndexSize() { + if (!index_) { + KNOWHERE_THROW_MSG("index not initialize"); + } + auto bin_ivf_index = dynamic_cast(index_.get()); + auto nb = bin_ivf_index->invlists->compute_ntotal(); + auto nlist = bin_ivf_index->nlist; + auto code_size = bin_ivf_index->code_size; + + // binary ivf codes, ids and quantizer + index_size_ = nb * code_size + nb * sizeof(int64_t) + nlist * code_size; +} + void BinaryIVF::Train(const DatasetPtr& dataset_ptr, const Config& config) { GETTENSORWITHIDS(dataset_ptr) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexBinaryIVF.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexBinaryIVF.h index ae099142..80697044 100644 --- 
a/core/src/index/knowhere/knowhere/index/vector_index/IndexBinaryIVF.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexBinaryIVF.h @@ -73,6 +73,9 @@ class BinaryIVF : public VecIndex, public FaissBaseBinaryIndex { int64_t Dim() override; + void + UpdateIndexSize() override; + #if 0 DatasetPtr GetVectorById(const DatasetPtr& dataset_ptr, const Config& config); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexHNSW.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexHNSW.cpp index 758ef74a..0b9989ba 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexHNSW.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexHNSW.cpp @@ -205,5 +205,13 @@ IndexHNSW::Dim() { return (*(size_t*)index_->dist_func_param_); } +void +IndexHNSW::UpdateIndexSize() { + if (!index_) { + KNOWHERE_THROW_MSG("index not initialize"); + } + index_size_ = index_->cal_size(); +} + } // namespace knowhere } // namespace milvus diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexHNSW.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexHNSW.h index 9a21797f..576f2c48 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexHNSW.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexHNSW.h @@ -54,6 +54,9 @@ class IndexHNSW : public VecIndex { int64_t Dim() override; + void + UpdateIndexSize() override; + private: bool normalize = false; std::mutex mutex_; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index fdd8265c..357046b5 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -239,6 +239,19 @@ IVF::Seal() { SealImpl(); } +void +IVF::UpdateIndexSize() { + if (!index_) { + KNOWHERE_THROW_MSG("index not initialize"); + } + auto ivf_index = dynamic_cast(index_.get()); + auto nb = 
ivf_index->invlists->compute_ntotal(); + auto nlist = ivf_index->nlist; + auto code_size = ivf_index->code_size; + // ivf codes, ivf ids and quantizer + index_size_ = nb * code_size + nb * sizeof(int64_t) + nlist * code_size; +} + VecIndexPtr IVF::CopyCpuToGpu(const int64_t device_id, const Config& config) { #ifdef MILVUS_GPU_VERSION diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h index 291efe74..d33481ed 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h @@ -64,6 +64,9 @@ class IVF : public VecIndex, public FaissBaseIndex { int64_t Dim() override; + void + UpdateIndexSize() override; + #if 0 DatasetPtr GetVectorById(const DatasetPtr& dataset, const Config& config) override; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp index 662fefba..ab12bc83 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp @@ -73,5 +73,28 @@ IVFPQ::GenParams(const Config& config) { return params; } +void +IVFPQ::UpdateIndexSize() { + if (!index_) { + KNOWHERE_THROW_MSG("index not initialize"); + } + auto ivfpq_index = dynamic_cast(index_.get()); + auto nb = ivfpq_index->invlists->compute_ntotal(); + auto code_size = ivfpq_index->code_size; + auto pq = ivfpq_index->pq; + auto nlist = ivfpq_index->nlist; + auto d = ivfpq_index->d; + + // ivf codes, ivf ids and quantizer + auto capacity = nb * code_size + nb * sizeof(int64_t) + nlist * d * sizeof(float); + auto centroid_table = pq.M * pq.ksub * pq.dsub * sizeof(float); + auto precomputed_table = nlist * pq.M * pq.ksub * sizeof(float); + if (precomputed_table > ivfpq_index->precomputed_table_max_bytes) { + // will not precompute table + precomputed_table = 0; + } 
+ index_size_ = capacity + centroid_table + precomputed_table; +} + } // namespace knowhere } // namespace milvus diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.h index 8582a6ea..aed40720 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.h @@ -35,6 +35,9 @@ class IVFPQ : public IVF { VecIndexPtr CopyCpuToGpu(const int64_t, const Config&) override; + void + UpdateIndexSize() override; + protected: std::shared_ptr GenParams(const Config& config) override; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp index 51ae44cb..5ceb732c 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp @@ -16,6 +16,7 @@ #include #include #endif +#include #include #include @@ -62,5 +63,19 @@ IVFSQ::CopyCpuToGpu(const int64_t device_id, const Config& config) { #endif } +void +IVFSQ::UpdateIndexSize() { + if (!index_) { + KNOWHERE_THROW_MSG("index not initialize"); + } + auto ivfsq_index = dynamic_cast(index_.get()); + auto nb = ivfsq_index->invlists->compute_ntotal(); + auto code_size = ivfsq_index->code_size; + auto nlist = ivfsq_index->nlist; + auto d = ivfsq_index->d; + // ivf codes, ivf ids, sq trained vectors and quantizer + index_size_ = nb * code_size + nb * sizeof(int64_t) + 2 * d * sizeof(float) + nlist * d * sizeof(float); +} + } // namespace knowhere } // namespace milvus diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.h index 927ceb90..0c33eda5 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.h @@ -34,6 
+34,9 @@ class IVFSQ : public IVF { VecIndexPtr CopyCpuToGpu(const int64_t, const Config&) override; + + void + UpdateIndexSize() override; }; using IVFSQPtr = std::shared_ptr; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index 4ba06a29..32e87ca7 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -172,5 +172,13 @@ NSG::Dim() { return index_->dimension; } +void +NSG::UpdateIndexSize() { + if (!index_) { + KNOWHERE_THROW_MSG("index not initialize"); + } + index_size_ = index_->GetSize(); +} + } // namespace knowhere } // namespace milvus diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.h index 03b0adbd..2cbc8369 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.h @@ -67,6 +67,9 @@ class NSG : public VecIndex { int64_t Dim() override; + void + UpdateIndexSize() override; + private: std::mutex mutex_; int64_t gpu_; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp index 951a5fc3..2dc86678 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp @@ -214,6 +214,14 @@ CPUSPTAGRNG::Dim() { return index_ptr_->GetFeatureDim(); } +void +CPUSPTAGRNG::UpdateIndexSize() { + if (!index_ptr_) { + KNOWHERE_THROW_MSG("index not initialize"); + } + index_size_ = index_ptr_->GetIndexSize(); +} + // void // CPUSPTAGRNG::Add(const DatasetPtr& origin, const Config& add_config) { // SetParameters(add_config); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.h 
b/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.h index 25361ae8..945f3369 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.h @@ -60,6 +60,9 @@ class CPUSPTAGRNG : public VecIndex { int64_t Dim() override; + void + UpdateIndexSize() override; + private: void SetParameters(const Config& config); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/VecIndex.h b/core/src/index/knowhere/knowhere/index/vector_index/VecIndex.h index 9f49166f..b567e87e 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/VecIndex.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/VecIndex.h @@ -129,6 +129,10 @@ class VecIndex : public Index { index_size_ = size; } + virtual void + UpdateIndexSize() { + } + int64_t Size() override { return BlacklistSize() + UidsSize() + IndexSize(); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.cpp index dde0fcd2..e95ae9f8 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.cpp @@ -10,6 +10,7 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. 
See the License for the specific language governing permissions and limitations under the License +#include #include #include #include @@ -261,6 +262,20 @@ IVFSQHybrid::QueryImpl(int64_t n, const float* data, int64_t k, float* distances } } +void +IVFSQHybrid::UpdateIndexSize() { + if (!index_) { + KNOWHERE_THROW_MSG("index not initialize"); + } + auto ivfsqh_index = dynamic_cast(index_.get()); + auto nb = ivfsqh_index->invlists->compute_ntotal(); + auto code_size = ivfsqh_index->code_size; + auto nlist = ivfsqh_index->nlist; + auto d = ivfsqh_index->d; + // ivf codes, ivf ids, sq trained vectors and quantizer + index_size_ = nb * code_size + nb * sizeof(int64_t) + 2 * d * sizeof(float) + nlist * d * sizeof(float); +} + FaissIVFQuantizer::~FaissIVFQuantizer() { if (quantizer != nullptr) { delete quantizer; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.h b/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.h index 9d092618..4aeb7f68 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.h @@ -77,6 +77,9 @@ class IVFSQHybrid : public GPUIVFSQ { void UnsetQuantizer(); + void + UpdateIndexSize() override; + protected: BinarySet SerializeImpl(const IndexType&) override; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.cpp index d24a0378..bb8ce84f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.cpp @@ -872,6 +872,22 @@ NsgIndex::SetKnnGraph(Graph& g) { knng = std::move(g); } +int64_t +NsgIndex::GetSize() { + int64_t ret = 0; + ret += sizeof(*this); + ret += ntotal * dimension * sizeof(float); + ret += ntotal * sizeof(int64_t); + ret += sizeof(*distance_); + for (auto i = 0; i < nsg.size(); ++i) { + 
ret += nsg[i].size() * sizeof(node_t); + } + for (auto i = 0; i < knng.size(); ++i) { + ret += knng[i].size() * sizeof(node_t); + } + return ret; +} + } // namespace impl } // namespace knowhere } // namespace milvus diff --git a/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.h b/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.h index e620722f..2de1c369 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.h @@ -86,6 +86,9 @@ class NsgIndex { Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist, int64_t* ids, SearchParams& params, faiss::ConcurrentBitsetPtr bitset = nullptr); + int64_t + GetSize(); + // Not support yet. // virtual void Add() = 0; // virtual void Add_with_ids() = 0; diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/Index.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/Index.h index 0722afc1..e4c52586 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/Index.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/BKT/Index.h @@ -79,6 +79,7 @@ namespace SPTAG ~Index() {} inline SizeType GetNumSamples() const { return m_pSamples.R(); } + inline SizeType GetIndexSize() const { return sizeof(*this); } inline DimensionType GetFeatureDim() const { return m_pSamples.C(); } inline int GetCurrMaxCheck() const { return m_iMaxCheck; } diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/Index.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/Index.h index 668d423b..f3240ebd 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/Index.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/KDT/Index.h @@ -79,6 +79,7 @@ namespace SPTAG ~Index() {} inline SizeType GetNumSamples() const { return m_pSamples.R(); } + inline SizeType GetIndexSize() const { return sizeof(*this); } inline 
DimensionType GetFeatureDim() const { return m_pSamples.C(); } inline int GetCurrMaxCheck() const { return m_iMaxCheck; } diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorIndex.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorIndex.h index 49475794..b93caf0a 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorIndex.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/VectorIndex.h @@ -37,6 +37,7 @@ public: virtual DimensionType GetFeatureDim() const = 0; virtual SizeType GetNumSamples() const = 0; + virtual SizeType GetIndexSize() const = 0; virtual DistCalcMethod GetDistCalcMethod() const = 0; virtual IndexAlgoType GetIndexAlgoType() const = 0; diff --git a/core/src/index/thirdparty/annoy/src/annoylib.h b/core/src/index/thirdparty/annoy/src/annoylib.h index 00058099..605137a8 100644 --- a/core/src/index/thirdparty/annoy/src/annoylib.h +++ b/core/src/index/thirdparty/annoy/src/annoylib.h @@ -850,6 +850,7 @@ class AnnoyIndexInterface { virtual void get_item(S item, T* v) const = 0; virtual void set_seed(int q) = 0; virtual bool on_disk_build(const char* filename, char** error=nullptr) = 0; + virtual int64_t cal_size() = 0; }; template @@ -1396,6 +1397,14 @@ protected: result->push_back(nns_dist[i].second); } } + + int64_t cal_size() { + int64_t ret = 0; + ret += sizeof(*this); + ret += _roots.size() * sizeof(S); + ret += std::max(_n_nodes, _nodes_size) * _s; + return ret; + } }; #endif diff --git a/core/src/index/thirdparty/hnswlib/hnswalg.h b/core/src/index/thirdparty/hnswlib/hnswalg.h index 8c54c8c3..d8545538 100644 --- a/core/src/index/thirdparty/hnswlib/hnswalg.h +++ b/core/src/index/thirdparty/hnswlib/hnswalg.h @@ -75,7 +75,6 @@ class HierarchicalNSW : public AlgorithmInterface { throw std::runtime_error("Not enough memory: HierarchicalNSW failed to allocate linklists"); size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); mult_ = 1 / log(1.0 * M_); - revSize_ = 1.0 / mult_; } 
struct CompareByFirst { @@ -113,7 +112,7 @@ class HierarchicalNSW : public AlgorithmInterface { size_t maxM0_; size_t ef_construction_; - double mult_, revSize_; + double mult_; int maxlevel_; @@ -709,7 +708,6 @@ class HierarchicalNSW : public AlgorithmInterface { if (linkLists_ == nullptr) throw std::runtime_error("Not enough memory: loadIndex failed to allocate linklists"); element_levels_ = std::vector(max_elements); - revSize_ = 1.0 / mult_; ef_ = 10; for (size_t i = 0; i < cur_element_count; i++) { label_lookup_[getExternalLabel(i)]=i; @@ -846,7 +844,6 @@ class HierarchicalNSW : public AlgorithmInterface { if (linkLists_ == nullptr) throw std::runtime_error("Not enough memory: loadIndex failed to allocate linklists"); element_levels_ = std::vector(max_elements); - revSize_ = 1.0 / mult_; ef_ = 10; for (size_t i = 0; i < cur_element_count; i++) { label_lookup_[getExternalLabel(i)]=i; @@ -1130,6 +1127,21 @@ class HierarchicalNSW : public AlgorithmInterface { return result; } + + int64_t cal_size() { + int64_t ret = 0; + ret += sizeof(*this); + ret += sizeof(*space); + ret += visited_list_pool_->GetSize(); + ret += link_list_locks_.size() * sizeof(std::mutex); + ret += element_levels_.size() * sizeof(int); + ret += max_elements_ * size_data_per_element_; + ret += max_elements_ * sizeof(void*); + for (auto i = 0; i < max_elements_; ++ i) { + ret += linkLists_[i] ? 
size_links_per_element_ * element_levels_[i] : 0; + } + return ret; + } }; } diff --git a/core/src/index/thirdparty/hnswlib/visited_list_pool.h b/core/src/index/thirdparty/hnswlib/visited_list_pool.h index 457f7343..1a86ff6a 100644 --- a/core/src/index/thirdparty/hnswlib/visited_list_pool.h +++ b/core/src/index/thirdparty/hnswlib/visited_list_pool.h @@ -26,6 +26,7 @@ class VisitedList { } }; + ~VisitedList() { delete[] mass; } }; @@ -74,6 +75,12 @@ class VisitedListPool { delete rez; } }; + + int64_t GetSize() { + auto visit_list_size = sizeof(VisitedList) + numelements * sizeof(vl_type); + auto pool_size = pool.size() * (sizeof(VisitedList *) + visit_list_size); + return pool_size + sizeof(*this); + } }; } -- GitLab