From bf6d22e2f546d114c70bdd79182c1274fe5a4e04 Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Tue, 7 Apr 2020 16:25:46 +0800 Subject: [PATCH] #1873 fix index file serialize to incorrect path (#1874) * #1873 fix index file serialize to incorrect path Signed-off-by: yudong.cai * not create sq8h index when gpu disabled Signed-off-by: yudong.cai --- CHANGELOG.md | 3 +- core/src/codecs/VectorIndexFormat.h | 5 ++- .../default/DefaultVectorIndexFormat.cpp | 40 ++++--------------- .../codecs/default/DefaultVectorIndexFormat.h | 6 ++- core/src/db/engine/ExecutionEngineImpl.cpp | 4 +- .../index/vector_index/ConfAdapterMgr.cpp | 2 +- .../index/vector_index/VecIndexFactory.cpp | 3 ++ core/src/segment/SegmentReader.cpp | 4 +- core/src/segment/SegmentReader.h | 2 +- core/src/segment/SegmentWriter.cpp | 4 +- core/src/segment/SegmentWriter.h | 2 +- core/src/segment/VectorIndex.h | 11 ----- 12 files changed, 29 insertions(+), 57 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0286dea8..437d690d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ Please mark all change in change log and use the issue from GitHub # Milvus 0.8.0 (TBD) ## Bug -- \#1762 Server is not forbidden to create new partition which tag is "_default" +- \#1762 Server is not forbidden to create new partition which tag is `_default` +- \#1873 Fix index file serialize to incorrect path ## Feature - \#261 Integrate ANNOY into Milvus diff --git a/core/src/codecs/VectorIndexFormat.h b/core/src/codecs/VectorIndexFormat.h index c3b113cb..d25fcd71 100644 --- a/core/src/codecs/VectorIndexFormat.h +++ b/core/src/codecs/VectorIndexFormat.h @@ -29,10 +29,11 @@ namespace codec { class VectorIndexFormat { public: virtual void - read(const storage::FSHandlerPtr& fs_ptr, segment::VectorIndexPtr& vector_index) = 0; + read(const storage::FSHandlerPtr& fs_ptr, const std::string& location, segment::VectorIndexPtr& vector_index) = 0; virtual void - write(const storage::FSHandlerPtr& fs_ptr, const segment::VectorIndexPtr& vector_index) = 0; + write(const storage::FSHandlerPtr& fs_ptr, const std::string& location, + const segment::VectorIndexPtr& vector_index) = 0; }; using VectorIndexFormatPtr = std::shared_ptr; diff --git a/core/src/codecs/default/DefaultVectorIndexFormat.cpp b/core/src/codecs/default/DefaultVectorIndexFormat.cpp index 3993d503..4ce2b2f4 100644 --- a/core/src/codecs/default/DefaultVectorIndexFormat.cpp +++ b/core/src/codecs/default/DefaultVectorIndexFormat.cpp @@ -98,7 +98,8 @@ DefaultVectorIndexFormat::read_internal(const storage::FSHandlerPtr& fs_ptr, con } void -DefaultVectorIndexFormat::read(const storage::FSHandlerPtr& fs_ptr, segment::VectorIndexPtr& vector_index) { +DefaultVectorIndexFormat::read(const storage::FSHandlerPtr& fs_ptr, const std::string& location, + segment::VectorIndexPtr& vector_index) { const std::lock_guard lock(mutex_); std::string dir_path = fs_ptr->operation_ptr_->GetDirectory(); @@ -108,42 +109,17 @@ DefaultVectorIndexFormat::read(const storage::FSHandlerPtr& fs_ptr, segment::Vec throw Exception(SERVER_INVALID_ARGUMENT, err_msg); } - boost::filesystem::path target_path(dir_path); - typedef boost::filesystem::directory_iterator d_it; - d_it it_end; - d_it it(target_path); - - for (; it != it_end; ++it) { - const auto& path = it->path(); - - // if (path.extension().string() == vector_index_extension_) { - /* tmp solution, should be replaced when use .idx as index extension name */ - const std::string& location = path.string(); - if (location.substr(location.length() - 3) == "000") { - knowhere::VecIndexPtr index = read_internal(fs_ptr, location); - vector_index->SetVectorIndex(index); - vector_index->SetName(path.stem().string()); - return; - } - } -} - -std::string -GenerateFileName() { - auto now = std::chrono::system_clock::now(); - auto micros = std::chrono::duration_cast(now.time_since_epoch()).count(); - return std::to_string(micros * 1000); + knowhere::VecIndexPtr index = read_internal(fs_ptr, location); + vector_index->SetVectorIndex(index); } void -DefaultVectorIndexFormat::write(const storage::FSHandlerPtr& fs_ptr, const segment::VectorIndexPtr& vector_index) { +DefaultVectorIndexFormat::write(const storage::FSHandlerPtr& fs_ptr, const std::string& location, + const segment::VectorIndexPtr& vector_index) { const std::lock_guard lock(mutex_); std::string dir_path = fs_ptr->operation_ptr_->GetDirectory(); - const std::string index_file_path = dir_path + "/" + GenerateFileName(); - // const std::string index_file_path = dir_path + "/" + vector_index->GetName() + vector_index_extension_; - milvus::TimeRecorder recorder("write_index"); knowhere::VecIndexPtr index = vector_index->GetVectorIndex(); @@ -152,7 +128,7 @@ DefaultVectorIndexFormat::write(const storage::FSHandlerPtr& fs_ptr, const segme int32_t index_type = knowhere::StrToOldIndexType(index->index_type()); recorder.RecordSection("Start"); - fs_ptr->writer_ptr_->open(index_file_path); + fs_ptr->writer_ptr_->open(location); fs_ptr->writer_ptr_->write(&index_type, sizeof(index_type)); @@ -171,7 +147,7 @@ DefaultVectorIndexFormat::write(const storage::FSHandlerPtr& fs_ptr, const segme double span = recorder.RecordSection("End"); double rate = fs_ptr->writer_ptr_->length() * 1000000.0 / span / 1024 / 1024; - ENGINE_LOG_DEBUG << "write_index(" << index_file_path << ") rate " << rate << "MB/s"; + ENGINE_LOG_DEBUG << "write_index(" << location << ") rate " << rate << "MB/s"; } } // namespace codec diff --git a/core/src/codecs/default/DefaultVectorIndexFormat.h b/core/src/codecs/default/DefaultVectorIndexFormat.h index 58c8b39e..945ff31f 100644 --- a/core/src/codecs/default/DefaultVectorIndexFormat.h +++ b/core/src/codecs/default/DefaultVectorIndexFormat.h @@ -30,10 +30,12 @@ class DefaultVectorIndexFormat : public VectorIndexFormat { DefaultVectorIndexFormat() = default; void - read(const storage::FSHandlerPtr& fs_ptr, segment::VectorIndexPtr& vector_index) override; + read(const storage::FSHandlerPtr& fs_ptr, const std::string& location, + segment::VectorIndexPtr& vector_index) override; void - write(const storage::FSHandlerPtr& fs_ptr, const segment::VectorIndexPtr& vector_index) override; + write(const storage::FSHandlerPtr& fs_ptr, const std::string& location, + const segment::VectorIndexPtr& vector_index) override; // No copy and move DefaultVectorIndexFormat(const DefaultVectorIndexFormat&) = delete; diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index f0f3a404..fd6d53f5 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -358,7 +358,7 @@ ExecutionEngineImpl::Serialize() { utils::GetParentPath(location_, segment_dir); auto segment_writer_ptr = std::make_shared(segment_dir); segment_writer_ptr->SetVectorIndex(index_); - segment_writer_ptr->WriteVectorIndex(); + segment_writer_ptr->WriteVectorIndex(location_); // here we reset index size by file size, // since some index type(such as SQ8) data size become smaller after serialized @@ -443,7 +443,7 @@ ExecutionEngineImpl::Load(bool to_cache) { try { segment::SegmentPtr segment_ptr; segment_reader_ptr->GetSegment(segment_ptr); - auto status = segment_reader_ptr->LoadVectorIndex(segment_ptr->vector_index_ptr_); + auto status = segment_reader_ptr->LoadVectorIndex(location_, segment_ptr->vector_index_ptr_); index_ = segment_ptr->vector_index_ptr_->GetVectorIndex(); if (index_ == nullptr) { diff --git a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapterMgr.cpp b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapterMgr.cpp index f2803246..e9ef9603 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapterMgr.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapterMgr.cpp @@ -25,7 +25,7 @@ AdapterMgr::GetAdapter(const IndexType type) { try { return collection_.at(type)(); } catch (...) { - KNOWHERE_THROW_MSG("Can not find this type of confadapter"); + KNOWHERE_THROW_MSG("Can not find confadapter: " + type); } } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/VecIndexFactory.cpp b/core/src/index/knowhere/knowhere/index/vector_index/VecIndexFactory.cpp index 8e3119ec..ff8e184b 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/VecIndexFactory.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/VecIndexFactory.cpp @@ -65,6 +65,9 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) { return std::make_shared(); #ifdef MILVUS_GPU_VERSION } else if (type == IndexEnum::INDEX_FAISS_IVFSQ8H) { + if (mode == IndexMode::MODE_CPU) { + return nullptr; + } return std::make_shared(gpu_device); #endif } else if (type == IndexEnum::INDEX_FAISS_BIN_IDMAP) { diff --git a/core/src/segment/SegmentReader.cpp b/core/src/segment/SegmentReader.cpp index eddc72be..384be3af 100644 --- a/core/src/segment/SegmentReader.cpp +++ b/core/src/segment/SegmentReader.cpp @@ -93,11 +93,11 @@ SegmentReader::GetSegment(SegmentPtr& segment_ptr) { } Status -SegmentReader::LoadVectorIndex(segment::VectorIndexPtr& vector_index_ptr) { +SegmentReader::LoadVectorIndex(const std::string& location, segment::VectorIndexPtr& vector_index_ptr) { codec::DefaultCodec default_codec; try { fs_ptr_->operation_ptr_->CreateDirectory(); - default_codec.GetVectorIndexFormat()->read(fs_ptr_, vector_index_ptr); + default_codec.GetVectorIndexFormat()->read(fs_ptr_, location, vector_index_ptr); } catch (std::exception& e) { std::string err_msg = "Failed to load vector index: " + std::string(e.what()); ENGINE_LOG_ERROR << err_msg; diff --git a/core/src/segment/SegmentReader.h b/core/src/segment/SegmentReader.h index 816542b6..d260bbc3 100644 --- a/core/src/segment/SegmentReader.h +++ b/core/src/segment/SegmentReader.h @@ -46,7 +46,7 @@ class SegmentReader { LoadUids(std::vector& uids); Status - LoadVectorIndex(segment::VectorIndexPtr& vector_index_ptr); + LoadVectorIndex(const std::string& location, segment::VectorIndexPtr& vector_index_ptr); Status LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr); diff --git a/core/src/segment/SegmentWriter.cpp b/core/src/segment/SegmentWriter.cpp index 37e498d9..2589cb56 100644 --- a/core/src/segment/SegmentWriter.cpp +++ b/core/src/segment/SegmentWriter.cpp @@ -106,11 +106,11 @@ SegmentWriter::WriteVectors() { } Status -SegmentWriter::WriteVectorIndex() { +SegmentWriter::WriteVectorIndex(const std::string& location) { codec::DefaultCodec default_codec; try { fs_ptr_->operation_ptr_->CreateDirectory(); - default_codec.GetVectorIndexFormat()->write(fs_ptr_, segment_ptr_->vector_index_ptr_); + default_codec.GetVectorIndexFormat()->write(fs_ptr_, location, segment_ptr_->vector_index_ptr_); } catch (std::exception& e) { std::string err_msg = "Failed to write vector index: " + std::string(e.what()); ENGINE_LOG_ERROR << err_msg; diff --git a/core/src/segment/SegmentWriter.h b/core/src/segment/SegmentWriter.h index d150acea..cf35fab9 100644 --- a/core/src/segment/SegmentWriter.h +++ b/core/src/segment/SegmentWriter.h @@ -63,7 +63,7 @@ class SegmentWriter { VectorCount(); Status - WriteVectorIndex(); + WriteVectorIndex(const std::string& location); private: Status diff --git a/core/src/segment/VectorIndex.h b/core/src/segment/VectorIndex.h index 1fb388e1..ccb70675 100644 --- a/core/src/segment/VectorIndex.h +++ b/core/src/segment/VectorIndex.h @@ -41,16 +41,6 @@ class VectorIndex { index_ptr_ = index_ptr; } - void - SetName(const std::string& name) { - name_ = name; - } - - const std::string& - GetName() const { - return name_; - } - // No copy and move VectorIndex(const VectorIndex&) = delete; VectorIndex(VectorIndex&&) = delete; @@ -62,7 +52,6 @@ class VectorIndex { private: knowhere::VecIndexPtr index_ptr_ = nullptr; - std::string name_; }; using VectorIndexPtr = std::shared_ptr; -- GitLab