Unverified commit 53360cda authored by groot, committed by GitHub

reduce uid copy during search (#3867)

Signed-off-by: groot <yihua.mo@zilliz.com>
Parent 7fcaa5be
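
The core idea of this commit: when only the number of rows is needed, derive it from the byte size of the raw UID field instead of copying every uid into a std::vector via LoadUids(). A minimal standalone sketch of the before/after pattern (simplified stand-in names, not the actual Milvus code):

#include <cstdint>
#include <cstring>
#include <vector>

using idx_t = int64_t;

// Stand-in for the raw UID field bytes as loaded from storage.
struct RawField {
    std::vector<uint8_t> data_;
};

// Old pattern: materialize all uids just to ask how many there are.
int64_t RowCountViaCopy(const RawField& raw) {
    std::vector<idx_t> uids(raw.data_.size() / sizeof(idx_t));
    std::memcpy(uids.data(), raw.data_.data(), raw.data_.size());  // full copy of every uid
    return static_cast<int64_t>(uids.size());
}

// New pattern (what the added GetRowCount() does): divide the buffer size by the id width.
int64_t RowCountDirect(const RawField& raw) {
    if (raw.data_.size() % sizeof(idx_t) != 0) {
        return 0;  // illegal file size, mirroring the error handling in the diff below
    }
    return static_cast<int64_t>(raw.data_.size() / sizeof(idx_t));
}
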
@@ -244,6 +244,8 @@ SegmentReader::LoadUids(std::vector<engine::idx_t>& uids) {
return Status(DB_ERROR, err_msg);
}
TimeRecorderAuto recorder("SegmentReader::LoadUids");
uids.clear();
uids.resize(raw->data_.size() / sizeof(engine::idx_t));
memcpy(uids.data(), raw->data_.data(), raw->data_.size());
@@ -269,12 +271,9 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
return Status(DB_ERROR, "Field is not vector type");
}
// load uids
std::vector<int64_t> uids;
STATUS_CHECK(LoadUids(uids));
// load deleted doc
faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(uids.size());
int64_t row_count = GetRowCount();
faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(row_count);
segment::DeletedDocsPtr deleted_docs_ptr;
LoadDeletedDocs(deleted_docs_ptr);
if (deleted_docs_ptr != nullptr) {
@@ -307,7 +306,11 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
engine::BinaryDataPtr raw;
STATUS_CHECK(LoadField(field_name, raw, false));
auto dataset = knowhere::GenDataset(uids.size(), dimension, raw->data_.data());
// load uids
std::vector<int64_t> uids;
STATUS_CHECK(LoadUids(uids));
auto dataset = knowhere::GenDataset(row_count, dimension, raw->data_.data());
// construct IDMAP index
knowhere::VecIndexFactory& vec_index_factory = knowhere::VecIndexFactory::GetInstance();
@@ -326,9 +329,9 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
segment_ptr_->SetVectorIndex(field_name, index_ptr);
cache::CpuCacheMgr::GetInstance().InsertItem(temp_index_path, index_ptr);
recorder.RecordSection("construct temp IDMAP index");
}
recorder.RecordSection("create temp IDMAP index");
return Status::OK();
}
@@ -377,11 +380,16 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
STATUS_CHECK(ss_codec.GetVectorIndexFormat()->ConstructIndex(index_type, index_data, raw_data, compress_data,
index_ptr));
// load uids
std::vector<int64_t> uids;
STATUS_CHECK(LoadUids(uids));
index_ptr->SetUids(uids);
index_ptr->SetBlacklist(concurrent_bitset_ptr);
segment_ptr_->SetVectorIndex(field_name, index_ptr);
cache::CpuCacheMgr::GetInstance().InsertItem(index_file_path, index_ptr); // put into cache
recorder.RecordSection("construct index");
} catch (std::exception& e) {
std::string err_msg = "Failed to load vector index: " + std::string(e.what());
LOG_ENGINE_ERROR_ << err_msg;
@@ -506,7 +514,7 @@ SegmentReader::LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
Status
SegmentReader::LoadDeletedDocs(segment::DeletedDocsPtr& deleted_docs_ptr) {
try {
TimeRecorder recorder("SegmentReader::LoadDeletedDocs");
TimeRecorderAuto recorder("SegmentReader::LoadDeletedDocs");
deleted_docs_ptr = segment_ptr_->GetDeletedDocs();
if (deleted_docs_ptr != nullptr) {
@@ -611,6 +619,30 @@ SegmentReader::GetTempIndexPath(const std::string& field_name, std::string& path
return Status::OK();
}
int64_t
SegmentReader::GetRowCount() {
engine::BinaryDataPtr raw;
auto status = LoadField(engine::FIELD_UID, raw);
if (!status.ok()) {
LOG_ENGINE_ERROR_ << status.message();
return 0;
}
if (raw == nullptr) {
LOG_ENGINE_ERROR_ << "Failed to load id field";
return 0;
}
if (raw->data_.size() % sizeof(engine::idx_t) != 0) {
std::string err_msg = "Failed to load uids: illegal file size";
LOG_ENGINE_ERROR_ << err_msg;
return 0;
}
int64_t count = raw->data_.size() / sizeof(engine::idx_t);
return count;
}
Status
SegmentReader::ClearCache() {
TimeRecorderAuto recorder("SegmentReader::ClearCache");
......
@@ -95,6 +95,9 @@ class SegmentReader {
return segment_visitor_;
}
int64_t
GetRowCount();
// clear cache from cache manager, use this method for segment merge/compact and collection/partition drop
Status
ClearCache();
......
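
For context, a rough sketch of how LoadVectorIndex is restructured around the new GetRowCount() accessor declared above: the deleted-docs bitset only needs the row count, and the uid copy is deferred to the branch that actually builds or attaches an index. This is a simplified illustration with hypothetical stand-in types, assuming (as the surrounding code suggests) that a cached index can be returned before any uid load:

#include <cstdint>
#include <memory>
#include <vector>

// Minimal stand-ins for the real Milvus types.
struct Bitset {
    explicit Bitset(int64_t n) : size(n) {}
    int64_t size;
};
struct Index {
    std::vector<int64_t> uids;
    std::shared_ptr<Bitset> blacklist;
};
using IndexPtr = std::shared_ptr<Index>;

struct SegmentReaderSketch {
    std::vector<int64_t> stored_uids;  // pretend on-disk uid field
    IndexPtr cached_index;             // pretend cache entry

    int64_t GetRowCount() const {
        return static_cast<int64_t>(stored_uids.size());  // cheap, no copy
    }
    void LoadUids(std::vector<int64_t>& uids) const {
        uids = stored_uids;  // the copy this commit avoids paying up front
    }

    IndexPtr LoadVectorIndex() {
        // Size the blacklist bitset from the row count alone.
        auto bitset = std::make_shared<Bitset>(GetRowCount());
        if (cached_index) {
            cached_index->blacklist = bitset;  // cache hit: uids never copied
            return cached_index;
        }
        // Only the index-building path pays for the uid copy.
        auto index = std::make_shared<Index>();
        LoadUids(index->uids);
        index->blacklist = bitset;
        cached_index = index;
        return index;
    }
};
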