提交 95105bfd 编写于 作者: X xiaojun.lin

Merge remote-tracking branch 'official/master' into fix_1564

Signed-off-by: xiaojun.lin <xiaojun.lin@zilliz.com>
......@@ -47,6 +47,7 @@ Please mark all change in change log and use the issue from GitHub
- \#1556 Index file not created after table and index created
- \#1560 Search crashed with Super-high dimensional binary vector
- \#1564 Too low recall for glove-200-angular, ivf_pq index
- \#1574 Set all existing bitsets in cache when applying deletes
## Feature
- \#216 Add CLI to get server info
......
......@@ -236,11 +236,27 @@ MemTable::ApplyDeletes() {
utils::GetParentPath(table_file.location_, segment_dir);
segment::SegmentReader segment_reader(segment_dir);
auto index =
std::static_pointer_cast<VecIndex>(cache::CpuCacheMgr::GetInstance()->GetIndex(table_file.location_));
faiss::ConcurrentBitsetPtr blacklist = nullptr;
if (index != nullptr) {
status = index->GetBlacklist(blacklist);
auto& segment_id = table_file.segment_id_;
meta::TableFilesSchema segment_files;
status = meta_->GetTableFilesBySegmentId(segment_id, segment_files);
if (!status.ok()) {
break;
}
// Collect every cached index of this segment that has a blacklist
std::vector<VecIndexPtr> indexes;
std::vector<faiss::ConcurrentBitsetPtr> blacklists;
for (auto& file : segment_files) {
auto index =
std::static_pointer_cast<VecIndex>(cache::CpuCacheMgr::GetInstance()->GetIndex(file.location_));
faiss::ConcurrentBitsetPtr blacklist = nullptr;
if (index != nullptr) {
index->GetBlacklist(blacklist);
if (blacklist != nullptr) {
indexes.emplace_back(index);
blacklists.emplace_back(blacklist);
}
}
}
std::vector<segment::doc_id_t> uids;
......@@ -293,7 +309,7 @@ MemTable::ApplyDeletes() {
id_bloom_filter_ptr->Remove(uids[i]);
}
if (blacklist != nullptr) {
for (auto& blacklist : blacklists) {
if (!blacklist->test(i)) {
blacklist->set(i);
}
......@@ -308,8 +324,8 @@ MemTable::ApplyDeletes() {
<< find_diff.count() << " s in total";
ENGINE_LOG_DEBUG << "Setting deleted docs and bloom filter took " << set_diff.count() << " s in total";
if (index != nullptr) {
index->SetBlacklist(blacklist);
for (auto i = 0; i < indexes.size(); ++i) {
indexes[i]->SetBlacklist(blacklists[i]);
}
start = std::chrono::high_resolution_clock::now();
......@@ -339,12 +355,6 @@ MemTable::ApplyDeletes() {
<< " s";
// Update table file row count
auto& segment_id = table_file.segment_id_;
meta::TableFilesSchema segment_files;
status = meta_->GetTableFilesBySegmentId(segment_id, segment_files);
if (!status.ok()) {
break;
}
for (auto& file : segment_files) {
if (file.file_type_ == meta::TableFileSchema::RAW || file.file_type_ == meta::TableFileSchema::TO_INDEX ||
file.file_type_ == meta::TableFileSchema::INDEX || file.file_type_ == meta::TableFileSchema::BACKUP) {
......
......@@ -12,10 +12,10 @@
#pragma once
#include <faiss/utils/ConcurrentBitset.h>
#include <thirdparty/nlohmann/json.hpp>
#include <memory>
#include <string>
#include <thirdparty/nlohmann/json.hpp>
#include <utility>
#include <vector>
......@@ -180,13 +180,14 @@ class VecIndex : public cache::DataObj {
virtual Status
SetBlacklist(faiss::ConcurrentBitsetPtr list) {
ENGINE_LOG_ERROR << "SetBlacklist not support";
// ENGINE_LOG_ERROR << "SetBlacklist not support";
return Status::OK();
}
virtual Status
GetBlacklist(faiss::ConcurrentBitsetPtr& list) {
ENGINE_LOG_ERROR << "GetBlacklist not support";
// ENGINE_LOG_ERROR << "GetBlacklist not support";
ENGINE_LOG_WARNING << "Deletion on unsupported index type";
return Status::OK();
}
......
此差异已折叠。
......@@ -68,7 +68,7 @@ TEST_F(SearchByIdTest, basic) {
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
table_info_get.table_id_ = table_info.table_id_;
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
......@@ -81,7 +81,7 @@ TEST_F(SearchByIdTest, basic) {
xb.id_array_.push_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
stat = db_->InsertVectors(table_info.table_id_, "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
......@@ -108,7 +108,8 @@ TEST_F(SearchByIdTest, basic) {
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
stat = db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, i, result_ids, result_distances);
stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, json_params, i, result_ids,
result_distances);
ASSERT_EQ(result_ids[0], i);
ASSERT_LT(result_distances[0], 1e-4);
}
......@@ -119,7 +120,7 @@ TEST_F(SearchByIdTest, with_index) {
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
table_info_get.table_id_ = table_info.table_id_;
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
......@@ -132,7 +133,7 @@ TEST_F(SearchByIdTest, with_index) {
xb.id_array_.push_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
stat = db_->InsertVectors(table_info.table_id_, "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
......@@ -153,7 +154,7 @@ TEST_F(SearchByIdTest, with_index) {
milvus::engine::TableIndex index;
index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8;
index.extra_params_ = {{"nlist", 10}};
stat = db_->CreateIndex(GetTableName(), index);
stat = db_->CreateIndex(table_info.table_id_, index);
ASSERT_TRUE(stat.ok());
const int topk = 10, nprobe = 10;
......@@ -165,7 +166,8 @@ TEST_F(SearchByIdTest, with_index) {
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
stat = db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, i, result_ids, result_distances);
stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, json_params, i, result_ids,
result_distances);
ASSERT_EQ(result_ids[0], i);
ASSERT_LT(result_distances[0], 1e-3);
}
......@@ -176,7 +178,7 @@ TEST_F(SearchByIdTest, with_delete) {
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
table_info_get.table_id_ = table_info.table_id_;
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
......@@ -189,7 +191,7 @@ TEST_F(SearchByIdTest, with_delete) {
xb.id_array_.push_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
stat = db_->InsertVectors(table_info.table_id_, "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
......@@ -211,7 +213,7 @@ TEST_F(SearchByIdTest, with_delete) {
for (auto& id : ids_to_search) {
ids_to_delete.emplace_back(id);
}
stat = db_->DeleteVectors(GetTableName(), ids_to_delete);
stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete);
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
......@@ -225,7 +227,8 @@ TEST_F(SearchByIdTest, with_delete) {
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
stat = db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, i, result_ids, result_distances);
stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, json_params, i, result_ids,
result_distances);
ASSERT_EQ(result_ids[0], -1);
ASSERT_EQ(result_distances[0], std::numeric_limits<float>::max());
}
......@@ -236,7 +239,7 @@ TEST_F(GetVectorByIdTest, basic) {
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
table_info_get.table_id_ = table_info.table_id_;
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
......@@ -249,7 +252,7 @@ TEST_F(GetVectorByIdTest, basic) {
xb.id_array_.push_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
stat = db_->InsertVectors(table_info.table_id_, "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
......@@ -277,11 +280,11 @@ TEST_F(GetVectorByIdTest, basic) {
milvus::engine::ResultDistances result_distances;
milvus::engine::VectorsData vector;
stat = db_->GetVectorByID(GetTableName(), id, vector);
stat = db_->GetVectorByID(table_info.table_id_, id, vector);
ASSERT_TRUE(stat.ok());
stat =
db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, vector, result_ids, result_distances);
stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, json_params, vector, result_ids,
result_distances);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(result_ids[0], id);
ASSERT_LT(result_distances[0], 1e-4);
......@@ -293,7 +296,7 @@ TEST_F(GetVectorByIdTest, with_index) {
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
table_info_get.table_id_ = table_info.table_id_;
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
......@@ -306,7 +309,7 @@ TEST_F(GetVectorByIdTest, with_index) {
xb.id_array_.push_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
stat = db_->InsertVectors(table_info.table_id_, "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
......@@ -327,7 +330,7 @@ TEST_F(GetVectorByIdTest, with_index) {
milvus::engine::TableIndex index;
index.extra_params_ = {{"nlist", 10}};
index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8;
stat = db_->CreateIndex(GetTableName(), index);
stat = db_->CreateIndex(table_info.table_id_, index);
ASSERT_TRUE(stat.ok());
const int topk = 10, nprobe = 10;
......@@ -340,11 +343,11 @@ TEST_F(GetVectorByIdTest, with_index) {
milvus::engine::ResultDistances result_distances;
milvus::engine::VectorsData vector;
stat = db_->GetVectorByID(GetTableName(), id, vector);
stat = db_->GetVectorByID(table_info.table_id_, id, vector);
ASSERT_TRUE(stat.ok());
stat =
db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, vector, result_ids, result_distances);
stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, json_params, vector, result_ids,
result_distances);
ASSERT_EQ(result_ids[0], id);
ASSERT_LT(result_distances[0], 1e-3);
}
......@@ -355,7 +358,7 @@ TEST_F(GetVectorByIdTest, with_delete) {
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
table_info_get.table_id_ = table_info.table_id_;
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
......@@ -368,7 +371,7 @@ TEST_F(GetVectorByIdTest, with_delete) {
xb.id_array_.push_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
stat = db_->InsertVectors(table_info.table_id_, "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
......@@ -390,7 +393,7 @@ TEST_F(GetVectorByIdTest, with_delete) {
for (auto& id : ids_to_search) {
ids_to_delete.emplace_back(id);
}
stat = db_->DeleteVectors(GetTableName(), ids_to_delete);
stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete);
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
......@@ -402,7 +405,7 @@ TEST_F(GetVectorByIdTest, with_delete) {
milvus::engine::ResultDistances result_distances;
milvus::engine::VectorsData vector;
stat = db_->GetVectorByID(GetTableName(), id, vector);
stat = db_->GetVectorByID(table_info.table_id_, id, vector);
ASSERT_TRUE(stat.ok());
ASSERT_TRUE(vector.float_data_.empty());
ASSERT_EQ(vector.vector_count_, 0);
......@@ -419,7 +422,7 @@ TEST_F(SearchByIdTest, BINARY) {
ASSERT_TRUE(stat.ok());
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
table_info_get.table_id_ = table_info.table_id_;
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
......@@ -445,7 +448,7 @@ TEST_F(SearchByIdTest, BINARY) {
vectors.id_array_.emplace_back(k * nb + i);
}
stat = db_->InsertVectors(GetTableName(), "", vectors);
stat = db_->InsertVectors(table_info.table_id_, "", vectors);
ASSERT_TRUE(stat.ok());
}
......@@ -465,7 +468,7 @@ TEST_F(SearchByIdTest, BINARY) {
ASSERT_TRUE(stat.ok());
uint64_t row_count;
stat = db_->GetTableRowCount(GetTableName(), row_count);
stat = db_->GetTableRowCount(table_info.table_id_, row_count);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(row_count, nb * insert_loop);
......@@ -479,12 +482,12 @@ TEST_F(SearchByIdTest, BINARY) {
milvus::engine::ResultDistances result_distances;
milvus::engine::VectorsData vector;
stat = db_->GetVectorByID(GetTableName(), id, vector);
stat = db_->GetVectorByID(table_info.table_id_, id, vector);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(vector.vector_count_, 1);
stat =
db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, vector, result_ids, result_distances);
stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, json_params, vector, result_ids,
result_distances);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(result_ids[0], id);
ASSERT_LT(result_distances[0], 1e-4);
......@@ -493,8 +496,8 @@ TEST_F(SearchByIdTest, BINARY) {
result_ids.clear();
result_distances.clear();
stat =
db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, id, result_ids, result_distances);
stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, json_params, id, result_ids,
result_distances);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(result_ids[0], id);
ASSERT_LT(result_distances[0], 1e-4);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册