From 96d799655a48bac617e03f9203ecefcac2d0ccc7 Mon Sep 17 00:00:00 2001 From: yukun Date: Tue, 4 Aug 2020 15:00:15 +0800 Subject: [PATCH] Add DBImpl::Query unittest (#3119) * Fix Block Format Read bug Signed-off-by: fishpenguin * Fix Search crash bug Signed-off-by: fishpenguin * Fix CreateCollection bug Signed-off-by: fishpenguin * Add db->Query unittest Signed-off-by: fishpenguin Co-authored-by: Wang XiangYu --- core/src/db/DBImpl.cpp | 4 + core/src/segment/SegmentReader.cpp | 2 +- core/unittest/db/test_db.cpp | 207 ++++++++++++++++++++--------- 3 files changed, 147 insertions(+), 66 deletions(-) diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index add49677..b3d00fee 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -608,6 +608,10 @@ DBImpl::Query(const server::ContextPtr& context, const query::QueryPtr& query_pt TimeRecorder rc("DBImpl::Query"); + if (!query_ptr->root) { + return Status{DB_ERROR, "BinaryQuery is null"}; + } + snapshot::ScopedSnapshotT ss; STATUS_CHECK(snapshot::Snapshots::GetInstance().GetSnapshot(ss, query_ptr->collection_id)); auto ss_id = ss->GetID(); diff --git a/core/src/segment/SegmentReader.cpp b/core/src/segment/SegmentReader.cpp index 2e2e9c05..dacb1eaf 100644 --- a/core/src/segment/SegmentReader.cpp +++ b/core/src/segment/SegmentReader.cpp @@ -358,7 +358,7 @@ SegmentReader::LoadStructuredIndex(const std::string& field_name, knowhere::Inde // read field index auto index_visitor = field_visitor->GetElementVisitor(engine::FieldElementType::FET_INDEX); - if (index_visitor == nullptr || index_visitor->GetFile() != nullptr) { + if (index_visitor && index_visitor->GetFile() != nullptr) { std::string file_path = engine::snapshot::GetResPath(dir_collections_, index_visitor->GetFile()); ss_codec.GetStructuredIndexFormat()->Read(fs_ptr_, file_path, index_ptr); diff --git a/core/unittest/db/test_db.cpp b/core/unittest/db/test_db.cpp index cf84b633..d5d57d9d 100644 --- a/core/unittest/db/test_db.cpp +++ b/core/unittest/db/test_db.cpp @@ -13,17 +13,17 @@ #include #include -#include -#include #include +#include +#include -#include "segment/Segment.h" -#include "db/utils.h" #include "db/SnapshotUtils.h" #include "db/SnapshotVisitor.h" #include "db/snapshot/IterateHandler.h" #include "db/snapshot/ResourceHelper.h" +#include "db/utils.h" #include "knowhere/index/vector_index/helpers/IndexParameter.h" +#include "segment/Segment.h" using SegmentVisitor = milvus::engine::SegmentVisitor; @@ -36,12 +36,10 @@ CreateCollection(std::shared_ptr db, const std::string& collection_name, context.lsn = lsn; auto collection_schema = std::make_shared(collection_name); context.collection = collection_schema; - auto vector_field = std::make_shared(VECTOR_FIELD_NAME, 0, - milvus::engine::DataType::VECTOR_FLOAT); - auto vector_field_element = std::make_shared(0, 0, "ivfsq8", - milvus::engine::FieldElementType::FET_INDEX); - auto int_field = std::make_shared("int", 0, - milvus::engine::DataType::INT32); + auto vector_field = std::make_shared(VECTOR_FIELD_NAME, 0, milvus::engine::DataType::VECTOR_FLOAT); + auto vector_field_element = + std::make_shared(0, 0, "ivfsq8", milvus::engine::FieldElementType::FET_INDEX); + auto int_field = std::make_shared("int", 0, milvus::engine::DataType::INT32); context.fields_schema[vector_field] = {vector_field_element}; context.fields_schema[int_field] = {}; @@ -78,6 +76,32 @@ CreateCollection2(std::shared_ptr db, const std::string& collection_name return db->CreateCollection(context); } +milvus::Status +CreateCollection3(std::shared_ptr db, const std::string& collection_name, const LSN_TYPE& lsn) { + CreateCollectionContext context; + context.lsn = lsn; + auto collection_schema = std::make_shared(collection_name); + context.collection = collection_schema; + + milvus::json params; + params[milvus::knowhere::meta::DIM] = COLLECTION_DIM; + auto vector_field = std::make_shared("float_vector", 0, milvus::engine::DataType::VECTOR_FLOAT, params); + context.fields_schema[vector_field] = {}; + + std::unordered_map attr_type = { + {"int64", milvus::engine::DataType::INT64}, + }; + + std::vector field_names; + for (auto& pair : attr_type) { + auto field = std::make_shared(pair.first, 0, pair.second); + context.fields_schema[field] = {}; + field_names.push_back(pair.first); + } + + return db->CreateCollection(context); +} + void BuildEntities(uint64_t n, uint64_t batch_index, milvus::engine::DataChunkPtr& data_chunk) { data_chunk = std::make_shared(); @@ -136,13 +160,90 @@ BuildEntities(uint64_t n, uint64_t batch_index, milvus::engine::DataChunkPtr& da data_chunk->fixed_fields_["field_2"] = raw; } } + +void +BuildQueryPtr(const std::string& collection_name, int64_t n, int64_t topk, std::vector& field_names, + std::vector& partitions, milvus::query::QueryPtr& query_ptr) { + auto general_query = std::make_shared(); + query_ptr->collection_id = collection_name; + query_ptr->field_names = field_names; + query_ptr->partitions = partitions; + std::set index_fields = {"int64", "float_vector"}; + query_ptr->index_fields = index_fields; + + auto left_query = std::make_shared(); + auto term_query = std::make_shared(); + std::vector term_value(n, 0); + for (uint64_t i = 0; i < n; i++) { + term_value[i] = i; + } + term_query->json_obj = {{"int64", {{"values", term_value}}}}; + std::cout << term_query->json_obj.dump() << std::endl; + left_query->leaf = std::make_shared(); + left_query->leaf->term_query = term_query; + general_query->bin->left_query = left_query; + + auto right_query = std::make_shared(); + right_query->leaf = std::make_shared(); + std::string placeholder = "placeholder_1"; + right_query->leaf->vector_placeholder = placeholder; + general_query->bin->right_query = right_query; + + auto vector_query = std::make_shared(); + vector_query->field_name = "float_vector"; + vector_query->topk = topk; + milvus::query::VectorRecord vector_record; + vector_record.float_data.resize(n * COLLECTION_DIM); + for (uint64_t i = 0; i < n; i++) { + for (int64_t j = 0; j < COLLECTION_DIM; j++) vector_record.float_data[COLLECTION_DIM * i + j] = drand48(); + vector_record.float_data[COLLECTION_DIM * i] += i / 2000.; + } + vector_query->query_vector = vector_record; + vector_query->extra_params = {{"metric_type", "L2"}, {"nprobe", 1024}}; + + query_ptr->root = general_query; + query_ptr->vectors.insert(std::make_pair(placeholder, vector_query)); +} + +void +BuildEntities2(uint64_t n, uint64_t batch_index, milvus::engine::DataChunkPtr& data_chunk) { + data_chunk = std::make_shared(); + data_chunk->count_ = n; + + milvus::engine::VectorsData vectors; + vectors.vector_count_ = n; + vectors.float_data_.clear(); + vectors.float_data_.resize(n * COLLECTION_DIM); + float* data = vectors.float_data_.data(); + for (uint64_t i = 0; i < n; i++) { + for (int64_t j = 0; j < COLLECTION_DIM; j++) data[COLLECTION_DIM * i + j] = drand48(); + data[COLLECTION_DIM * i] += i / 2000.; + + vectors.id_array_.push_back(n * batch_index + i); + } + + milvus::engine::FIXED_FIELD_DATA& raw = data_chunk->fixed_fields_["float_vector"]; + raw.resize(vectors.float_data_.size() * sizeof(float)); + memcpy(raw.data(), vectors.float_data_.data(), vectors.float_data_.size() * sizeof(float)); + + std::vector value_1; + value_1.resize(n); + + for (uint64_t i = 0; i < n; ++i) { + value_1[i] = i; + } + + { + milvus::engine::FIXED_FIELD_DATA& raw = data_chunk->fixed_fields_["int64"]; + raw.resize(value_1.size() * sizeof(int64_t)); + memcpy(raw.data(), value_1.data(), value_1.size() * sizeof(int64_t)); + } +} } // namespace TEST_F(DBTest, CollectionTest) { LSN_TYPE lsn = 0; - auto next_lsn = [&]() -> decltype(lsn) { - return ++lsn; - }; + auto next_lsn = [&]() -> decltype(lsn) { return ++lsn; }; std::string c1 = "c1"; auto status = CreateCollection(db_, c1, next_lsn()); ASSERT_TRUE(status.ok()); @@ -196,9 +297,7 @@ TEST_F(DBTest, CollectionTest) { TEST_F(DBTest, PartitionTest) { LSN_TYPE lsn = 0; - auto next_lsn = [&]() -> decltype(lsn) { - return ++lsn; - }; + auto next_lsn = [&]() -> decltype(lsn) { return ++lsn; }; std::string c1 = "c1"; auto status = CreateCollection(db_, c1, next_lsn()); ASSERT_TRUE(status.ok()); @@ -235,9 +334,7 @@ TEST_F(DBTest, PartitionTest) { TEST_F(DBTest, VisitorTest) { LSN_TYPE lsn = 0; - auto next_lsn = [&]() -> decltype(lsn) { - return ++lsn; - }; + auto next_lsn = [&]() -> decltype(lsn) { return ++lsn; }; std::string c1 = "c1"; auto status = CreateCollection(db_, c1, next_lsn()); @@ -341,54 +438,34 @@ TEST_F(DBTest, VisitorTest) { TEST_F(DBTest, QueryTest) { LSN_TYPE lsn = 0; - auto next_lsn = [&]() -> decltype(lsn) { - return ++lsn; - }; + auto next_lsn = [&]() -> decltype(lsn) { return ++lsn; }; std::string c1 = "c1"; - auto status = CreateCollection(db_, c1, next_lsn()); + auto status = CreateCollection3(db_, c1, next_lsn()); ASSERT_TRUE(status.ok()); - std::stringstream p_name; - auto num = RandomInt(1, 3); - for (auto i = 0; i < num; ++i) { - p_name.str(""); - p_name << "partition_" << i; - status = db_->CreatePartition(c1, p_name.str()); - ASSERT_TRUE(status.ok()); - } + const uint64_t entity_count = 10000; + milvus::engine::DataChunkPtr data_chunk; + BuildEntities2(entity_count, 0, data_chunk); - ScopedSnapshotT ss; - status = Snapshots::GetInstance().GetSnapshot(ss, c1); + status = db_->Insert(c1, "", data_chunk); ASSERT_TRUE(status.ok()); - SegmentFileContext sf_context; - SFContextBuilder(sf_context, ss); - - auto new_total = 0; - auto &partitions = ss->GetResources(); - ID_TYPE partition_id; - for (auto &kv : partitions) { - num = RandomInt(1, 3); - auto row_cnt = 100; - for (auto i = 0; i < num; ++i) { - ASSERT_TRUE(CreateSegment(ss, kv.first, next_lsn(), sf_context, row_cnt).ok()); - } - new_total += num; - partition_id = kv.first; - } - - status = Snapshots::GetInstance().GetSnapshot(ss, c1); + status = db_->Flush(); ASSERT_TRUE(status.ok()); milvus::server::ContextPtr ctx1; - std::vector partition_patterns; - milvus::query::GeneralQueryPtr general_query; - milvus::query::QueryPtr query_ptr; + milvus::query::QueryPtr query_ptr = std::make_shared(); + milvus::engine::QueryResultPtr result = std::make_shared(); + std::vector field_names; - std::unordered_map attr_type; - milvus::engine::QueryResult result; - //db_->Query(ctx1, c1, partition_patterns, general_query, query_ptr, field_names, attr_type, result); + std::vector partitions; + int64_t nq = 5; + int64_t topk = 10; + BuildQueryPtr(c1, nq, topk, field_names, partitions, query_ptr); + status = db_->Query(ctx1, query_ptr, result); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(result->row_num_, nq); } TEST_F(DBTest, InsertTest) { @@ -433,7 +510,7 @@ TEST_F(DBTest, MergeTest) { ASSERT_TRUE(status.ok()); } - sleep(2); // wait to merge + sleep(2); // wait to merge int64_t row_count = 0; status = db_->CountEntities(collection_name, row_count); @@ -448,7 +525,7 @@ TEST_F(DBTest, MergeTest) { auto root_path = GetOptions().meta_.path_ + milvus::engine::COLLECTIONS_FOLDER; std::vector segment_paths; - auto seg_executor = [&] (const SegmentPtr& segment, SegmentIterator* handler) -> Status { + auto seg_executor = [&](const SegmentPtr& segment, SegmentIterator* handler) -> Status { std::string res_path = milvus::engine::snapshot::GetResPath(root_path, segment); std::cout << res_path << std::endl; if (!boost::filesystem::is_directory(res_path)) { @@ -463,11 +540,11 @@ TEST_F(DBTest, MergeTest) { ASSERT_TRUE(status.ok()) << status.ToString(); std::set segment_file_paths; - auto sf_executor = [&] (const SegmentFilePtr& segment_file, SegmentFileIterator* handler) -> Status { + auto sf_executor = [&](const SegmentFilePtr& segment_file, SegmentFileIterator* handler) -> Status { std::string res_path = milvus::engine::snapshot::GetResPath(root_path, segment_file); - if (boost::filesystem::is_regular_file(res_path) - || boost::filesystem::is_regular_file(res_path + milvus::codec::IdBloomFilterFormat::FilePostfix()) - || boost::filesystem::is_regular_file(res_path + milvus::codec::DeletedDocsFormat::FilePostfix())) { + if (boost::filesystem::is_regular_file(res_path) || + boost::filesystem::is_regular_file(res_path + milvus::codec::IdBloomFilterFormat::FilePostfix()) || + boost::filesystem::is_regular_file(res_path + milvus::codec::DeletedDocsFormat::FilePostfix())) { segment_file_paths.insert(res_path); std::cout << res_path << std::endl; } @@ -479,7 +556,7 @@ TEST_F(DBTest, MergeTest) { std::set expect_file_paths; boost::filesystem::recursive_directory_iterator iter(root_path); boost::filesystem::recursive_directory_iterator end; - for (; iter != end ; ++iter) { + for (; iter != end; ++iter) { if (boost::filesystem::is_regular_file((*iter).path())) { expect_file_paths.insert((*iter).path().filename().string()); } @@ -608,6 +685,6 @@ TEST_F(DBTest, StatsTest) { int64_t row_count = json_stats[milvus::engine::JSON_ROW_COUNT]; ASSERT_EQ(row_count, entity_count * 2); -// std::string ss = json_stats.dump(); -// std::cout << ss << std::endl; + // std::string ss = json_stats.dump(); + // std::cout << ss << std::endl; } -- GitLab