diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp
index a0abb5e1ad3ff4f10b5e0fd4ba7d3ed6f04cdd72..ebc836a97875775014549242e81fed67fe0938ec 100644
--- a/cpp/src/db/DBImpl.cpp
+++ b/cpp/src/db/DBImpl.cpp
@@ -248,144 +248,144 @@ Status DBImpl::Query(const std::string& table_id, const std::vector<std::string>& file_ids,
     return QueryAsync(table_id, files_array, k, nq, vectors, dates, results);
 }
 
-Status DBImpl::QuerySync(const std::string& table_id, uint64_t k, uint64_t nq,
-        const float* vectors, const meta::DatesT& dates, QueryResults& results) {
-    meta::DatePartionedTableFilesSchema files;
-    auto status = meta_ptr_->FilesToSearch(table_id, dates, files);
-    if (!status.ok()) { return status; }
-
-    ENGINE_LOG_DEBUG << "Search DateT Size = " << files.size();
-
-    meta::TableFilesSchema index_files;
-    meta::TableFilesSchema raw_files;
-    for (auto &day_files : files) {
-        for (auto &file : day_files.second) {
-            file.file_type_ == meta::TableFileSchema::INDEX ?
-            index_files.push_back(file) : raw_files.push_back(file);
-        }
-    }
-
-    int dim = 0;
-    if (!index_files.empty()) {
-        dim = index_files[0].dimension_;
-    } else if (!raw_files.empty()) {
-        dim = raw_files[0].dimension_;
-    } else {
-        ENGINE_LOG_DEBUG << "no files to search";
-        return Status::OK();
-    }
-
-    {
-        // [{ids, distence}, ...]
-        using SearchResult = std::pair<std::vector<long>, std::vector<float>>;
-        std::vector<SearchResult> batchresult(nq); // allocate nq cells.
-
-        auto cluster = [&](long *nns, float *dis, const int& k) -> void {
-            for (int i = 0; i < nq; ++i) {
-                auto f_begin = batchresult[i].first.cbegin();
-                auto s_begin = batchresult[i].second.cbegin();
-                batchresult[i].first.insert(f_begin, nns + i * k, nns + i * k + k);
-                batchresult[i].second.insert(s_begin, dis + i * k, dis + i * k + k);
-            }
-        };
-
-        // Allocate Memory
-        float *output_distence;
-        long *output_ids;
-        output_distence = (float *) malloc(k * nq * sizeof(float));
-        output_ids = (long *) malloc(k * nq * sizeof(long));
-        memset(output_distence, 0, k * nq * sizeof(float));
-        memset(output_ids, 0, k * nq * sizeof(long));
-
-        long search_set_size = 0;
-
-        auto search_in_index = [&](meta::TableFilesSchema& file_vec) -> void {
-            for (auto &file : file_vec) {
-
-                ExecutionEnginePtr index = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_);
-                index->Load();
-                auto file_size = index->PhysicalSize();
-                search_set_size += file_size;
-
-                ENGINE_LOG_DEBUG << "Search file_type " << file.file_type_ << " Of Size: "
-                    << file_size/(1024*1024) << " M";
-
-                int inner_k = index->Count() < k ? index->Count() : k;
-                auto start_time = METRICS_NOW_TIME;
-                index->Search(nq, vectors, inner_k, output_distence, output_ids);
-                auto end_time = METRICS_NOW_TIME;
-                auto total_time = METRICS_MICROSECONDS(start_time, end_time);
-                CollectFileMetrics(file.file_type_, file_size, total_time);
-                cluster(output_ids, output_distence, inner_k); // cluster to each query
-                memset(output_distence, 0, k * nq * sizeof(float));
-                memset(output_ids, 0, k * nq * sizeof(long));
-            }
-        };
-
-        auto topk_cpu = [](const std::vector<float> &input_data,
-                           const int &k,
-                           float *output_distence,
-                           long *output_ids) -> void {
-            std::map<float, std::vector<int>> inverted_table;
-            for (int i = 0; i < input_data.size(); ++i) {
-                if (inverted_table.count(input_data[i]) == 1) {
-                    auto& ori_vec = inverted_table[input_data[i]];
-                    ori_vec.push_back(i);
-                }
-                else {
-                    inverted_table[input_data[i]] = std::vector<int>{i};
-                }
-            }
-
-            int count = 0;
-            for (auto &item : inverted_table){
-                if (count == k) break;
-                for (auto &id : item.second){
-                    output_distence[count] = item.first;
-                    output_ids[count] = id;
-                    if (++count == k) break;
-                }
-            }
-        };
-        auto cluster_topk = [&]() -> void {
-            QueryResult res;
-            for (auto &result_pair : batchresult) {
-                auto &dis = result_pair.second;
-                auto &nns = result_pair.first;
-
-                topk_cpu(dis, k, output_distence, output_ids);
-
-                int inner_k = dis.size() < k ? dis.size() : k;
-                for (int i = 0; i < inner_k; ++i) {
-                    res.emplace_back(std::make_pair(nns[output_ids[i]], output_distence[i])); // mapping
-                }
-                results.push_back(res); // append to result list
-                res.clear();
-                memset(output_distence, 0, k * nq * sizeof(float));
-                memset(output_ids, 0, k * nq * sizeof(long));
-            }
-        };
-
-        search_in_index(raw_files);
-        search_in_index(index_files);
-
-        ENGINE_LOG_DEBUG << "Search Overall Set Size = " << search_set_size << " M";
-        cluster_topk();
-
-        free(output_distence);
-        free(output_ids);
-    }
-
-    if (results.empty()) {
-        return Status::NotFound("Group " + table_id + ", search result not found!");
-    }
-
-    QueryResults temp_results;
-    CalcScore(nq, vectors, dim, results, temp_results);
-    results.swap(temp_results);
-
-    return Status::OK();
-}
+//Status DBImpl::QuerySync(const std::string& table_id, uint64_t k, uint64_t nq,
+//        const float* vectors, const meta::DatesT& dates, QueryResults& results) {
+//    meta::DatePartionedTableFilesSchema files;
+//    auto status = meta_ptr_->FilesToSearch(table_id, dates, files);
+//    if (!status.ok()) { return status; }
+//
+//    ENGINE_LOG_DEBUG << "Search DateT Size = " << files.size();
+//
+//    meta::TableFilesSchema index_files;
+//    meta::TableFilesSchema raw_files;
+//    for (auto &day_files : files) {
+//        for (auto &file : day_files.second) {
+//            file.file_type_ == meta::TableFileSchema::INDEX ?
+//            index_files.push_back(file) : raw_files.push_back(file);
+//        }
+//    }
+//
+//    int dim = 0;
+//    if (!index_files.empty()) {
+//        dim = index_files[0].dimension_;
+//    } else if (!raw_files.empty()) {
+//        dim = raw_files[0].dimension_;
+//    } else {
+//        ENGINE_LOG_DEBUG << "no files to search";
+//        return Status::OK();
+//    }
+//
+//    {
+//        // [{ids, distence}, ...]
+//        using SearchResult = std::pair<std::vector<long>, std::vector<float>>;
+//        std::vector<SearchResult> batchresult(nq); // allocate nq cells.
+//
+//        auto cluster = [&](long *nns, float *dis, const int& k) -> void {
+//            for (int i = 0; i < nq; ++i) {
+//                auto f_begin = batchresult[i].first.cbegin();
+//                auto s_begin = batchresult[i].second.cbegin();
+//                batchresult[i].first.insert(f_begin, nns + i * k, nns + i * k + k);
+//                batchresult[i].second.insert(s_begin, dis + i * k, dis + i * k + k);
+//            }
+//        };
+//
+//        // Allocate Memory
+//        float *output_distence;
+//        long *output_ids;
+//        output_distence = (float *) malloc(k * nq * sizeof(float));
+//        output_ids = (long *) malloc(k * nq * sizeof(long));
+//        memset(output_distence, 0, k * nq * sizeof(float));
+//        memset(output_ids, 0, k * nq * sizeof(long));
+//
+//        long search_set_size = 0;
+//
+//        auto search_in_index = [&](meta::TableFilesSchema& file_vec) -> void {
+//            for (auto &file : file_vec) {
+//
+//                ExecutionEnginePtr index = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_);
+//                index->Load();
+//                auto file_size = index->PhysicalSize();
+//                search_set_size += file_size;
+//
+//                ENGINE_LOG_DEBUG << "Search file_type " << file.file_type_ << " Of Size: "
+//                    << file_size/(1024*1024) << " M";
+//
+//                int inner_k = index->Count() < k ? index->Count() : k;
+//                auto start_time = METRICS_NOW_TIME;
+//                index->Search(nq, vectors, inner_k, output_distence, output_ids);
+//                auto end_time = METRICS_NOW_TIME;
+//                auto total_time = METRICS_MICROSECONDS(start_time, end_time);
+//                CollectFileMetrics(file.file_type_, file_size, total_time);
+//                cluster(output_ids, output_distence, inner_k); // cluster to each query
+//                memset(output_distence, 0, k * nq * sizeof(float));
+//                memset(output_ids, 0, k * nq * sizeof(long));
+//            }
+//        };
+//
+//        auto topk_cpu = [](const std::vector<float> &input_data,
+//                           const int &k,
+//                           float *output_distence,
+//                           long *output_ids) -> void {
+//            std::map<float, std::vector<int>> inverted_table;
+//            for (int i = 0; i < input_data.size(); ++i) {
+//                if (inverted_table.count(input_data[i]) == 1) {
+//                    auto& ori_vec = inverted_table[input_data[i]];
+//                    ori_vec.push_back(i);
+//                }
+//                else {
+//                    inverted_table[input_data[i]] = std::vector<int>{i};
+//                }
+//            }
+//
+//            int count = 0;
+//            for (auto &item : inverted_table){
+//                if (count == k) break;
+//                for (auto &id : item.second){
+//                    output_distence[count] = item.first;
+//                    output_ids[count] = id;
+//                    if (++count == k) break;
+//                }
+//            }
+//        };
+//        auto cluster_topk = [&]() -> void {
+//            QueryResult res;
+//            for (auto &result_pair : batchresult) {
+//                auto &dis = result_pair.second;
+//                auto &nns = result_pair.first;
+//
+//                topk_cpu(dis, k, output_distence, output_ids);
+//
+//                int inner_k = dis.size() < k ? dis.size() : k;
+//                for (int i = 0; i < inner_k; ++i) {
+//                    res.emplace_back(std::make_pair(nns[output_ids[i]], output_distence[i])); // mapping
+//                }
+//                results.push_back(res); // append to result list
+//                res.clear();
+//                memset(output_distence, 0, k * nq * sizeof(float));
+//                memset(output_ids, 0, k * nq * sizeof(long));
+//            }
+//        };
+//
+//        search_in_index(raw_files);
+//        search_in_index(index_files);
+//
+//        ENGINE_LOG_DEBUG << "Search Overall Set Size = " << search_set_size << " M";
+//        cluster_topk();
+//
+//        free(output_distence);
+//        free(output_ids);
+//    }
+//
+//    if (results.empty()) {
+//        return Status::NotFound("Group " + table_id + ", search result not found!");
+//    }
+//
+//    QueryResults temp_results;
+//    CalcScore(nq, vectors, dim, results, temp_results);
+//    results.swap(temp_results);
+//
+//    return Status::OK();
+//}
 
 Status DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files,
                           uint64_t k, uint64_t nq, const float* vectors,
diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h
index b4d60a27a9d518cbbb7f1c3a71ae6df5c07dd1fa..43627cbace48763d9a9a4f6c96c6bf69293b8b9d 100644
--- a/cpp/src/db/DBImpl.h
+++ b/cpp/src/db/DBImpl.h
@@ -62,8 +62,8 @@ public:
     virtual ~DBImpl();
 
 private:
-    Status QuerySync(const std::string& table_id, uint64_t k, uint64_t nq,
-            const float* vectors, const meta::DatesT& dates, QueryResults& results);
+//    Status QuerySync(const std::string& table_id, uint64_t k, uint64_t nq,
+//            const float* vectors, const meta::DatesT& dates, QueryResults& results);
 
     Status QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files,
                       uint64_t k, uint64_t nq, const float* vectors,
diff --git a/cpp/src/db/ExecutionEngine.cpp b/cpp/src/db/ExecutionEngine.cpp
index f27d04dfa0976ab6b6e729b0d69075d0cd7c7f83..3412eb34bd110cffb4496e36d834642c3e79e012 100644
--- a/cpp/src/db/ExecutionEngine.cpp
+++ b/cpp/src/db/ExecutionEngine.cpp
@@ -11,14 +11,9 @@ namespace zilliz {
 namespace milvus {
 namespace engine {
 
-Status ExecutionEngine::AddWithIds(const std::vector<float>& vectors, const std::vector<long>& vector_ids) {
-    long n1 = (long)vectors.size();
-    long n2 = (long)vector_ids.size();
-    if (n1 != n2) {
-        LOG(ERROR) << "vectors size is not equal to the size of vector_ids: " << n1 << "!=" << n2;
-        return Status::Error("Error: AddWithIds");
-    }
-    return AddWithIds(n1, vectors.data(), vector_ids.data());
+Status ExecutionEngine::AddWithIdArray(const std::vector<float>& vectors, const std::vector<long>& vector_ids) {
+    long n = (long)vector_ids.size();
+    return AddWithIds(n, vectors.data(), vector_ids.data());
 }
 
diff --git a/cpp/src/db/ExecutionEngine.h b/cpp/src/db/ExecutionEngine.h
index f26dce63715f0def1b7d90f08b98c1643c4ad599..d2b4d01e670b70e5340d74cc7534bc8c4aea6f95 100644
--- a/cpp/src/db/ExecutionEngine.h
+++ b/cpp/src/db/ExecutionEngine.h
@@ -23,8 +23,7 @@ enum class EngineType {
 
 class ExecutionEngine {
 public:
-    virtual Status AddWithIds(const std::vector<float>& vectors,
-                              const std::vector<long>& vector_ids);
+    virtual Status AddWithIdArray(const std::vector<float>& vectors, const std::vector<long>& vector_ids);
 
     virtual Status AddWithIds(long n, const float *xdata, const long *xids) = 0;
 
diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp
index aa311550eee94ba9e47acdb510af96d32096e92a..7b33545d3a1adf52fe7f32759143ed209d767bb2 100644
--- a/cpp/unittest/db/db_tests.cpp
+++ b/cpp/unittest/db/db_tests.cpp
@@ -88,20 +88,14 @@ TEST_F(DBTest, CONFIG_TEST) {
 
 TEST_F(DBTest, DB_TEST) {
-    static const std::string table_name = "test_group";
-    static const int table_dim = 256;
-
-    engine::meta::TableSchema table_info;
-    table_info.dimension_ = table_dim;
-    table_info.table_id_ = table_name;
-    table_info.engine_type_ = (int)engine::EngineType::FAISS_IDMAP;
+    engine::meta::TableSchema table_info = BuildTableSchema();
     engine::Status stat = db_->CreateTable(table_info);
 
     engine::meta::TableSchema table_info_get;
-    table_info_get.table_id_ = table_name;
+    table_info_get.table_id_ = TABLE_NAME;
     stat = db_->DescribeTable(table_info_get);
     ASSERT_STATS(stat);
-    ASSERT_EQ(table_info_get.dimension_, table_dim);
+    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
 
     engine::IDNumbers vector_ids;
     engine::IDNumbers target_ids;
 
@@ -130,7 +124,7 @@ TEST_F(DBTest, DB_TEST) {
             prev_count = count;
 
             START_TIMER;
-            stat = db_->Query(table_name, k, qb, qxb.data(), results);
+            stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results);
             ss << "Search " << j << " With Size " << count/engine::meta::M << " M";
             STOP_TIMER(ss.str());
 
@@ -153,10 +147,10 @@ TEST_F(DBTest, DB_TEST) {
     for (auto i=0; i<loop; ++i) {
         if (i==40) {
-            db_->InsertVectors(table_name, qb, qxb.data(), target_ids);
+            db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids);
             ASSERT_EQ(target_ids.size(), qb);
         } else {
-            db_->InsertVectors(table_name, nb, xb.data(), vector_ids);
+            db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
         }
         std::this_thread::sleep_for(std::chrono::microseconds(1));
     }
 
@@ -223,6 +217,18 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) {
     engine::meta::TableSchema table_info = BuildTableSchema();
     engine::Status stat = db_->CreateTable(table_info);
 
+    std::vector<engine::meta::TableSchema> table_schema_array;
+    stat = db_->AllTables(table_schema_array);
+    ASSERT_STATS(stat);
+    bool bfound = false;
+    for(auto& schema : table_schema_array) {
+        if(schema.table_id_ == TABLE_NAME) {
+            bfound = true;
+            break;
+        }
+    }
+    ASSERT_TRUE(bfound);
+
     engine::meta::TableSchema table_info_get;
     table_info_get.table_id_ = TABLE_NAME;
     stat = db_->DescribeTable(table_info_get);
diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp
index 9baef712ab6c8e492594e0e8b632b417f5940087..49cc47b4e02f24b755b9afb6bf4307aeee6f5fe9 100644
--- a/cpp/unittest/db/meta_tests.cpp
+++ b/cpp/unittest/db/meta_tests.cpp
@@ -39,6 +39,10 @@ TEST_F(MetaTest, TABLE_TEST) {
     table.table_id_ = table_id;
     status = impl_->CreateTable(table);
     ASSERT_TRUE(status.ok());
+
+    table.table_id_ = "";
+    status = impl_->CreateTable(table);
+    ASSERT_TRUE(status.ok());
 }
 
 TEST_F(MetaTest, TABLE_FILE_TEST) {
@@ -46,6 +50,7 @@ TEST_F(MetaTest, TABLE_FILE_TEST) {
 
     meta::TableSchema table;
     table.table_id_ = table_id;
+    table.dimension_ = 256;
     auto status = impl_->CreateTable(table);
 
     meta::TableFileSchema table_file;
@@ -54,6 +59,11 @@ TEST_F(MetaTest, TABLE_FILE_TEST) {
     ASSERT_TRUE(status.ok());
     ASSERT_EQ(table_file.file_type_, meta::TableFileSchema::NEW);
 
+    uint64_t cnt = 0;
+    status = impl_->Count(table_id, cnt);
+    ASSERT_TRUE(status.ok());
+    ASSERT_EQ(cnt, 0UL);
+
     auto file_id = table_file.file_id_;
 
     auto new_file_type = meta::TableFileSchema::INDEX;
diff --git a/cpp/unittest/db/misc_test.cpp b/cpp/unittest/db/misc_test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6f1f87c87418ec9fbcb1ba13248f59c6d07ebafb
--- /dev/null
+++ b/cpp/unittest/db/misc_test.cpp
@@ -0,0 +1,103 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
+// Unauthorized copying of this file, via any medium is strictly prohibited.
+// Proprietary and confidential.
+//////////////////////////////////////////////////////////////////////////////// +#include +#include +#include +#include + +#include "db/FaissExecutionEngine.h" +#include "db/Exception.h" +#include "db/Status.h" +#include "db/Options.h" +#include "db/DBMetaImpl.h" + +#include + +using namespace zilliz::milvus; + +TEST(DBMiscTest, ENGINE_API_TEST) { + //engine api AddWithIdArray + const uint16_t dim = 512; + const long n = 10; + engine::FaissExecutionEngine engine(512, "/tmp/1", "IDMap", "IDMap,Flat"); + std::vector vectors; + std::vector ids; + for (long i = 0; i < n; i++) { + for (uint16_t k = 0; k < dim; k++) { + vectors.push_back((float) k); + } + ids.push_back(i); + } + + auto status = engine.AddWithIdArray(vectors, ids); + ASSERT_TRUE(status.ok()); +} + +TEST(DBMiscTest, EXCEPTION_TEST) { + engine::Exception ex1(""); + std::string what = ex1.what(); + ASSERT_FALSE(what.empty()); + + engine::OutOfRangeException ex2; + what = ex2.what(); + ASSERT_FALSE(what.empty()); +} + +TEST(DBMiscTest, STATUS_TEST) { + engine::Status status = engine::Status::OK(); + std::string str = status.ToString(); + ASSERT_FALSE(str.empty()); + + status = engine::Status::Error("wrong", "mistake"); + ASSERT_TRUE(status.IsError()); + str = status.ToString(); + ASSERT_FALSE(str.empty()); + + status = engine::Status::NotFound("wrong", "mistake"); + ASSERT_TRUE(status.IsNotFound()); + str = status.ToString(); + ASSERT_FALSE(str.empty()); + + status = engine::Status::DBTransactionError("wrong", "mistake"); + ASSERT_TRUE(status.IsDBTransactionError()); + str = status.ToString(); + ASSERT_FALSE(str.empty()); +} + +TEST(DBMiscTest, OPTIONS_TEST) { + try { + engine::ArchiveConf archive("$$##"); + } catch (std::exception& ex) { + ASSERT_TRUE(true); + } + + { + engine::ArchiveConf archive("delete", "no"); + ASSERT_TRUE(archive.GetCriterias().empty()); + } + + { + engine::ArchiveConf archive("delete", "1:2"); + ASSERT_TRUE(archive.GetCriterias().empty()); + } + + { + engine::ArchiveConf archive("delete", "1:2:3"); + ASSERT_TRUE(archive.GetCriterias().empty()); + } +} + +TEST(DBMiscTest, META_TEST) { + engine::DBMetaOptions options; + options.path = "/tmp/milvus_test"; + engine::meta::DBMetaImpl impl(options); + + time_t tt; + time( &tt ); + int delta = 10; + engine::meta::DateT dt = impl.GetDate(tt, delta); + ASSERT_GT(dt, 0); +} \ No newline at end of file