diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index b932a0ad36d7d4679b690dc4de64b91876a9d88c..8215c29eae5b0da5bc16c0abf826ecd6db03e00c 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -83,21 +83,6 @@ Status DBImpl::search(const std::string &group_id, size_t k, size_t nq, return Status::OK(); } - // merge raw files and build flat index. - faiss::Index *index(faiss::index_factory(dim, "IDMap,Flat")); - for (auto &file : raw_files) { - auto to_merge = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(file.location); - if (!to_merge) { - LOG(DEBUG) << "Disk io from: " << file.location; - to_merge = read_index(file.location.c_str()); - zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->InsertItem(file.location, to_merge); - } - auto file_index = dynamic_cast(to_merge->data().get()); - index->add_with_ids(file_index->ntotal, - dynamic_cast(file_index->index)->xb.data(), - file_index->id_map.data()); - } - { // [{ids, distence}, ...] using SearchResult = std::pair, std::vector>; @@ -120,25 +105,20 @@ Status DBImpl::search(const std::string &group_id, size_t k, size_t nq, memset(output_distence, 0, k * nq * sizeof(float)); memset(output_ids, 0, k * nq * sizeof(long)); - // search in raw file - index->search(nq, vectors, k, output_distence, output_ids); - cluster(output_ids, output_distence); // cluster to each query - memset(output_distence, 0, k * nq * sizeof(float)); - memset(output_ids, 0, k * nq * sizeof(long)); - - // Search in index file - for (auto &file : index_files) { - auto index = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(file.location); - if (!index) { - LOG(DEBUG) << "Disk io from: " << file.location; - index = read_index(file.location.c_str()); - zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->InsertItem(file.location, index); + auto search_in_index = [&](meta::GroupFilesSchema& file_vec) -> void { + for (auto &file : file_vec) { + auto index = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(file.location); + if (!index) { + LOG(DEBUG) << "Disk io from: " << file.location; + index = read_index(file.location.c_str()); + zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->InsertItem(file.location, index); + } + index->search(nq, vectors, k, output_distence, output_ids); + cluster(output_ids, output_distence); // cluster to each query + memset(output_distence, 0, k * nq * sizeof(float)); + memset(output_ids, 0, k * nq * sizeof(long)); } - index->search(nq, vectors, k, output_distence, output_ids); - cluster(output_ids, output_distence); // cluster to each query - memset(output_distence, 0, k * nq * sizeof(float)); - memset(output_ids, 0, k * nq * sizeof(long)); - } + }; auto cluster_topk = [&]() -> void { QueryResult res; @@ -151,8 +131,13 @@ Status DBImpl::search(const std::string &group_id, size_t k, size_t nq, } results.push_back(res); // append to result list res.clear(); + memset(output_distence, 0, k * nq * sizeof(float)); + memset(output_ids, 0, k * nq * sizeof(long)); } }; + + search_in_index(raw_files); + search_in_index(index_files); cluster_topk(); free(output_distence); diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index fd5fa58aafdfce1eff4dfc76d73934abf5e4106f..76eee79c7e5357735e8b053d6554607cb265f4fc 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -66,7 +66,7 @@ TEST(DBTest, DB_TEST) { qxb[d * i] += i / 2000.; } - int loop = 50000; + int loop = 500000; for (auto i=0; icount(group_name, count); @@ -127,7 +127,7 @@ TEST(SearchTest, DB_TEST) { // prepare raw data - size_t nb = 25000; + size_t nb = 250000; size_t nq = 10; size_t k = 5; std::vector xb(nb*group_dim); @@ -162,10 +162,11 @@ TEST(SearchTest, DB_TEST) { const int batch_size = 100; for (int j = 0; j < nb / batch_size; ++j) { stat = db->add_vectors(group_name, batch_size, xb.data()+batch_size*j*group_dim, ids); + if (j == 200){ sleep(1);} ASSERT_STATS(stat); } - //sleep(10); // wait until build index finish + sleep(3); // wait until build index finish engine::QueryResults results; stat = db->search(group_name, k, nq, xq.data(), results); @@ -174,6 +175,7 @@ TEST(SearchTest, DB_TEST) { // TODO(linxj): add groundTruth assert delete db; + engine::DB::Open(opt, &db); db->drop_all(); delete db;