From f674561e36983537979d47d2cecb7010d1b0705f Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 10 Sep 2019 16:43:17 +0800 Subject: [PATCH] MS-538 1. update unittest Former-commit-id: 38fefd469b3b69ccdcbfb3c741002d8e4703c702 --- .../knowhere/index/vector_index/idmap.h | 12 +++++ .../knowhere/index/preprocessor/normalize.cpp | 44 +++++++++---------- .../index/vector_index/cpu_kdt_rng.cpp | 12 ++--- .../knowhere/index/vector_index/gpu_ivf.cpp | 18 ++------ .../src/knowhere/index/vector_index/idmap.cpp | 18 +++++++- cpp/src/core/test/test_idmap.cpp | 35 +++++++++------ cpp/src/core/test/test_ivf.cpp | 16 ++++--- cpp/src/core/test/test_nsg/test_nsg.cpp | 9 ++++ 8 files changed, 102 insertions(+), 62 deletions(-) diff --git a/cpp/src/core/include/knowhere/index/vector_index/idmap.h b/cpp/src/core/include/knowhere/index/vector_index/idmap.h index d5d3a43f..95822490 100644 --- a/cpp/src/core/include/knowhere/index/vector_index/idmap.h +++ b/cpp/src/core/include/knowhere/index/vector_index/idmap.h @@ -32,6 +32,12 @@ class IDMAP : public VectorIndex, public BasicIndex { virtual int64_t *GetRawIds(); protected: + virtual void search_impl(int64_t n, + const float *data, + int64_t k, + float *distances, + int64_t *labels, + const Config &cfg); std::mutex mutex_; }; @@ -49,6 +55,12 @@ class GPUIDMAP : public IDMAP, public GPUIndex { VectorIndexPtr CopyGpuToGpu(const int64_t &device_id, const Config &config) override; protected: + void search_impl(int64_t n, + const float *data, + int64_t k, + float *distances, + int64_t *labels, + const Config &cfg) override; BinarySet SerializeImpl() override; void LoadImpl(const BinarySet &index_binary) override; }; diff --git a/cpp/src/core/src/knowhere/index/preprocessor/normalize.cpp b/cpp/src/core/src/knowhere/index/preprocessor/normalize.cpp index a2685434..9e9ae924 100644 --- a/cpp/src/core/src/knowhere/index/preprocessor/normalize.cpp +++ b/cpp/src/core/src/knowhere/index/preprocessor/normalize.cpp @@ -9,32 +9,32 @@ namespace knowhere { DatasetPtr NormalizePreprocessor::Preprocess(const DatasetPtr &dataset) { - // TODO: wrap dataset->tensor - auto tensor = dataset->tensor()[0]; - auto p_data = (float *)tensor->raw_mutable_data(); - auto dimension = tensor->shape()[1]; - auto rows = tensor->shape()[0]; - -#pragma omp parallel for - for (auto i = 0; i < rows; ++i) { - Normalize(&(p_data[i * dimension]), dimension); - } +// // TODO: wrap dataset->tensor +// auto tensor = dataset->tensor()[0]; +// auto p_data = (float *)tensor->raw_mutable_data(); +// auto dimension = tensor->shape()[1]; +// auto rows = tensor->shape()[0]; +// +//#pragma omp parallel for +// for (auto i = 0; i < rows; ++i) { +// Normalize(&(p_data[i * dimension]), dimension); +// } } void NormalizePreprocessor::Normalize(float *arr, int64_t dimension) { - double vector_length = 0; - for (auto j = 0; j < dimension; j++) { - double val = arr[j]; - vector_length += val * val; - } - vector_length = std::sqrt(vector_length); - if (vector_length < 1e-6) { - auto val = (float) (1.0 / std::sqrt((double) dimension)); - for (int j = 0; j < dimension; j++) arr[j] = val; - } else { - for (int j = 0; j < dimension; j++) arr[j] = (float) (arr[j] / vector_length); - } + //double vector_length = 0; + //for (auto j = 0; j < dimension; j++) { + // double val = arr[j]; + // vector_length += val * val; + //} + //vector_length = std::sqrt(vector_length); + //if (vector_length < 1e-6) { + // auto val = (float) (1.0 / std::sqrt((double) dimension)); + // for (int j = 0; j < dimension; j++) arr[j] = val; + //} else { + // for (int j = 0; j < dimension; j++) arr[j] = (float) (arr[j] / vector_length); + //} } } // namespace knowhere diff --git a/cpp/src/core/src/knowhere/index/vector_index/cpu_kdt_rng.cpp b/cpp/src/core/src/knowhere/index/vector_index/cpu_kdt_rng.cpp index de885941..ddcd8d78 100644 --- a/cpp/src/core/src/knowhere/index/vector_index/cpu_kdt_rng.cpp +++ b/cpp/src/core/src/knowhere/index/vector_index/cpu_kdt_rng.cpp @@ -70,10 +70,10 @@ CPUKDTRNG::Train(const DatasetPtr &origin, const Config &train_config) { SetParameters(train_config); DatasetPtr dataset = origin->Clone(); - if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine - && preprocessor_) { + //if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine + // && preprocessor_) { preprocessor_->Preprocess(dataset); - } + //} auto vectorset = ConvertToVectorSet(dataset); auto metaset = ConvertToMetadataSet(dataset); @@ -88,10 +88,10 @@ CPUKDTRNG::Add(const DatasetPtr &origin, const Config &add_config) { SetParameters(add_config); DatasetPtr dataset = origin->Clone(); - if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine - && preprocessor_) { + //if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine + // && preprocessor_) { preprocessor_->Preprocess(dataset); - } + //} auto vectorset = ConvertToVectorSet(dataset); auto metaset = ConvertToMetadataSet(dataset); diff --git a/cpp/src/core/src/knowhere/index/vector_index/gpu_ivf.cpp b/cpp/src/core/src/knowhere/index/vector_index/gpu_ivf.cpp index 1474ff2e..c1498c33 100644 --- a/cpp/src/core/src/knowhere/index/vector_index/gpu_ivf.cpp +++ b/cpp/src/core/src/knowhere/index/vector_index/gpu_ivf.cpp @@ -280,15 +280,15 @@ void FaissGpuResourceMgr::InitResource() { is_init = true; - std::cout << "InitResource" << std::endl; + //std::cout << "InitResource" << std::endl; for(auto& device : devices_params_) { auto& device_id = device.first; - std::cout << "Device Id: " << device_id << std::endl; + //std::cout << "Device Id: " << device_id << std::endl; auto& device_param = device.second; auto& bq = idle_map[device_id]; for (int64_t i = 0; i < device_param.resource_num; ++i) { - std::cout << "Resource Id: " << i << std::endl; + //std::cout << "Resource Id: " << i << std::endl; auto raw_resource = std::make_shared(); // TODO(linxj): enable set pinned memory @@ -298,7 +298,7 @@ void FaissGpuResourceMgr::InitResource() { bq.Put(res_wrapper); } } - std::cout << "End initResource" << std::endl; + //std::cout << "End initResource" << std::endl; } ResPtr FaissGpuResourceMgr::GetRes(const int64_t &device_id, @@ -315,16 +315,6 @@ ResPtr FaissGpuResourceMgr::GetRes(const int64_t &device_id, return nullptr; } -//bool FaissGpuResourceMgr::GetRes(const int64_t &device_id, -// ResPtr &res, -// const int64_t &alloc_size) { -// InitResource(); -// -// std::lock_guard lk(res->mutex); -// AllocateTempMem(res, device_id, alloc_size); -// return true; -//} - void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const ResPtr &res) { auto finder = idle_map.find(device_id); if (finder != idle_map.end()) { diff --git a/cpp/src/core/src/knowhere/index/vector_index/idmap.cpp b/cpp/src/core/src/knowhere/index/vector_index/idmap.cpp index 93fdde5c..1c524803 100644 --- a/cpp/src/core/src/knowhere/index/vector_index/idmap.cpp +++ b/cpp/src/core/src/knowhere/index/vector_index/idmap.cpp @@ -50,7 +50,7 @@ DatasetPtr IDMAP::Search(const DatasetPtr &dataset, const Config &config) { auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems); auto res_dis = (float *) malloc(sizeof(float) * elems); - index_->search(rows, (float *) p_data, k, res_dis, res_ids); + search_impl(rows, (float *) p_data, k, res_dis, res_ids, Config()); auto id_buf = MakeMutableBufferSmart((uint8_t *) res_ids, sizeof(int64_t) * elems); auto dist_buf = MakeMutableBufferSmart((uint8_t *) res_dis, sizeof(float) * elems); @@ -72,6 +72,11 @@ DatasetPtr IDMAP::Search(const DatasetPtr &dataset, const Config &config) { return std::make_shared(array, nullptr); } +void IDMAP::search_impl(int64_t n, const float *data, int64_t k, float *distances, int64_t *labels, const Config &cfg) { + index_->search(n, (float *) data, k, distances, labels); + +} + void IDMAP::Add(const DatasetPtr &dataset, const Config &config) { if (!index_) { KNOWHERE_THROW_MSG("index not initialize"); @@ -207,6 +212,7 @@ void GPUIDMAP::LoadImpl(const BinarySet &index_binary) { if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_) ){ ResScope rs(gpu_id_, res); + res_ = res; auto device_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index); index_.reset(device_index); } else { @@ -230,5 +236,15 @@ int64_t *GPUIDMAP::GetRawIds() { KNOWHERE_THROW_MSG("Not support"); } +void GPUIDMAP::search_impl(int64_t n, + const float *data, + int64_t k, + float *distances, + int64_t *labels, + const Config &cfg) { + ResScope rs(res_); + index_->search(n, (float *) data, k, distances, labels); +} + } } diff --git a/cpp/src/core/test/test_idmap.cpp b/cpp/src/core/test/test_idmap.cpp index b0e6dd6d..9a8001be 100644 --- a/cpp/src/core/test/test_idmap.cpp +++ b/cpp/src/core/test/test_idmap.cpp @@ -12,6 +12,7 @@ #include "knowhere/index/vector_index/idmap.h" #include "knowhere/adapter/structure.h" #include "knowhere/index/vector_index/cloner.h" +#include "knowhere/common/exception.h" #include "utils.h" @@ -65,19 +66,20 @@ void PrintResult(const DatasetPtr &result, } TEST_F(IDMAPTest, idmap_basic) { - assert(!xb.empty()); + ASSERT_TRUE(!xb.empty()); Config Default_cfg; index_->Train(Config::object{{"dim", dim}, {"metric_type", "L2"}}); index_->Add(base_dataset, Default_cfg); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - assert(index_->GetRawVectors() != nullptr); - assert(index_->GetRawIds() != nullptr); + ASSERT_TRUE(index_->GetRawVectors() != nullptr); + ASSERT_TRUE(index_->GetRawIds() != nullptr); auto result = index_->Search(query_dataset, Config::object{{"k", k}}); AssertAnns(result, nq, k); PrintResult(result, nq, k); + index_->Seal(); auto binaryset = index_->Serialize(); auto new_index = std::make_shared(); new_index->Load(binaryset); @@ -126,15 +128,15 @@ TEST_F(IDMAPTest, idmap_serialize) { } TEST_F(IDMAPTest, copy_test) { - assert(!xb.empty()); + ASSERT_TRUE(!xb.empty()); Config Default_cfg; index_->Train(Config::object{{"dim", dim}, {"metric_type", "L2"}}); index_->Add(base_dataset, Default_cfg); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - assert(index_->GetRawVectors() != nullptr); - assert(index_->GetRawIds() != nullptr); + ASSERT_TRUE(index_->GetRawVectors() != nullptr); + ASSERT_TRUE(index_->GetRawIds() != nullptr); auto result = index_->Search(query_dataset, Config::object{{"k", k}}); AssertAnns(result, nq, k); //PrintResult(result, nq, k); @@ -151,8 +153,16 @@ TEST_F(IDMAPTest, copy_test) { auto clone_index = CopyCpuToGpu(index_, device_id, Config()); auto clone_result = clone_index->Search(query_dataset, Config::object{{"k", k}}); AssertAnns(clone_result, nq, k); - //assert(std::static_pointer_cast(clone_index)->GetRawVectors() != nullptr); - //assert(std::static_pointer_cast(clone_index)->GetRawIds() != nullptr); + ASSERT_THROW({ std::static_pointer_cast(clone_index)->GetRawVectors(); }, + zilliz::knowhere::KnowhereException); + ASSERT_THROW({ std::static_pointer_cast(clone_index)->GetRawIds(); }, + zilliz::knowhere::KnowhereException); + + auto binary = clone_index->Serialize(); + clone_index->Load(binary); + auto new_result = clone_index->Search(query_dataset, Config::object{{"k", k}}); + AssertAnns(new_result, nq, k); + auto clone_gpu_idx = clone_index->Clone(); auto clone_gpu_res = clone_gpu_idx->Search(query_dataset, Config::object{{"k", k}}); AssertAnns(clone_gpu_res, nq, k); @@ -161,14 +171,13 @@ TEST_F(IDMAPTest, copy_test) { auto host_index = CopyGpuToCpu(clone_index, Config()); auto host_result = host_index->Search(query_dataset, Config::object{{"k", k}}); AssertAnns(host_result, nq, k); - assert(std::static_pointer_cast(host_index)->GetRawVectors() != nullptr); - assert(std::static_pointer_cast(host_index)->GetRawIds() != nullptr); + ASSERT_TRUE(std::static_pointer_cast(host_index)->GetRawVectors() != nullptr); + ASSERT_TRUE(std::static_pointer_cast(host_index)->GetRawIds() != nullptr); // gpu to gpu auto device_index = CopyCpuToGpu(index_, device_id, Config()); - auto device_result = device_index->Search(query_dataset, Config::object{{"k", k}}); + auto new_device_index = std::static_pointer_cast(device_index)->CopyGpuToGpu(device_id, Config()); + auto device_result = new_device_index->Search(query_dataset, Config::object{{"k", k}}); AssertAnns(device_result, nq, k); - //assert(std::static_pointer_cast(device_index)->GetRawVectors() != nullptr); - //assert(std::static_pointer_cast(device_index)->GetRawIds() != nullptr); } } diff --git a/cpp/src/core/test/test_ivf.cpp b/cpp/src/core/test/test_ivf.cpp index 625a9ca0..5b1743ec 100644 --- a/cpp/src/core/test/test_ivf.cpp +++ b/cpp/src/core/test/test_ivf.cpp @@ -394,8 +394,11 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) { { - index_type = "GPUIVF"; - index_ = IndexFactory(index_type); + index_ = std::make_shared(-1); + ASSERT_EQ(std::dynamic_pointer_cast(index_)->GetGpuDevice(), -1); + std::dynamic_pointer_cast(index_)->SetGpuDevice(device_id); + ASSERT_EQ(std::dynamic_pointer_cast(index_)->GetGpuDevice(), device_id); + auto preprocessor = index_->BuildPreprocessor(base_dataset, preprocess_cfg); index_->set_preprocessor(preprocessor); train_cfg = Config::object{{"nlist", 1638}, {"gpu_id", device_id}, {"metric_type", "L2"}}; @@ -412,8 +415,9 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) { if (i > search_count - 6 || i < 5) tc.RecordSection("search once"); } - tc.RecordSection("search all"); + tc.ElapseFromBegin("search all"); } + FaissGpuResourceMgr::GetInstance().Dump(); { // IVF-Search @@ -430,7 +434,7 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) { if (i > search_count - 6 || i < 5) tc.RecordSection("search once"); } - tc.RecordSection("search all"); + tc.ElapseFromBegin("search all"); } } @@ -461,7 +465,7 @@ TEST_F(GPURESTEST, gpuivfsq) { if (i > search_count - 6 || i < 5) tc.RecordSection("search once"); } - tc.RecordSection("search all"); + tc.ElapseFromBegin("search all"); } { @@ -493,7 +497,7 @@ TEST_F(GPURESTEST, gpuivfsq) { if (i > search_count - 6 || i < 5) tc.RecordSection("search once"); } - tc.RecordSection("search all"); + tc.ElapseFromBegin("search all"); delete cpu_index; delete search_idx; } diff --git a/cpp/src/core/test/test_nsg/test_nsg.cpp b/cpp/src/core/test/test_nsg/test_nsg.cpp index 3c8d4777..6d378c41 100644 --- a/cpp/src/core/test/test_nsg/test_nsg.cpp +++ b/cpp/src/core/test/test_nsg/test_nsg.cpp @@ -7,6 +7,7 @@ #include #include +#include "knowhere/common/exception.h" #include "knowhere/index/vector_index/gpu_ivf.h" #include "knowhere/index/vector_index/nsg_index.h" #include "knowhere/index/vector_index/nsg/nsg_io.h" @@ -71,6 +72,14 @@ TEST_P(NSGInterfaceTest, basic_test) { auto new_result = new_index->Search(query_dataset, Config::object{{"k", k}}); AssertAnns(result, nq, k); + ASSERT_EQ(index_->Count(), nb); + ASSERT_EQ(index_->Dimension(), dim); + ASSERT_THROW({index_->Clone();}, zilliz::knowhere::KnowhereException); + ASSERT_NO_THROW({ + index_->Add(base_dataset, Config()); + index_->Seal(); + }); + { //std::cout << "k = 1" << std::endl; //new_index->Search(GenQuery(1), Config::object{{"k", 1}}); -- GitLab