From 95ceb8c7ba67acab4ef2b861cf67026ca174bd0a Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Thu, 14 Nov 2019 20:22:49 +0800 Subject: [PATCH] memory usage increased slowly during searching vectors --- CHANGELOG.md | 1 + .../knowhere/adapter/SptagAdapter.cpp | 57 ++++++++++--------- .../index/knowhere/knowhere/common/Dataset.h | 16 ++++++ .../index/vector_index/IndexIDMAP.cpp | 35 ++++++------ .../knowhere/index/vector_index/IndexIVF.cpp | 34 +++++------ .../knowhere/index/vector_index/IndexNSG.cpp | 35 ++++++------ core/src/index/unittest/test_kdt.cpp | 12 ++-- core/src/index/unittest/utils.cpp | 5 +- core/src/wrapper/VecImpl.cpp | 14 +++-- 9 files changed, 118 insertions(+), 91 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb07803e..988e177a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Please mark all change in change log and use the ticket from JIRA. # Milvus 0.6.0 (TODO) ## Bug +- \#228 - memory usage increased slowly during searching vectors - \#246 - Exclude src/external folder from code coverage for jenkin ci - \#248 - Reside src/external in thirdparty diff --git a/core/src/index/knowhere/knowhere/adapter/SptagAdapter.cpp b/core/src/index/knowhere/knowhere/adapter/SptagAdapter.cpp index b4c3910a..db4a4152 100644 --- a/core/src/index/knowhere/knowhere/adapter/SptagAdapter.cpp +++ b/core/src/index/knowhere/knowhere/adapter/SptagAdapter.cpp @@ -89,34 +89,35 @@ ConvertToDataset(std::vector query_results) { } } - auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems); - auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems); - - // TODO: magic - std::vector id_bufs{nullptr, id_buf}; - std::vector dist_bufs{nullptr, dist_buf}; - - auto int64_type = std::make_shared(); - auto float_type = std::make_shared(); - - auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); - auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); - // auto id_array_data = 
std::make_shared(int64_type, sizeof(int64_t) * elems, id_bufs); - // auto dist_array_data = std::make_shared(float_type, sizeof(float) * elems, dist_bufs); - - // auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems); - // auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems); - - auto ids = std::make_shared>(id_array_data); - auto dists = std::make_shared>(dist_array_data); - std::vector array{ids, dists}; - - auto field_id = std::make_shared("id", std::make_shared()); - auto field_dist = std::make_shared("dist", std::make_shared()); - std::vector fields{field_id, field_dist}; - auto schema = std::make_shared(fields); - - return std::make_shared(array, schema); + // auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems); + // auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems); + // + // // TODO: magic + // std::vector id_bufs{nullptr, id_buf}; + // std::vector dist_bufs{nullptr, dist_buf}; + // + // auto int64_type = std::make_shared(); + // auto float_type = std::make_shared(); + // + // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); + // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); + // // auto id_array_data = std::make_shared(int64_type, sizeof(int64_t) * elems, id_bufs); + // // auto dist_array_data = std::make_shared(float_type, sizeof(float) * elems, dist_bufs); + // + // // auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems); + // // auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems); + // + // auto ids = std::make_shared>(id_array_data); + // auto dists = std::make_shared>(dist_array_data); + // std::vector array{ids, dists}; + // + // auto field_id = std::make_shared("id", std::make_shared()); + // auto field_dist = std::make_shared("dist", std::make_shared()); + // std::vector fields{field_id, field_dist}; + // auto schema = std::make_shared(fields); + // + // return 
std::make_shared(array, schema); + return std::make_shared((void*)p_id, (void*)p_dist); } } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/common/Dataset.h b/core/src/index/knowhere/knowhere/common/Dataset.h index 1331239d..b101aba6 100644 --- a/core/src/index/knowhere/knowhere/common/Dataset.h +++ b/core/src/index/knowhere/knowhere/common/Dataset.h @@ -54,6 +54,9 @@ class Dataset { : tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) { } + Dataset(void* ids, void* dists) : ids_(ids), dists_(dists) { + } + Dataset(const Dataset&) = delete; Dataset& operator=(const Dataset&) = delete; @@ -128,6 +131,16 @@ class Dataset { tensor_schema_ = std::move(tensor_schema); } + void* + ids() { + return ids_; + } + + void* + dist() { + return dists_; + } + // const Config & // meta() const { return meta_; } @@ -141,6 +154,9 @@ class Dataset { SchemaPtr array_schema_; std::vector tensor_; SchemaPtr tensor_schema_; + // TODO(yukun): using smart pointer; raw result buffers, null unless set via Dataset(void*, void*) + void* ids_ = nullptr; + void* dists_ = nullptr; // Config meta_; }; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp index 98d25e5e..7aedf986 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp @@ -80,23 +80,24 @@ IDMAP::Search(const DatasetPtr& dataset, const Config& config) { search_impl(rows, (float*)p_data, config->k, res_dis, res_ids, Config()); - auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); - auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); - - std::vector id_bufs{nullptr, id_buf}; - std::vector dist_bufs{nullptr, dist_buf}; - - auto int64_type = std::make_shared(); - auto float_type = std::make_shared(); - - auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); - auto dist_array_data = arrow::ArrayData::Make(float_type, 
elems, dist_bufs); - - auto ids = std::make_shared>(id_array_data); - auto dists = std::make_shared>(dist_array_data); - std::vector array{ids, dists}; - - return std::make_shared(array, nullptr); + // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); + // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); + // + // std::vector id_bufs{nullptr, id_buf}; + // std::vector dist_bufs{nullptr, dist_buf}; + // + // auto int64_type = std::make_shared(); + // auto float_type = std::make_shared(); + // + // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); + // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); + // + // auto ids = std::make_shared>(id_array_data); + // auto dists = std::make_shared>(dist_array_data); + // std::vector array{ids, dists}; + // + // return std::make_shared(array, nullptr); + return std::make_shared((void*)res_ids, (void*)res_dis); } void diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index b2a2af29..7f30a97e 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -139,23 +139,23 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { // std::cout << ss_res_id.str() << std::endl; // std::cout << ss_res_dist.str() << std::endl << std::endl; - auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); - auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); - - std::vector id_bufs{nullptr, id_buf}; - std::vector dist_bufs{nullptr, dist_buf}; - - auto int64_type = std::make_shared(); - auto float_type = std::make_shared(); - - auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); - auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); - - auto ids 
= std::make_shared>(id_array_data); - auto dists = std::make_shared>(dist_array_data); - std::vector array{ids, dists}; - - return std::make_shared(array, nullptr); + // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); + // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); + // + // std::vector id_bufs{nullptr, id_buf}; + // std::vector dist_bufs{nullptr, dist_buf}; + // + // auto int64_type = std::make_shared(); + // auto float_type = std::make_shared(); + // + // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); + // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); + // + // auto ids = std::make_shared>(id_array_data); + // auto dists = std::make_shared>(dist_array_data); + // std::vector array{ids, dists}; + + return std::make_shared((void*)res_ids, (void*)res_dis); } void diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index 8f6d93d7..20481951 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -88,23 +88,24 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) { s_params.search_length = build_cfg->search_length; index_->Search((float*)p_data, rows, dim, build_cfg->k, res_dis, res_ids, s_params); - auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); - auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); - - std::vector id_bufs{nullptr, id_buf}; - std::vector dist_bufs{nullptr, dist_buf}; - - auto int64_type = std::make_shared(); - auto float_type = std::make_shared(); - - auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); - auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); - - auto ids = std::make_shared>(id_array_data); - auto dists = 
std::make_shared>(dist_array_data); - std::vector array{ids, dists}; - - return std::make_shared(array, nullptr); + // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); + // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); + + // std::vector id_bufs{nullptr, id_buf}; + // std::vector dist_bufs{nullptr, dist_buf}; + // + // auto int64_type = std::make_shared(); + // auto float_type = std::make_shared(); + // + // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); + // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); + // + // auto ids = std::make_shared>(id_array_data); + // auto dists = std::make_shared>(dist_array_data); + // std::vector array{ids, dists}; + // + // return std::make_shared(array, nullptr); + return std::make_shared((void*)res_ids, (void*)res_dis); } IndexModelPtr diff --git a/core/src/index/unittest/test_kdt.cpp b/core/src/index/unittest/test_kdt.cpp index 54008818..bbc7dcf9 100644 --- a/core/src/index/unittest/test_kdt.cpp +++ b/core/src/index/unittest/test_kdt.cpp @@ -66,15 +66,19 @@ TEST_F(KDTTest, kdt_basic) { AssertAnns(result, nq, k); { - auto ids = result->array()[0]; - auto dists = result->array()[1]; + // auto ids = result->array()[0]; + // auto dists = result->array()[1]; + auto ids = result->ids(); + auto dists = result->dist(); std::stringstream ss_id; std::stringstream ss_dist; for (auto i = 0; i < nq; i++) { for (auto j = 0; j < k; ++j) { - ss_id << *ids->data()->GetValues(1, i * k + j) << " "; - ss_dist << *dists->data()->GetValues(1, i * k + j) << " "; + ss_id << *((int64_t*)(ids) + i * k + j) << " "; + ss_dist << *((float*)(dists) + i * k + j) << " "; + // ss_id << *ids->data()->GetValues(1, i * k + j) << " "; + // ss_dist << *dists->data()->GetValues(1, i * k + j) << " "; } ss_id << std::endl; ss_dist << std::endl; diff --git a/core/src/index/unittest/utils.cpp b/core/src/index/unittest/utils.cpp index 
d4a59baf..2556b60f 100644 --- a/core/src/index/unittest/utils.cpp +++ b/core/src/index/unittest/utils.cpp @@ -151,9 +151,10 @@ generate_query_dataset(int64_t nb, int64_t dim, float* xb) { void AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) { - auto ids = result->array()[0]; + auto ids = result->ids(); for (auto i = 0; i < nq; i++) { - EXPECT_EQ(i, *(ids->data()->GetValues(1, i * k))); + EXPECT_EQ(i, *((int64_t*)(ids) + i * k)); + // EXPECT_EQ(i, *(ids->data()->GetValues(1, i * k))); } } diff --git a/core/src/wrapper/VecImpl.cpp b/core/src/wrapper/VecImpl.cpp index 05293b53..e7967cbf 100644 --- a/core/src/wrapper/VecImpl.cpp +++ b/core/src/wrapper/VecImpl.cpp @@ -84,8 +84,8 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i Config search_cfg = cfg; auto res = index_->Search(dataset, search_cfg); - auto ids_array = res->array()[0]; - auto dis_array = res->array()[1]; + // auto ids_array = res->array()[0]; + // auto dis_array = res->array()[1]; //{ // auto& ids = ids_array; @@ -104,12 +104,14 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i // std::cout << "dist\n" << ss_dist.str() << std::endl; //} - auto p_ids = ids_array->data()->GetValues(1, 0); - auto p_dist = dis_array->data()->GetValues(1, 0); + // auto p_ids = ids_array->data()->GetValues(1, 0); + // auto p_dist = dis_array->data()->GetValues(1, 0); // TODO(linxj): avoid copy here. - memcpy(ids, p_ids, sizeof(int64_t) * nq * k); - memcpy(dist, p_dist, sizeof(float) * nq * k); + memcpy(ids, res->ids(), sizeof(int64_t) * nq * k); + memcpy(dist, res->dist(), sizeof(float) * nq * k); + free(res->ids()); + free(res->dist()); } catch (knowhere::KnowhereException& e) { WRAPPER_LOG_ERROR << e.what(); return Status(KNOWHERE_UNEXPECTED_ERROR, e.what()); -- GitLab