提交 95ceb8c7 编写于 作者: F fishpenguin

memory usage increased slowly during searching vectors

上级 bbb1ed42
...@@ -5,6 +5,7 @@ Please mark all change in change log and use the ticket from JIRA. ...@@ -5,6 +5,7 @@ Please mark all change in change log and use the ticket from JIRA.
# Milvus 0.6.0 (TODO) # Milvus 0.6.0 (TODO)
## Bug ## Bug
- \#228 - memory usage increased slowly during searching vectors
- \#246 - Exclude src/external folder from code coverage for jenkin ci - \#246 - Exclude src/external folder from code coverage for jenkin ci
- \#248 - Reside src/external in thirdparty - \#248 - Reside src/external in thirdparty
......
...@@ -89,34 +89,35 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) { ...@@ -89,34 +89,35 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
} }
} }
auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems); // auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems); // auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems);
//
// TODO: magic // // TODO: magic
std::vector<BufferPtr> id_bufs{nullptr, id_buf}; // std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf}; // std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
auto int64_type = std::make_shared<arrow::Int64Type>(); // auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>(); // auto float_type = std::make_shared<arrow::FloatType>();
//
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
// auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs); // // auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
// auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs); // // auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
//
// auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems); // // auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
// auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems); // // auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
//
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data); // auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data); // auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists}; // std::vector<ArrayPtr> array{ids, dists};
//
auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>()); // auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>());
auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>()); // auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>());
std::vector<FieldPtr> fields{field_id, field_dist}; // std::vector<FieldPtr> fields{field_id, field_dist};
auto schema = std::make_shared<Schema>(fields); // auto schema = std::make_shared<Schema>(fields);
//
return std::make_shared<Dataset>(array, schema); // return std::make_shared<Dataset>(array, schema);
return std::make_shared<Dataset>((void*)p_id, (void*)p_dist);
} }
} // namespace knowhere } // namespace knowhere
...@@ -54,6 +54,9 @@ class Dataset { ...@@ -54,6 +54,9 @@ class Dataset {
: tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) { : tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) {
} }
// Builds a Dataset around two untyped pointers: `ids` is stored in ids_ and
// `dists` in dists_. The pointers are stored as-is; nothing is copied here.
// NOTE(review): no code visible in this class releases these pointers. The
// VecIndexImpl::Search caller in this change releases both with free() after
// copying the data out -- confirm that every other caller does the same.
Dataset(void* ids, void* dists) : ids_(ids), dists_(dists) {
}
Dataset(const Dataset&) = delete; Dataset(const Dataset&) = delete;
Dataset& Dataset&
operator=(const Dataset&) = delete; operator=(const Dataset&) = delete;
...@@ -128,6 +131,16 @@ class Dataset { ...@@ -128,6 +131,16 @@ class Dataset {
tensor_schema_ = std::move(tensor_schema); tensor_schema_ = std::move(tensor_schema);
} }
// Returns the pointer held in ids_, unchanged.
// NOTE(review): the element type is not recorded here; the callers visible
// in this change cast the result to int64_t* -- confirm for other callers.
void*
ids() {
return ids_;
}
// Returns the pointer held in dists_, unchanged. Note that the accessor is
// named dist() while the member it returns is dists_.
// NOTE(review): the element type is not recorded here; the callers visible
// in this change cast the result to float* -- confirm for other callers.
void*
dist() {
return dists_;
}
// const Config & // const Config &
// meta() const { return meta_; } // meta() const { return meta_; }
...@@ -141,6 +154,9 @@ class Dataset { ...@@ -141,6 +154,9 @@ class Dataset {
SchemaPtr array_schema_; SchemaPtr array_schema_;
std::vector<TensorPtr> tensor_; std::vector<TensorPtr> tensor_;
SchemaPtr tensor_schema_; SchemaPtr tensor_schema_;
// TODO(yukun): replace these raw pointers with smart pointers.
// Raw result pointers set by the Dataset(void*, void*) constructor and
// returned by ids() / dist(). They default to nullptr so that a Dataset
// built through a constructor that does not set them never hands out an
// indeterminate pointer.
void* ids_ = nullptr;
void* dists_ = nullptr;
// Config meta_; // Config meta_;
}; };
......
...@@ -80,23 +80,24 @@ IDMAP::Search(const DatasetPtr& dataset, const Config& config) { ...@@ -80,23 +80,24 @@ IDMAP::Search(const DatasetPtr& dataset, const Config& config) {
search_impl(rows, (float*)p_data, config->k, res_dis, res_ids, Config()); search_impl(rows, (float*)p_data, config->k, res_dis, res_ids, Config());
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
//
std::vector<BufferPtr> id_bufs{nullptr, id_buf}; // std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf}; // std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
auto int64_type = std::make_shared<arrow::Int64Type>(); // auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>(); // auto float_type = std::make_shared<arrow::FloatType>();
//
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data); // auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data); // auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists}; // std::vector<ArrayPtr> array{ids, dists};
//
return std::make_shared<Dataset>(array, nullptr); // return std::make_shared<Dataset>(array, nullptr);
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
} }
void void
......
...@@ -139,23 +139,23 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { ...@@ -139,23 +139,23 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {
// std::cout << ss_res_id.str() << std::endl; // std::cout << ss_res_id.str() << std::endl;
// std::cout << ss_res_dist.str() << std::endl << std::endl; // std::cout << ss_res_dist.str() << std::endl << std::endl;
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
//
std::vector<BufferPtr> id_bufs{nullptr, id_buf}; // std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf}; // std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
auto int64_type = std::make_shared<arrow::Int64Type>(); // auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>(); // auto float_type = std::make_shared<arrow::FloatType>();
//
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data); // auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data); // auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists}; // std::vector<ArrayPtr> array{ids, dists};
return std::make_shared<Dataset>(array, nullptr); return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
} }
void void
......
...@@ -88,23 +88,24 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) { ...@@ -88,23 +88,24 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) {
s_params.search_length = build_cfg->search_length; s_params.search_length = build_cfg->search_length;
index_->Search((float*)p_data, rows, dim, build_cfg->k, res_dis, res_ids, s_params); index_->Search((float*)p_data, rows, dim, build_cfg->k, res_dis, res_ids, s_params);
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
std::vector<BufferPtr> id_bufs{nullptr, id_buf}; // std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf}; // std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
auto int64_type = std::make_shared<arrow::Int64Type>(); // auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>(); // auto float_type = std::make_shared<arrow::FloatType>();
//
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data); // auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data); // auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists}; // std::vector<ArrayPtr> array{ids, dists};
//
return std::make_shared<Dataset>(array, nullptr); // return std::make_shared<Dataset>(array, nullptr);
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
} }
IndexModelPtr IndexModelPtr
......
...@@ -66,15 +66,19 @@ TEST_F(KDTTest, kdt_basic) { ...@@ -66,15 +66,19 @@ TEST_F(KDTTest, kdt_basic) {
AssertAnns(result, nq, k); AssertAnns(result, nq, k);
{ {
auto ids = result->array()[0]; // auto ids = result->array()[0];
auto dists = result->array()[1]; // auto dists = result->array()[1];
auto ids = result->ids();
auto dists = result->dist();
std::stringstream ss_id; std::stringstream ss_id;
std::stringstream ss_dist; std::stringstream ss_dist;
for (auto i = 0; i < nq; i++) { for (auto i = 0; i < nq; i++) {
for (auto j = 0; j < k; ++j) { for (auto j = 0; j < k; ++j) {
ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " "; ss_id << *((int64_t*)(ids) + i * k + j) << " ";
ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " "; ss_dist << *((float*)(dists) + i * k + j) << " ";
// ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
// ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
} }
ss_id << std::endl; ss_id << std::endl;
ss_dist << std::endl; ss_dist << std::endl;
......
...@@ -151,9 +151,10 @@ generate_query_dataset(int64_t nb, int64_t dim, float* xb) { ...@@ -151,9 +151,10 @@ generate_query_dataset(int64_t nb, int64_t dim, float* xb) {
void void
AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) { AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->array()[0]; auto ids = result->ids();
for (auto i = 0; i < nq; i++) { for (auto i = 0; i < nq; i++) {
EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k))); EXPECT_EQ(i, *((int64_t*)(ids) + i * k));
// EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
} }
} }
......
...@@ -84,8 +84,8 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i ...@@ -84,8 +84,8 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i
Config search_cfg = cfg; Config search_cfg = cfg;
auto res = index_->Search(dataset, search_cfg); auto res = index_->Search(dataset, search_cfg);
auto ids_array = res->array()[0]; // auto ids_array = res->array()[0];
auto dis_array = res->array()[1]; // auto dis_array = res->array()[1];
//{ //{
// auto& ids = ids_array; // auto& ids = ids_array;
...@@ -104,12 +104,14 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i ...@@ -104,12 +104,14 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i
// std::cout << "dist\n" << ss_dist.str() << std::endl; // std::cout << "dist\n" << ss_dist.str() << std::endl;
//} //}
auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0); // auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0);
auto p_dist = dis_array->data()->GetValues<float>(1, 0); // auto p_dist = dis_array->data()->GetValues<float>(1, 0);
// TODO(linxj): avoid copy here. // TODO(linxj): avoid copy here.
memcpy(ids, p_ids, sizeof(int64_t) * nq * k); memcpy(ids, res->ids(), sizeof(int64_t) * nq * k);
memcpy(dist, p_dist, sizeof(float) * nq * k); memcpy(dist, res->dist(), sizeof(float) * nq * k);
free(res->ids());
free(res->dist());
} catch (knowhere::KnowhereException& e) { } catch (knowhere::KnowhereException& e) {
WRAPPER_LOG_ERROR << e.what(); WRAPPER_LOG_ERROR << e.what();
return Status(KNOWHERE_UNEXPECTED_ERROR, e.what()); return Status(KNOWHERE_UNEXPECTED_ERROR, e.what());
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册