提交 95ceb8c7 编写于 作者: F fishpenguin

memory usage increased slowly during searching vectors

上级 bbb1ed42
......@@ -5,6 +5,7 @@ Please mark all change in change log and use the ticket from JIRA.
# Milvus 0.6.0 (TODO)
## Bug
- \#228 - memory usage increased slowly during searching vectors
- \#246 - Exclude src/external folder from code coverage for jenkin ci
- \#248 - Reside src/external in thirdparty
......
......@@ -89,34 +89,35 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
}
}
auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems);
// TODO: magic
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>();
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
// auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
// auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
// auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
// auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists};
auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>());
auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>());
std::vector<FieldPtr> fields{field_id, field_dist};
auto schema = std::make_shared<Schema>(fields);
return std::make_shared<Dataset>(array, schema);
// auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems);
//
// // TODO: magic
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
// // auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
// // auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
//
// // auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
// // auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
//
// auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>());
// auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>());
// std::vector<FieldPtr> fields{field_id, field_dist};
// auto schema = std::make_shared<Schema>(fields);
//
// return std::make_shared<Dataset>(array, schema);
return std::make_shared<Dataset>((void*)p_id, (void*)p_dist);
}
} // namespace knowhere
......@@ -54,6 +54,9 @@ class Dataset {
: tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) {
}
Dataset(void* ids, void* dists) : ids_(ids), dists_(dists) {
}
Dataset(const Dataset&) = delete;
Dataset&
operator=(const Dataset&) = delete;
......@@ -128,6 +131,16 @@ class Dataset {
tensor_schema_ = std::move(tensor_schema);
}
void*
ids() {
return ids_;
}
void*
dist() {
return dists_;
}
// const Config &
// meta() const { return meta_; }
......@@ -141,6 +154,9 @@ class Dataset {
SchemaPtr array_schema_;
std::vector<TensorPtr> tensor_;
SchemaPtr tensor_schema_;
// TODO(yukun): using smart pointer
void* ids_;
void* dists_;
// Config meta_;
};
......
......@@ -80,23 +80,24 @@ IDMAP::Search(const DatasetPtr& dataset, const Config& config) {
search_impl(rows, (float*)p_data, config->k, res_dis, res_ids, Config());
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>();
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists};
return std::make_shared<Dataset>(array, nullptr);
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
//
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
//
// return std::make_shared<Dataset>(array, nullptr);
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
}
void
......
......@@ -139,23 +139,23 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {
// std::cout << ss_res_id.str() << std::endl;
// std::cout << ss_res_dist.str() << std::endl << std::endl;
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>();
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists};
return std::make_shared<Dataset>(array, nullptr);
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
//
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
}
void
......
......@@ -88,23 +88,24 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) {
s_params.search_length = build_cfg->search_length;
index_->Search((float*)p_data, rows, dim, build_cfg->k, res_dis, res_ids, s_params);
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>();
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists};
return std::make_shared<Dataset>(array, nullptr);
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
//
// return std::make_shared<Dataset>(array, nullptr);
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
}
IndexModelPtr
......
......@@ -66,15 +66,19 @@ TEST_F(KDTTest, kdt_basic) {
AssertAnns(result, nq, k);
{
auto ids = result->array()[0];
auto dists = result->array()[1];
// auto ids = result->array()[0];
// auto dists = result->array()[1];
auto ids = result->ids();
auto dists = result->dist();
std::stringstream ss_id;
std::stringstream ss_dist;
for (auto i = 0; i < nq; i++) {
for (auto j = 0; j < k; ++j) {
ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
ss_id << *((int64_t*)(ids) + i * k + j) << " ";
ss_dist << *((float*)(dists) + i * k + j) << " ";
// ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
// ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
}
ss_id << std::endl;
ss_dist << std::endl;
......
......@@ -151,9 +151,10 @@ generate_query_dataset(int64_t nb, int64_t dim, float* xb) {
void
AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->array()[0];
auto ids = result->ids();
for (auto i = 0; i < nq; i++) {
EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
EXPECT_EQ(i, *((int64_t*)(ids) + i * k));
// EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
}
}
......
......@@ -84,8 +84,8 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i
Config search_cfg = cfg;
auto res = index_->Search(dataset, search_cfg);
auto ids_array = res->array()[0];
auto dis_array = res->array()[1];
// auto ids_array = res->array()[0];
// auto dis_array = res->array()[1];
//{
// auto& ids = ids_array;
......@@ -104,12 +104,14 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i
// std::cout << "dist\n" << ss_dist.str() << std::endl;
//}
auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0);
auto p_dist = dis_array->data()->GetValues<float>(1, 0);
// auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0);
// auto p_dist = dis_array->data()->GetValues<float>(1, 0);
// TODO(linxj): avoid copy here.
memcpy(ids, p_ids, sizeof(int64_t) * nq * k);
memcpy(dist, p_dist, sizeof(float) * nq * k);
memcpy(ids, res->ids(), sizeof(int64_t) * nq * k);
memcpy(dist, res->dist(), sizeof(float) * nq * k);
free(res->ids());
free(res->dist());
} catch (knowhere::KnowhereException& e) {
WRAPPER_LOG_ERROR << e.what();
return Status(KNOWHERE_UNEXPECTED_ERROR, e.what());
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册