提交 bccf8d20 编写于 作者: D dragondriver 提交者: yefu.chen

Add query support for ivf-flat, nsg

Signed-off-by: Ndragondriver <jiquan.long@zilliz.com>
上级 d0c78762
......@@ -55,6 +55,7 @@ IndexWrapper::parse_impl(const std::string& serialized_params_str, knowhere::Con
}
// String-to-number converters handed to check_parameter<T>() below.
auto stoi_closure = [](const std::string& s) -> int { return std::stoi(s); };
// Must return float: an `int` return type would truncate a value such as "0.1" to 0
// before check_parameter<float>() ever sees it.
auto stof_closure = [](const std::string& s) -> float { return std::stof(s); };
/***************************** meta *******************************/
check_parameter<int>(conf, milvus::knowhere::meta::DIM, stoi_closure, std::nullopt);
......@@ -88,7 +89,7 @@ IndexWrapper::parse_impl(const std::string& serialized_params_str, knowhere::Con
check_parameter<int>(conf, milvus::knowhere::IndexParams::edge_size, stoi_closure, std::nullopt);
/************************** NGT Search Params *****************************/
check_parameter<int>(conf, milvus::knowhere::IndexParams::epsilon, stoi_closure, std::nullopt);
check_parameter<float>(conf, milvus::knowhere::IndexParams::epsilon, stof_closure, std::nullopt);
check_parameter<int>(conf, milvus::knowhere::IndexParams::max_search_edges, stoi_closure, std::nullopt);
/************************** NGT_PANNG Params *****************************/
......@@ -274,6 +275,12 @@ IndexWrapper::QueryWithParam(const knowhere::DatasetPtr& dataset, const char* se
std::unique_ptr<IndexWrapper::QueryResult>
IndexWrapper::QueryImpl(const knowhere::DatasetPtr& dataset, const knowhere::Config& conf) {
auto load_raw_data_closure = [&]() { LoadRawData(); }; // hide this pointer
auto index_type = get_index_type();
if (is_in_nm_list(index_type)) {
std::call_once(raw_data_loaded_, load_raw_data_closure);
}
auto res = index_->Query(dataset, conf, nullptr);
auto ids = res->Get<int64_t*>(milvus::knowhere::meta::IDS);
auto distances = res->Get<float*>(milvus::knowhere::meta::DISTANCE);
......@@ -291,5 +298,19 @@ IndexWrapper::QueryImpl(const knowhere::DatasetPtr& dataset, const knowhere::Con
return std::move(query_res);
}
void
IndexWrapper::LoadRawData() {
auto index_type = get_index_type();
if (is_in_nm_list(index_type)) {
auto bs = index_->Serialize(config_);
auto bptr = std::make_shared<milvus::knowhere::Binary>();
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
bptr->data = std::shared_ptr<uint8_t[]>(static_cast<uint8_t*>(raw_data_.data()), deleter);
bptr->size = raw_data_.size();
bs.Append(RAW_DATA, bptr);
index_->Load(bs);
}
}
} // namespace indexbuilder
} // namespace milvus
......@@ -66,6 +66,9 @@ class IndexWrapper {
void
StoreRawData(const knowhere::DatasetPtr& dataset);
void
LoadRawData();
template <typename T>
void
check_parameter(knowhere::Config& conf,
......@@ -92,6 +95,7 @@ class IndexWrapper {
milvus::json index_config_;
knowhere::Config config_;
std::vector<uint8_t> raw_data_;
std::once_flag raw_data_loaded_;
};
} // namespace indexbuilder
......
......@@ -11,6 +11,8 @@
#include <tuple>
#include <map>
#include <limits>
#include <math.h>
#include <gtest/gtest.h>
#include <google/protobuf/text_format.h>
......@@ -41,16 +43,16 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IDMAP) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::nlist, 100},
// {milvus::knowhere::IndexParams::nprobe, 4},
{milvus::knowhere::IndexParams::nprobe, 4},
{milvus::knowhere::IndexParams::m, 4},
{milvus::knowhere::IndexParams::nbits, 8},
{milvus::knowhere::Metric::TYPE, metric_type},
......@@ -59,9 +61,9 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::nlist, 100},
// {milvus::knowhere::IndexParams::nprobe, 4},
{milvus::knowhere::IndexParams::nprobe, 4},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
#ifdef MILVUS_GPU_VERSION
......@@ -71,9 +73,9 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::nlist, 100},
// {milvus::knowhere::IndexParams::nprobe, 4},
{milvus::knowhere::IndexParams::nprobe, 4},
{milvus::knowhere::IndexParams::nbits, 8},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
......@@ -84,9 +86,9 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::nlist, 100},
// {milvus::knowhere::IndexParams::nprobe, 4},
{milvus::knowhere::IndexParams::nprobe, 4},
{milvus::knowhere::IndexParams::m, 4},
{milvus::knowhere::IndexParams::nbits, 8},
{milvus::knowhere::Metric::TYPE, metric_type},
......@@ -95,13 +97,14 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::Metric::TYPE, metric_type},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_NSG) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
{milvus::knowhere::IndexParams::nlist, 163},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::nprobe, 8},
{milvus::knowhere::IndexParams::knng, 20},
{milvus::knowhere::IndexParams::search_length, 40},
......@@ -127,17 +130,14 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
#endif
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_HNSW) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::IndexParams::M, 16},
{milvus::knowhere::IndexParams::efConstruction, 200},
{milvus::knowhere::IndexParams::ef, 200},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::meta::DIM, DIM}, {milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::M, 16}, {milvus::knowhere::IndexParams::efConstruction, 200},
{milvus::knowhere::IndexParams::ef, 200}, {milvus::knowhere::Metric::TYPE, metric_type},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_ANNOY) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::n_trees, 4},
{milvus::knowhere::IndexParams::search_k, 100},
{milvus::knowhere::Metric::TYPE, metric_type},
......@@ -146,7 +146,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWFlat) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::M, 16},
{milvus::knowhere::IndexParams::efConstruction, 200},
{milvus::knowhere::IndexParams::ef, 200},
......@@ -156,7 +156,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWPQ) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::M, 16},
{milvus::knowhere::IndexParams::efConstruction, 200},
{milvus::knowhere::IndexParams::ef, 200},
......@@ -167,7 +167,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWSQ) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::M, 16},
{milvus::knowhere::IndexParams::efConstruction, 200},
{milvus::knowhere::IndexParams::ef, 200},
......@@ -177,7 +177,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTPANNG) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::IndexParams::edge_size, 10},
{milvus::knowhere::IndexParams::epsilon, 0.1},
......@@ -189,7 +189,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTONNG) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::IndexParams::edge_size, 20},
{milvus::knowhere::IndexParams::epsilon, 0.1},
......@@ -234,6 +234,99 @@ GenDataset(int64_t N, const milvus::knowhere::MetricType& metric_type, bool is_b
return milvus::segcore::DataGen(schema, N);
}
}
using QueryResultPtr = std::unique_ptr<milvus::indexbuilder::IndexWrapper::QueryResult>;
// Dump a query result to stdout: an "id" section followed by a "dist" section,
// each holding one line per query with topk space-separated values.
void
PrintQueryResult(const QueryResultPtr& result) {
    const auto num_queries = result->nq;
    const auto topk = result->topk;

    std::stringstream id_rows;
    std::stringstream dist_rows;
    for (auto row = 0; row < num_queries; row++) {
        // Results are stored row-major: topk entries per query.
        const auto row_start = row * topk;
        for (auto col = 0; col < topk; ++col) {
            id_rows << result->ids[row_start + col] << " ";
            dist_rows << result->distances[row_start + col] << " ";
        }
        id_rows << std::endl;
        dist_rows << std::endl;
    }

    std::cout << "id\n" << id_rows.str() << std::endl;
    std::cout << "dist\n" << dist_rows.str() << std::endl;
}
// Squared Euclidean distance between two float vectors of `dim` components.
// No square root is taken: the value returned is the sum of the squared
// component differences. Returns 0 when dim <= 0.
float
L2(const float* point_a, const float* point_b, int dim) {
    float dis = 0;
    for (int i = 0; i < dim; ++i) {
        // Square by multiplication rather than pow(x, 2). The product is formed in
        // double, exactly as the pow() call did, so the accumulated value is unchanged.
        const double diff = point_b[i] - point_a[i];
        dis = static_cast<float>(dis + diff * diff);
    }
    return dis;
}
// Number of bits set to 1 in the byte `n`.
int
hamming_weight(uint8_t n) {
    int bits = 0;
    for (; n != 0; ++bits) {
        // n & (n - 1) clears the lowest set bit, so the loop runs once per set bit.
        n = static_cast<uint8_t>(n & (n - 1));
    }
    return bits;
}
// Jaccard distance between two bit vectors packed eight dimensions per byte:
//     1 - popcount(a & b) / popcount(a | b)
// `dim` is the dimension in bits; only the dim / 8 whole bytes are compared.
// When neither vector has a bit set (or dim < 8) the ratio would be 0 / 0; 1 is
// returned in that case instead of NaN.
// NOTE(review): confirm that 1 is what the index under test reports for two all-zero vectors.
float
Jaccard(const uint8_t* point_a, const uint8_t* point_b, int dim) {
    const int len = dim / 8;
    int intersection = 0;
    int union_num = 0;
    for (int i = 0; i < len; i++) {
        intersection += hamming_weight(point_a[i] & point_b[i]);
        union_num += hamming_weight(point_a[i] | point_b[i]);
    }
    if (union_num == 0) {
        return 1.0f;
    }
    return 1 - (static_cast<float>(intersection) / static_cast<float>(union_num));
}
// Recompute the distance between two raw vectors for the given metric.
//   Metric::L2      -> both pointers are read as float vectors and passed to L2()
//   Metric::JACCARD -> both pointers are read as packed bytes and passed to Jaccard()
// A null pointer or any other metric yields std::numeric_limits<float>::max().
// `is_binary` is accepted but not consulted.
float
CountDistance(const void* point_a,
              const void* point_b,
              int dim,
              const milvus::knowhere::MetricType& metric,
              bool is_binary = false) {
    const float not_computable = std::numeric_limits<float>::max();

    if (point_a == nullptr || point_b == nullptr) {
        return not_computable;
    }
    if (metric == milvus::knowhere::Metric::L2) {
        const auto* lhs = static_cast<const float*>(point_a);
        const auto* rhs = static_cast<const float*>(point_b);
        return L2(lhs, rhs, dim);
    }
    if (metric == milvus::knowhere::Metric::JACCARD) {
        const auto* lhs = static_cast<const uint8_t*>(point_a);
        const auto* rhs = static_cast<const uint8_t*>(point_b);
        return Jaccard(lhs, rhs, dim);
    }
    return not_computable;
}
// Recompute, from the raw vectors, the distance of every neighbour in `result`.
//   result        - query output (nq rows of topk ids / distances, row-major)
//   base_dataset  - vectors the index was built from; its TENSOR and DIM entries are read
//   query_dataset - query vectors; its TENSOR entry is read
//   metric        - forwarded to CountDistance()
//   threshold     - tolerance intended for the comparison below, which is currently disabled
// Both tensors are read as float*, so this presumably only suits float indexes — TODO confirm.
void
CheckDistances(const QueryResultPtr& result,
               const milvus::knowhere::DatasetPtr& base_dataset,
               const milvus::knowhere::DatasetPtr& query_dataset,
               const milvus::knowhere::MetricType& metric,
               const float threshold = 1.0e-5) {
    auto base_vecs = base_dataset->Get<float*>(milvus::knowhere::meta::TENSOR);
    auto query_vecs = query_dataset->Get<float*>(milvus::knowhere::meta::TENSOR);
    auto dim = base_dataset->Get<int64_t>(milvus::knowhere::meta::DIM);
    auto nq = result->nq;
    auto k = result->topk;
    for (auto i = 0; i < nq; i++) {
        for (auto j = 0; j < k; ++j) {
            auto dis = result->distances[i * k + j];
            auto id = result->ids[i * k + j];
            // A negative id cannot address a base vector (an index may presumably report
            // one when it finds fewer than topk neighbours); skip it rather than read
            // memory in front of base_vecs.
            if (id < 0) {
                continue;
            }
            auto count_dis = CountDistance(query_vecs + i * dim, base_vecs + id * dim, dim, metric);
            // The comparison is left disabled, as in the original code:
            // assert(std::abs(dis - count_dis) < threshold);
            (void)dis;
            (void)count_dis;
        }
    }
}
} // namespace
using Param = std::pair<milvus::knowhere::IndexType, milvus::knowhere::MetricType>;
......@@ -247,8 +340,26 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
metric_type = param.second;
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
std::map<std::string, bool> is_binary_map = {{milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ, false},
{milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, true}};
std::map<std::string, bool> is_binary_map = {
{milvus::knowhere::IndexEnum::INDEX_FAISS_IDMAP, false},
{milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ, false},
{milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, false},
{milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, false},
{milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, true},
{milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, true},
#ifdef MILVUS_SUPPORT_SPTAG
{milvus::knowhere::IndexEnum::INDEX_SPTAG_KDT_RNT, false},
{milvus::knowhere::IndexEnum::INDEX_SPTAG_BKT_RNT, false},
#endif
{milvus::knowhere::IndexEnum::INDEX_HNSW, false},
{milvus::knowhere::IndexEnum::INDEX_ANNOY, false},
{milvus::knowhere::IndexEnum::INDEX_RHNSWFlat, false},
{milvus::knowhere::IndexEnum::INDEX_RHNSWPQ, false},
{milvus::knowhere::IndexEnum::INDEX_RHNSWSQ, false},
{milvus::knowhere::IndexEnum::INDEX_NGTPANNG, false},
{milvus::knowhere::IndexEnum::INDEX_NGTONNG, false},
{milvus::knowhere::IndexEnum::INDEX_NSG, false},
};
is_binary = is_binary_map[index_type];
......@@ -262,9 +373,13 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
if (!is_binary) {
xb_data = dataset.get_col<float>(0);
xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
xq_data = dataset.get_col<float>(0);
xq_dataset = milvus::knowhere::GenDataset(NQ, DIM, xq_data.data());
} else {
xb_bin_data = dataset.get_col<uint8_t>(0);
xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_bin_data.data());
xq_bin_data = dataset.get_col<uint8_t>(0);
xq_dataset = milvus::knowhere::GenDataset(NQ, DIM, xq_bin_data.data());
}
}
......@@ -282,6 +397,9 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
std::vector<float> xb_data;
std::vector<uint8_t> xb_bin_data;
std::vector<milvus::knowhere::IDType> ids;
milvus::knowhere::DatasetPtr xq_dataset;
std::vector<float> xq_data;
std::vector<uint8_t> xq_bin_data;
};
TEST(PQ, Build) {
......@@ -308,6 +426,47 @@ TEST(IVFFLATNM, Build) {
ASSERT_NO_THROW(index->AddWithoutIds(xb_dataset, conf));
}
// Train and fill an IVF_FLAT index, append the raw vectors to its serialized form
// under RAW_DATA, load that back, then run a query of NQ vectors.
TEST(IVFFLATNM, Query) {
    auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
    auto metric_type = milvus::knowhere::Metric::L2;
    auto conf = generate_conf(index_type, metric_type);
    auto index = milvus::knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);

    // Base vectors.
    auto dataset = GenDataset(NB, metric_type, false);
    auto xb_data = dataset.get_col<float>(0);
    auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
    ASSERT_NO_THROW(index->Train(xb_dataset, conf));
    ASSERT_NO_THROW(index->AddWithoutIds(xb_dataset, conf));

    // Hand the raw vectors back to the index; xb_data keeps ownership, hence the no-op deleter.
    auto binary_set = index->Serialize(conf);
    auto raw_binary = std::make_shared<milvus::knowhere::Binary>();
    auto* raw_bytes = reinterpret_cast<uint8_t*>(xb_data.data());
    raw_binary->data = std::shared_ptr<uint8_t[]>(raw_bytes, [](uint8_t*) {});
    raw_binary->size = DIM * NB * sizeof(float);
    binary_set.Append(RAW_DATA, raw_binary);
    index->Load(binary_set);

    // Query with vectors taken from the same generated column.
    auto xq_data = dataset.get_col<float>(0);
    auto xq_dataset = milvus::knowhere::GenDataset(NQ, DIM, xq_data.data());
    auto result = index->Query(xq_dataset, conf, nullptr);
}
// Build an NSG index with BuildAll(), append the raw vectors to its serialized form
// under RAW_DATA, load that back, then run a query of NQ vectors.
TEST(NSG, Query) {
    auto index_type = milvus::knowhere::IndexEnum::INDEX_NSG;
    auto metric_type = milvus::knowhere::Metric::L2;
    auto conf = generate_conf(index_type, metric_type);
    auto index = milvus::knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);

    // Base vectors.
    auto dataset = GenDataset(NB, metric_type, false);
    auto xb_data = dataset.get_col<float>(0);
    auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
    index->BuildAll(xb_dataset, conf);

    // Hand the raw vectors back to the index; xb_data keeps ownership, hence the no-op deleter.
    auto binary_set = index->Serialize(conf);
    auto raw_binary = std::make_shared<milvus::knowhere::Binary>();
    auto* raw_bytes = reinterpret_cast<uint8_t*>(xb_data.data());
    raw_binary->data = std::shared_ptr<uint8_t[]>(raw_bytes, [](uint8_t*) {});
    raw_binary->size = DIM * NB * sizeof(float);
    binary_set.Append(RAW_DATA, raw_binary);
    index->Load(binary_set);

    // Query with vectors taken from the same generated column.
    auto xq_data = dataset.get_col<float>(0);
    auto xq_dataset = milvus::knowhere::GenDataset(NQ, DIM, xq_data.data());
    auto result = index->Query(xq_dataset, conf, nullptr);
}
TEST(BINFLAT, Build) {
auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
auto metric_type = milvus::knowhere::Metric::JACCARD;
......@@ -485,12 +644,7 @@ TEST_P(IndexWrapperTest, Dim) {
TEST_P(IndexWrapperTest, BuildWithoutIds) {
auto index =
std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
if (milvus::indexbuilder::is_in_need_id_list(index_type)) {
ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset));
} else {
ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
}
ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
}
TEST_P(IndexWrapperTest, Codec) {
......@@ -511,3 +665,16 @@ TEST_P(IndexWrapperTest, Codec) {
ASSERT_EQ(strcmp(binary.data, copy_binary.data), 0);
}
}
// Build through IndexWrapper, query it with xq_dataset, and check that the result
// reports topk == K, nq == NQ, and holds topk * nq ids and distances.
TEST_P(IndexWrapperTest, Query) {
    using milvus::indexbuilder::IndexWrapper;

    auto wrapper = std::make_unique<IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
    wrapper->BuildWithoutIds(xb_dataset);

    std::unique_ptr<IndexWrapper::QueryResult> hits = wrapper->Query(xq_dataset);
    ASSERT_EQ(hits->topk, K);
    ASSERT_EQ(hits->nq, NQ);

    // Row-major layout: one row of topk entries per query.
    const auto expected_entries = hits->topk * hits->nq;
    ASSERT_EQ(hits->distances.size(), expected_entries);
    ASSERT_EQ(hits->ids.size(), expected_entries);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册