diff --git a/internal/core/bench/CMakeLists.txt b/internal/core/bench/CMakeLists.txt index 8b1b0a13e6e92419cb8fb4df7e8618c4cd7e4fbd..23f38ab5b4e997103dfec05bb1ad5137b09f2d28 100644 --- a/internal/core/bench/CMakeLists.txt +++ b/internal/core/bench/CMakeLists.txt @@ -7,6 +7,10 @@ set(bench_srcs bench_search.cpp ) +set(indexbuilder_bench_srcs + bench_indexbuilder.cpp +) + add_executable(all_bench ${bench_srcs}) target_link_libraries(all_bench milvus_segcore @@ -16,3 +20,13 @@ target_link_libraries(all_bench ) target_link_libraries(all_bench benchmark::benchmark_main) + +add_executable(indexbuilder_bench ${indexbuilder_bench_srcs}) +target_link_libraries(indexbuilder_bench + milvus_segcore + milvus_indexbuilder + log + pthread + ) + +target_link_libraries(indexbuilder_bench benchmark::benchmark_main) diff --git a/internal/core/bench/bench_indexbuilder.cpp b/internal/core/bench/bench_indexbuilder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f9731e51c5879646ef093fe2545fa1ab65bb5089 --- /dev/null +++ b/internal/core/bench/bench_indexbuilder.cpp @@ -0,0 +1,107 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. 
See the License for the specific language governing permissions and limitations under the License
+
+#include
+#include
+#include
+#include
+
+#include "pb/index_cgo_msg.pb.h"
+#include "index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h"
+#include "index/knowhere/knowhere/index/vector_index/adapter/VectorAdapter.h"
+#include "indexbuilder/IndexWrapper.h"
+#include "indexbuilder/index_c.h"
+#include "indexbuilder/utils.h"
+#include "test_utils/indexbuilder_test_utils.h"
+
+// Number of base vectors generated for each benchmark.
+constexpr int64_t NB = 1000000;
+
+namespace indexcgo = milvus::proto::indexcgo;
+
+// Index type selected by the first benchmark argument.
+auto index_type_collections = [] {
+    static std::map<int, milvus::knowhere::IndexType> collections{
+        {0, milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT},
+    };
+    return collections;
+}();
+
+// Metric type selected by the second benchmark argument.
+auto metric_type_collections = [] {
+    static std::map<int, milvus::knowhere::MetricType> collections{
+        {0, milvus::knowhere::Metric::L2},
+    };
+    return collections;
+}();
+
+// Shared body of both benchmarks.
+//
+// Benchmark args are {index type key, metric type key, is_binary}. Params and base
+// vectors are prepared once, before the timed loop; every iteration constructs a fresh
+// IndexWrapper and builds it from those vectors.
+//
+// state:      benchmark state carrying the three args.
+// with_codec: when true, each iteration also calls Serialize() on the built index.
+static void
+RunIndexBuild(benchmark::State& state, bool with_codec) {
+    auto index_type = index_type_collections.at(state.range(0));
+    // The metric key is the second argument; reading range(0) here would silently pick
+    // the wrong metric as soon as the two keys differ.
+    auto metric_type = metric_type_collections.at(state.range(1));
+
+    indexcgo::TypeParams type_params;
+    indexcgo::IndexParams index_params;
+    std::tie(type_params, index_params) = generate_params(index_type, metric_type);
+
+    // Report serialization failures through the benchmark state: assert() is compiled
+    // out under NDEBUG, which would let the run continue with empty params.
+    std::string type_params_str, index_params_str;
+    if (!google::protobuf::TextFormat::PrintToString(type_params, &type_params_str) ||
+        !google::protobuf::TextFormat::PrintToString(index_params, &index_params_str)) {
+        state.SkipWithError("failed to serialize index params");
+        return;
+    }
+
+    // Vectors are read back as float, matching the float-only benchmarks registered below.
+    const bool is_binary = state.range(2) != 0;
+    auto dataset = GenDataset(NB, metric_type, is_binary);
+    auto xb_data = dataset.get_col<float>(0);
+    auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
+
+    // Timed region: construct, build and (optionally) serialize one index per iteration.
+    for (auto _ : state) {
+        auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(),
+                                                                          index_params_str.c_str());
+        index->BuildWithoutIds(xb_dataset);
+        if (with_codec) {
+            index->Serialize();
+        }
+    }
+}
+
+// Measures IndexWrapper construction plus BuildWithoutIds over NB generated vectors.
+static void
+IndexBuilder_build(benchmark::State& state) {
+    RunIndexBuild(state, false);
+}
+
+// Same as IndexBuilder_build, plus Serialize() on the freshly built index.
+static void
+IndexBuilder_build_and_codec(benchmark::State& state) {
+    RunIndexBuild(state, true);
+}
+
+// IVF_FLAT, L2, VectorFloat
+BENCHMARK(IndexBuilder_build)->Args({0, 0, false});
+
+// IVF_FLAT, L2, VectorFloat
+BENCHMARK(IndexBuilder_build_and_codec)->Args({0, 0, false});
diff --git a/internal/core/src/indexbuilder/IndexWrapper.cpp b/internal/core/src/indexbuilder/IndexWrapper.cpp index 7c44c3f6e150053180e9e978d61d0cbc229625fb..b2f26532ec0615be7a2b2ef0dc2a211b4fca35fc 100644 --- a/internal/core/src/indexbuilder/IndexWrapper.cpp +++ b/internal/core/src/indexbuilder/IndexWrapper.cpp @@ -20,6 +20,7 @@ #include "IndexWrapper.h" #include "indexbuilder/utils.h" #include "index/knowhere/knowhere/index/vector_index/ConfAdapterMgr.h" +#include "index/knowhere/knowhere/common/Timer.h" namespace milvus { namespace indexbuilder { @@ -169,6 +170,7 @@ IndexWrapper::BuildWithoutIds(const knowhere::DatasetPtr& dataset) { if (is_in_need_id_list(index_type)) { PanicInfo(std::string(index_type) + " doesn't support build without ids yet!"); } + knowhere::TimeRecorder rc("BuildWithoutIds", 1); // if (is_in_need_build_all_list(index_type)) { // index_->BuildAll(dataset, config_); // } else { @@ -176,10 +178,13 @@
IndexWrapper::BuildWithoutIds(const knowhere::DatasetPtr& dataset) { // index_->AddWithoutIds(dataset, config_); // } index_->BuildAll(dataset, config_); + rc.RecordSection("TrainAndAdd"); if (is_in_nm_list(index_type)) { StoreRawData(dataset); + rc.RecordSection("StoreRawData"); } + rc.ElapseFromBegin("Done"); } void diff --git a/internal/core/src/indexbuilder/IndexWrapper.h b/internal/core/src/indexbuilder/IndexWrapper.h index 90e1486fef84a6c8e5d5ee1689514ee725a33d92..ad5227b108e59f1ba9b6c461a243e4d103dd9bf0 100644 --- a/internal/core/src/indexbuilder/IndexWrapper.h +++ b/internal/core/src/indexbuilder/IndexWrapper.h @@ -9,6 +9,7 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License +#pragma once #include #include #include diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp index 99daa7d419fde3b1789eaf3f8bfe1e03399efc66..988367407a8f15802b35ac59cb13aa67b620d8b1 100644 --- a/internal/core/unittest/test_index_wrapper.cpp +++ b/internal/core/unittest/test_index_wrapper.cpp @@ -11,7 +11,6 @@ #include #include -#include #include #include #include @@ -22,314 +21,13 @@ #include "indexbuilder/IndexWrapper.h" #include "indexbuilder/index_c.h" #include "test_utils/DataGen.h" -#include "faiss/MetricType.h" #include "index/knowhere/knowhere/index/vector_index/VecIndexFactory.h" #include "indexbuilder/utils.h" +#include "test_utils/indexbuilder_test_utils.h" +constexpr int64_t NB = 100000; namespace indexcgo = milvus::proto::indexcgo; -constexpr int64_t DIM = 8; -constexpr int64_t NB = 10000; -constexpr int64_t NQ = 10; -constexpr int64_t K = 4; -constexpr auto METRIC_TYPE = milvus::knowhere::Metric::L2; -#ifdef MILVUS_GPU_VERSION -int DEVICEID = 0; -#endif - -namespace { -auto -generate_conf(const milvus::knowhere::IndexType& index_type, const 
milvus::knowhere::MetricType& metric_type) { - if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IDMAP) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::nlist, 100}, - {milvus::knowhere::IndexParams::nprobe, 4}, - {milvus::knowhere::IndexParams::m, 4}, - {milvus::knowhere::IndexParams::nbits, 8}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::nlist, 100}, - {milvus::knowhere::IndexParams::nprobe, 4}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, -#ifdef MILVUS_GPU_VERSION - {milvus::knowhere::meta::DEVICEID, DEVICEID}, -#endif - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::nlist, 100}, - {milvus::knowhere::IndexParams::nprobe, 4}, - {milvus::knowhere::IndexParams::nbits, 8}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, -#ifdef MILVUS_GPU_VERSION - {milvus::knowhere::meta::DEVICEID, DEVICEID}, -#endif - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - 
{milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::nlist, 100}, - {milvus::knowhere::IndexParams::nprobe, 4}, - {milvus::knowhere::IndexParams::m, 4}, - {milvus::knowhere::IndexParams::nbits, 8}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::Metric::TYPE, metric_type}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NSG) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::IndexParams::nlist, 163}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::nprobe, 8}, - {milvus::knowhere::IndexParams::knng, 20}, - {milvus::knowhere::IndexParams::search_length, 40}, - {milvus::knowhere::IndexParams::out_degree, 30}, - {milvus::knowhere::IndexParams::candidate, 100}, - {milvus::knowhere::Metric::TYPE, metric_type}, - }; -#ifdef MILVUS_SUPPORT_SPTAG - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_SPTAG_KDT_RNT) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - // {milvus::knowhere::meta::TOPK, 10}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_SPTAG_BKT_RNT) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - // {milvus::knowhere::meta::TOPK, 10}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - }; -#endif - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_HNSW) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::M, 16}, 
{milvus::knowhere::IndexParams::efConstruction, 200}, - {milvus::knowhere::IndexParams::ef, 200}, {milvus::knowhere::Metric::TYPE, metric_type}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_ANNOY) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::n_trees, 4}, - {milvus::knowhere::IndexParams::search_k, 100}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWFlat) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::M, 16}, - {milvus::knowhere::IndexParams::efConstruction, 200}, - {milvus::knowhere::IndexParams::ef, 200}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWPQ) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::M, 16}, - {milvus::knowhere::IndexParams::efConstruction, 200}, - {milvus::knowhere::IndexParams::ef, 200}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - {milvus::knowhere::IndexParams::PQM, 8}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWSQ) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::M, 16}, - {milvus::knowhere::IndexParams::efConstruction, 200}, - {milvus::knowhere::IndexParams::ef, 200}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTPANNG) { - return 
milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::IndexParams::edge_size, 10}, - {milvus::knowhere::IndexParams::epsilon, 0.1}, - {milvus::knowhere::IndexParams::max_search_edges, 50}, - {milvus::knowhere::IndexParams::forcedly_pruned_edge_size, 60}, - {milvus::knowhere::IndexParams::selectively_pruned_edge_size, 30}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - }; - } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTONNG) { - return milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::Metric::TYPE, metric_type}, - {milvus::knowhere::IndexParams::edge_size, 20}, - {milvus::knowhere::IndexParams::epsilon, 0.1}, - {milvus::knowhere::IndexParams::max_search_edges, 50}, - {milvus::knowhere::IndexParams::outgoing_edge_size, 5}, - {milvus::knowhere::IndexParams::incoming_edge_size, 40}, - {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, - }; - } - return milvus::knowhere::Config(); -} - -auto -generate_params(const milvus::knowhere::IndexType& index_type, const milvus::knowhere::MetricType& metric_type) { - indexcgo::TypeParams type_params; - indexcgo::IndexParams index_params; - - auto configs = generate_conf(index_type, metric_type); - for (auto& [key, value] : configs.items()) { - auto param = index_params.add_params(); - auto value_str = value.is_string() ? 
value.get() : value.dump(); - param->set_key(key); - param->set_value(value_str); - } - - auto param = index_params.add_params(); - param->set_key("index_type"); - param->set_value(std::string(index_type)); - - return std::make_tuple(type_params, index_params); -} - -auto -GenDataset(int64_t N, const milvus::knowhere::MetricType& metric_type, bool is_binary, int64_t dim = DIM) { - auto schema = std::make_shared(); - auto faiss_metric_type = milvus::knowhere::GetMetricType(metric_type); - if (!is_binary) { - schema->AddDebugField("fakevec", milvus::engine::DataType::VECTOR_FLOAT, dim, faiss_metric_type); - return milvus::segcore::DataGen(schema, N); - } else { - schema->AddDebugField("fakebinvec", milvus::engine::DataType::VECTOR_BINARY, dim, faiss_metric_type); - return milvus::segcore::DataGen(schema, N); - } -} - -using QueryResultPtr = std::unique_ptr; -void -PrintQueryResult(const QueryResultPtr& result) { - auto nq = result->nq; - auto k = result->topk; - - std::stringstream ss_id; - std::stringstream ss_dist; - - for (auto i = 0; i < nq; i++) { - for (auto j = 0; j < k; ++j) { - ss_id << result->ids[i * k + j] << " "; - ss_dist << result->distances[i * k + j] << " "; - } - ss_id << std::endl; - ss_dist << std::endl; - } - std::cout << "id\n" << ss_id.str() << std::endl; - std::cout << "dist\n" << ss_dist.str() << std::endl; -} - -float -L2(const float* point_a, const float* point_b, int dim) { - float dis = 0; - for (auto i = 0; i < dim; i++) { - auto c_a = point_a[i]; - auto c_b = point_b[i]; - dis += pow(c_b - c_a, 2); - } - return dis; -} - -int -hamming_weight(uint8_t n) { - int count = 0; - while (n != 0) { - count += n & 1; - n >>= 1; - } - return count; -} -float -Jaccard(const uint8_t* point_a, const uint8_t* point_b, int dim) { - float dis; - int len = dim / 8; - float intersection = 0; - float union_num = 0; - for (int i = 0; i < len; i++) { - intersection += hamming_weight(point_a[i] & point_b[i]); - union_num += hamming_weight(point_a[i] | 
point_b[i]); - } - dis = 1 - (intersection / union_num); - return dis; -} - -float -CountDistance(const void* point_a, - const void* point_b, - int dim, - const milvus::knowhere::MetricType& metric, - bool is_binary = false) { - if (point_a == nullptr || point_b == nullptr) { - return std::numeric_limits::max(); - } - if (metric == milvus::knowhere::Metric::L2) { - return L2(static_cast(point_a), static_cast(point_b), dim); - } else if (metric == milvus::knowhere::Metric::JACCARD) { - return Jaccard(static_cast(point_a), static_cast(point_b), dim); - } else { - return std::numeric_limits::max(); - } -} - -void -CheckDistances(const QueryResultPtr& result, - const milvus::knowhere::DatasetPtr& base_dataset, - const milvus::knowhere::DatasetPtr& query_dataset, - const milvus::knowhere::MetricType& metric, - const float threshold = 1.0e-5) { - auto base_vecs = base_dataset->Get(milvus::knowhere::meta::TENSOR); - auto query_vecs = query_dataset->Get(milvus::knowhere::meta::TENSOR); - auto dim = base_dataset->Get(milvus::knowhere::meta::DIM); - auto nq = result->nq; - auto k = result->topk; - for (auto i = 0; i < nq; i++) { - for (auto j = 0; j < k; ++j) { - auto dis = result->distances[i * k + j]; - auto id = result->ids[i * k + j]; - auto count_dis = CountDistance(query_vecs + i * dim, base_vecs + id * dim, dim, metric); - // assert(std::abs(dis - count_dis) < threshold); - } - } -} -} // namespace - using Param = std::pair; class IndexWrapperTest : public ::testing::TestWithParam { diff --git a/internal/core/unittest/test_utils/indexbuilder_test_utils.h b/internal/core/unittest/test_utils/indexbuilder_test_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..6dd18a76be80ceab503cb542d98bb61e845aee16 --- /dev/null +++ b/internal/core/unittest/test_utils/indexbuilder_test_utils.h @@ -0,0 +1,330 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#pragma once + +#include +#include +#include +#include +#include + +#include "pb/index_cgo_msg.pb.h" +#include "index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h" +#include "index/knowhere/knowhere/index/vector_index/adapter/VectorAdapter.h" +#include "indexbuilder/IndexWrapper.h" +#include "indexbuilder/index_c.h" +#include "DataGen.h" +#include "faiss/MetricType.h" +#include "index/knowhere/knowhere/index/vector_index/VecIndexFactory.h" +#include "indexbuilder/utils.h" + +constexpr int64_t DIM = 128; +constexpr int64_t NQ = 10; +constexpr int64_t K = 4; +#ifdef MILVUS_GPU_VERSION +int DEVICEID = 0; +#endif + +namespace { +auto +generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowhere::MetricType& metric_type) { + if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IDMAP) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::IndexParams::nlist, 100}, + {milvus::knowhere::IndexParams::nprobe, 4}, + {milvus::knowhere::IndexParams::m, 4}, + {milvus::knowhere::IndexParams::nbits, 8}, + 
{milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::IndexParams::nlist, 1024}, + {milvus::knowhere::IndexParams::nprobe, 4}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, +#ifdef MILVUS_GPU_VERSION + {milvus::knowhere::meta::DEVICEID, DEVICEID}, +#endif + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::IndexParams::nlist, 100}, + {milvus::knowhere::IndexParams::nprobe, 4}, + {milvus::knowhere::IndexParams::nbits, 8}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, +#ifdef MILVUS_GPU_VERSION + {milvus::knowhere::meta::DEVICEID, DEVICEID}, +#endif + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::IndexParams::nlist, 100}, + {milvus::knowhere::IndexParams::nprobe, 4}, + {milvus::knowhere::IndexParams::m, 4}, + {milvus::knowhere::IndexParams::nbits, 8}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::Metric::TYPE, metric_type}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NSG) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + 
{milvus::knowhere::IndexParams::nlist, 163}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::IndexParams::nprobe, 8}, + {milvus::knowhere::IndexParams::knng, 20}, + {milvus::knowhere::IndexParams::search_length, 40}, + {milvus::knowhere::IndexParams::out_degree, 30}, + {milvus::knowhere::IndexParams::candidate, 100}, + {milvus::knowhere::Metric::TYPE, metric_type}, + }; +#ifdef MILVUS_SUPPORT_SPTAG + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_SPTAG_KDT_RNT) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_SPTAG_BKT_RNT) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; +#endif + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_HNSW) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::IndexParams::M, 16}, {milvus::knowhere::IndexParams::efConstruction, 200}, + {milvus::knowhere::IndexParams::ef, 200}, {milvus::knowhere::Metric::TYPE, metric_type}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_ANNOY) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::IndexParams::n_trees, 4}, + {milvus::knowhere::IndexParams::search_k, 100}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWFlat) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + 
{milvus::knowhere::IndexParams::M, 16}, + {milvus::knowhere::IndexParams::efConstruction, 200}, + {milvus::knowhere::IndexParams::ef, 200}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWPQ) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::IndexParams::M, 16}, + {milvus::knowhere::IndexParams::efConstruction, 200}, + {milvus::knowhere::IndexParams::ef, 200}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + {milvus::knowhere::IndexParams::PQM, 8}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWSQ) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::IndexParams::M, 16}, + {milvus::knowhere::IndexParams::efConstruction, 200}, + {milvus::knowhere::IndexParams::ef, 200}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTPANNG) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::IndexParams::edge_size, 10}, + {milvus::knowhere::IndexParams::epsilon, 0.1}, + {milvus::knowhere::IndexParams::max_search_edges, 50}, + {milvus::knowhere::IndexParams::forcedly_pruned_edge_size, 60}, + {milvus::knowhere::IndexParams::selectively_pruned_edge_size, 30}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTONNG) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::meta::TOPK, K}, + {milvus::knowhere::Metric::TYPE, metric_type}, + 
{milvus::knowhere::IndexParams::edge_size, 20}, + {milvus::knowhere::IndexParams::epsilon, 0.1}, + {milvus::knowhere::IndexParams::max_search_edges, 50}, + {milvus::knowhere::IndexParams::outgoing_edge_size, 5}, + {milvus::knowhere::IndexParams::incoming_edge_size, 40}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } + return milvus::knowhere::Config(); +} + +auto +generate_params(const milvus::knowhere::IndexType& index_type, const milvus::knowhere::MetricType& metric_type) { + namespace indexcgo = milvus::proto::indexcgo; + + indexcgo::TypeParams type_params; + indexcgo::IndexParams index_params; + + auto configs = generate_conf(index_type, metric_type); + for (auto& [key, value] : configs.items()) { + auto param = index_params.add_params(); + auto value_str = value.is_string() ? value.get() : value.dump(); + param->set_key(key); + param->set_value(value_str); + } + + auto param = index_params.add_params(); + param->set_key("index_type"); + param->set_value(std::string(index_type)); + + return std::make_tuple(type_params, index_params); +} + +auto +GenDataset(int64_t N, const milvus::knowhere::MetricType& metric_type, bool is_binary, int64_t dim = DIM) { + auto schema = std::make_shared(); + auto faiss_metric_type = milvus::knowhere::GetMetricType(metric_type); + if (!is_binary) { + schema->AddDebugField("fakevec", milvus::engine::DataType::VECTOR_FLOAT, dim, faiss_metric_type); + return milvus::segcore::DataGen(schema, N); + } else { + schema->AddDebugField("fakebinvec", milvus::engine::DataType::VECTOR_BINARY, dim, faiss_metric_type); + return milvus::segcore::DataGen(schema, N); + } +} + +using QueryResultPtr = std::unique_ptr; +void +PrintQueryResult(const QueryResultPtr& result) { + auto nq = result->nq; + auto k = result->topk; + + std::stringstream ss_id; + std::stringstream ss_dist; + + for (auto i = 0; i < nq; i++) { + for (auto j = 0; j < k; ++j) { + ss_id << result->ids[i * k + j] << " "; + ss_dist << result->distances[i * k + j] 
<< " ";
+        }
+        ss_id << std::endl;
+        ss_dist << std::endl;
+    }
+    std::cout << "id\n" << ss_id.str() << std::endl;
+    std::cout << "dist\n" << ss_dist.str() << std::endl;
+}
+
+// Squared Euclidean distance over dim components; squares are formed in double, as pow() did.
+float
+L2(const float* point_a, const float* point_b, int dim) {
+    float dis = 0;
+    for (int i = 0; i < dim; ++i) {
+        const double delta = point_b[i] - point_a[i];
+        dis += delta * delta;
+    }
+    return dis;
+}
+
+// Number of set bits in n.
+int
+hamming_weight(uint8_t n) {
+    int count = 0;
+    for (; n != 0; n >>= 1) {
+        count += n & 1;
+    }
+    return count;
+}
+// Jaccard distance, 1 - |A & B| / |A | B|, over the dim / 8 whole bytes of each bit vector.
+// Vectors with no set bits have an empty union and yield 0 instead of the NaN from 0 / 0.
+float
+Jaccard(const uint8_t* point_a, const uint8_t* point_b, int dim) {
+    const int len = dim / 8;
+    float intersection = 0;
+    float union_num = 0;
+    for (int i = 0; i < len; ++i) {
+        intersection += hamming_weight(point_a[i] & point_b[i]);
+        union_num += hamming_weight(point_a[i] | point_b[i]);
+    }
+    return union_num == 0 ? 0.0f : 1 - (intersection / union_num);
+}
+
+// Dispatches to L2 (float data) or Jaccard (byte data); null inputs and other metrics yield the largest float.
+float
+CountDistance(const void* point_a,
+              const void* point_b,
+              int dim,
+              const milvus::knowhere::MetricType& metric,
+              bool is_binary = false) {
+    if (point_a != nullptr && point_b != nullptr) {
+        if (metric == milvus::knowhere::Metric::L2) {
+            return L2(static_cast<const float*>(point_a), static_cast<const float*>(point_b), dim);
+        }
+        if (metric == milvus::knowhere::Metric::JACCARD) {
+            return Jaccard(static_cast<const uint8_t*>(point_a), static_cast<const uint8_t*>(point_b), dim);
+        }
+    }
+    return std::numeric_limits<float>::max();
+}
+
+void
+CheckDistances(const QueryResultPtr& result,
+               const milvus::knowhere::DatasetPtr& base_dataset,
+               const milvus::knowhere::DatasetPtr& query_dataset,
+               const milvus::knowhere::MetricType& metric,
+               const float threshold = 1.0e-5) {
+    auto base_vecs = base_dataset->Get(milvus::knowhere::meta::TENSOR);
+    auto query_vecs = query_dataset->Get(milvus::knowhere::meta::TENSOR);
+    auto dim = base_dataset->Get(milvus::knowhere::meta::DIM);
+    auto nq = result->nq;
+    auto k = result->topk;
+    for (auto i = 0; i < nq; i++) {
+        for (auto j = 0; j < k; ++j) {
+            auto dis =
result->distances[i * k + j]; + auto id = result->ids[i * k + j]; + auto count_dis = CountDistance(query_vecs + i * dim, base_vecs + id * dim, dim, metric); + // assert(std::abs(dis - count_dis) < threshold); + } + } +} +} // namespace