提交 4835d820 编写于 作者: B bigsheeper 提交者: yefu.chen

Add index builder test cases

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
上级 e7998da7
......@@ -18,6 +18,7 @@
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "utils/EasyAssert.h"
#include "IndexWrapper.h"
#include "indexbuilder/utils.h"
namespace milvus {
namespace indexbuilder {
......@@ -30,12 +31,10 @@ IndexWrapper::IndexWrapper(const char* serialized_type_params, const char* seria
std::map<std::string, knowhere::IndexMode> mode_map = {{"CPU", knowhere::IndexMode::MODE_CPU},
{"GPU", knowhere::IndexMode::MODE_GPU}};
auto type = get_config_by_name<std::string>("index_type");
auto mode = get_config_by_name<std::string>("index_mode");
auto index_type = type.has_value() ? type.value() : knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
auto index_mode = mode.has_value() ? mode_map[mode.value()] : knowhere::IndexMode::MODE_CPU;
index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type, index_mode);
index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(get_index_type(), index_mode);
Assert(index_ != nullptr);
}
......@@ -53,17 +52,17 @@ IndexWrapper::parse() {
Assert(deserialized_success);
for (auto i = 0; i < type_config.params_size(); ++i) {
auto type_param = type_config.params(i);
auto key = type_param.key();
auto value = type_param.value();
const auto& type_param = type_config.params(i);
const auto& key = type_param.key();
const auto& value = type_param.value();
type_config_[key] = value;
config_[key] = value;
}
for (auto i = 0; i < index_config.params_size(); ++i) {
auto index_param = index_config.params(i);
auto key = index_param.key();
auto value = index_param.value();
const auto& index_param = index_config.params(i);
const auto& key = index_param.key();
const auto& value = index_param.value();
index_config_[key] = value;
config_[key] = value;
}
......@@ -132,12 +131,26 @@ IndexWrapper::dim() {
/**
 * Trains the wrapped index on `dataset` and adds its vectors without explicit ids.
 *
 * For index types reported by is_in_nm_list(), the raw tensor is additionally copied
 * into raw_data_; Serialize() appends that buffer to the binary set under RAW_DATA.
 *
 * @param dataset  knowhere dataset; TENSOR, ROWS and DIM are read from it for the
 *                 raw-data copy.
 *
 * Panics (via PanicInfo) when the configured index type is INDEX_FAISS_BIN_IVFFLAT.
 */
void
IndexWrapper::BuildWithoutIds(const knowhere::DatasetPtr& dataset) {
    // The type comes from the parsed config (get_index_type()), not from index_->index_type().
    // Only one declaration may exist in this scope; a second one is a redefinition error.
    auto index_type = get_index_type();
    if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
        PanicInfo(std::string(index_type) + " doesn't support build without ids yet!");
    }

    index_->Train(dataset, config_);
    index_->AddWithoutIds(dataset, config_);

    if (is_in_nm_list(index_type)) {
        auto tensor = dataset->Get<const void*>(milvus::knowhere::meta::TENSOR);
        auto row_num = dataset->Get<int64_t>(milvus::knowhere::meta::ROWS);
        auto dim = dataset->Get<int64_t>(milvus::knowhere::meta::DIM);

        // Binary vectors are sized as dim / 8 bytes per row, everything else as
        // dim floats per row.
        // NOTE(review): assumes `tensor` really holds at least data_size bytes — confirm with callers.
        int64_t data_size;
        if (is_in_bin_list(index_type)) {
            data_size = dim / 8 * row_num;
        } else {
            data_size = dim * row_num * sizeof(float);
        }

        raw_data_.resize(data_size);
        memcpy(raw_data_.data(), tensor, data_size);
    }
}
void
......@@ -153,8 +166,15 @@ IndexWrapper::BuildWithIds(const knowhere::DatasetPtr& dataset) {
*/
milvus::indexbuilder::IndexWrapper::Binary
IndexWrapper::Serialize() {
namespace indexcgo = milvus::proto::indexcgo;
auto binarySet = index_->Serialize(config_);
auto index_type = get_index_type();
if (is_in_nm_list(index_type)) {
std::shared_ptr<uint8_t[]> raw_data(new uint8_t[raw_data_.size()], std::default_delete<uint8_t[]>());
memcpy(raw_data.get(), raw_data_.data(), raw_data_.size());
binarySet.Append(RAW_DATA, raw_data, raw_data_.size());
}
namespace indexcgo = milvus::proto::indexcgo;
indexcgo::BinarySet ret;
for (auto [key, value] : binarySet.binary_map_) {
......@@ -184,16 +204,25 @@ IndexWrapper::Load(const char* serialized_sliced_blob_buffer, int32_t size) {
milvus::knowhere::BinarySet binarySet;
for (auto i = 0; i < blob_buffer.datas_size(); i++) {
auto binary = blob_buffer.datas(i);
std::shared_ptr<uint8_t[]> binary_data(new uint8_t[binary.value().length() + 1],
std::default_delete<uint8_t[]>());
memcpy(binary_data.get(), binary.value().c_str(), binary.value().length());
binary_data[binary.value().length()] = 0;
binarySet.Append(binary.key(), binary_data, binary.value().length() + 1);
const auto& binary = blob_buffer.datas(i);
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
auto bptr = std::make_shared<milvus::knowhere::Binary>();
bptr->data = std::shared_ptr<uint8_t[]>((uint8_t*)binary.value().c_str(), deleter);
bptr->size = binary.value().length();
binarySet.Append(binary.key(), bptr);
}
index_->Load(binarySet);
}
// Returns the index type stored in the parsed configuration under "index_type".
// When that key is absent, knowhere::IndexEnum::INDEX_FAISS_IVFPQ is returned.
//
// index_->index_type() is intentionally not used here: according to the original
// note, a knowhere bug makes every ivf-based index report ivf flat after it has
// been loaded.
std::string
IndexWrapper::get_index_type() {
    const auto configured = get_config_by_name<std::string>("index_type");
    if (configured.has_value()) {
        return configured.value();
    }
    return knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
}
} // namespace indexbuilder
} // namespace milvus
......@@ -11,6 +11,7 @@
#include <string>
#include <optional>
#include <vector>
#include "knowhere/index/vector_index/VecIndex.h"
namespace milvus {
......@@ -41,6 +42,9 @@ class IndexWrapper {
void
parse();
std::string
get_index_type();
template <typename T>
std::optional<T>
get_config_by_name(std::string name);
......@@ -56,6 +60,7 @@ class IndexWrapper {
milvus::json type_config_;
milvus::json index_config_;
knowhere::Config config_;
std::vector<uint8_t> raw_data_;
};
} // namespace indexbuilder
......
......@@ -30,13 +30,6 @@ CStatus
CreateIndex(const char* serialized_type_params, const char* serialized_index_params, CIndex* res_index) {
auto status = CStatus();
try {
// std::cout << "strlen(serialized_type_params): " << CGODebugUtils::Strlen(serialized_type_params,
// type_params_size)
// << std::endl;
// std::cout << "type_params_size: " << type_params_size << std::endl;
// std::cout << "strlen(serialized_index_params): "
// << CGODebugUtils::Strlen(serialized_index_params, index_params_size) << std::endl;
// std::cout << "index_params_size: " << index_params_size << std::endl;
auto index =
std::make_unique<milvus::indexbuilder::IndexWrapper>(serialized_type_params, serialized_index_params);
*res_index = index.release();
......@@ -108,8 +101,17 @@ SerializeToSlicedBuffer(CIndex index, int32_t* buffer_size, char** res_buffer) {
return status;
}
void
CStatus
LoadFromSlicedBuffer(CIndex index, const char* serialized_sliced_blob_buffer, int32_t size) {
auto cIndex = (milvus::indexbuilder::IndexWrapper*)index;
cIndex->Load(serialized_sliced_blob_buffer, size);
auto status = CStatus();
try {
auto cIndex = (milvus::indexbuilder::IndexWrapper*)index;
cIndex->Load(serialized_sliced_blob_buffer, size);
status.error_code = Success;
status.error_msg = "";
} catch (std::runtime_error& e) {
status.error_code = UnexpectedException;
status.error_msg = strdup(e.what());
}
return status;
}
......@@ -50,7 +50,7 @@ BuildBinaryVecIndexWithoutIds(CIndex index, int64_t data_size, const uint8_t* ve
CStatus
SerializeToSlicedBuffer(CIndex index, int32_t* buffer_size, char** res_buffer);
void
CStatus
LoadFromSlicedBuffer(CIndex index, const char* serialized_sliced_blob_buffer, int32_t size);
#ifdef __cplusplus
......
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
#include <vector>
#include <string>
#include <algorithm>
#include "index/knowhere/knowhere/index/IndexType.h"
namespace milvus {
namespace indexbuilder {
// Returns the index types for which IndexWrapper keeps a separate copy of the raw
// vectors (checked through is_in_nm_list()).
//
// Declared inline because this header is included from more than one translation
// unit; a non-inline definition here would violate the one-definition rule.
inline std::vector<std::string>
NM_List() {
    static const std::vector<std::string> ret{
        milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
        milvus::knowhere::IndexEnum::INDEX_NSG,
    };
    return ret;
}
// Returns the index types that are treated as binary-vector indexes
// (checked through is_in_bin_list()).
//
// Declared inline because this header is included from more than one translation
// unit; a non-inline definition here would violate the one-definition rule.
inline std::vector<std::string>
BIN_List() {
    static const std::vector<std::string> ret{
        milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
        milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
    };
    return ret;
}
// Returns true when `index_type` is one of the entries returned by BIN_List().
//
// Declared inline because this header is included from more than one translation
// unit; a non-inline definition here would violate the one-definition rule.
inline bool
is_in_bin_list(const milvus::knowhere::IndexType& index_type) {
    const auto bin_list = BIN_List();
    return std::find(bin_list.begin(), bin_list.end(), index_type) != bin_list.end();
}
// Returns true when `index_type` is one of the entries returned by NM_List().
//
// Declared inline because this header is included from more than one translation
// unit; a non-inline definition here would violate the one-definition rule.
inline bool
is_in_nm_list(const milvus::knowhere::IndexType& index_type) {
    const auto nm_list = NM_List();
    return std::find(nm_list.begin(), nm_list.end(), index_type) != nm_list.end();
}
} // namespace indexbuilder
} // namespace milvus
......@@ -22,6 +22,7 @@
#include "test_utils/DataGen.h"
#include "faiss/MetricType.h"
#include "index/knowhere/knowhere/index/vector_index/VecIndexFactory.h"
#include "indexbuilder/utils.h"
namespace indexcgo = milvus::proto::indexcgo;
......@@ -30,6 +31,9 @@ constexpr int64_t NB = 10000;
// NQ is not referenced in the visible part of this file — presumably the number
// of query vectors; TODO confirm against its users.
constexpr int64_t NQ = 10;
// K only shows up in a commented-out TOPK config entry in the visible code.
constexpr int64_t K = 4;
// File-level metric type constant (L2).
constexpr auto METRIC_TYPE = milvus::knowhere::Metric::L2;
#ifdef MILVUS_GPU_VERSION
// Device id written into the IVF_FLAT config under meta::DEVICEID in GPU builds.
int DEVICEID = 0;
#endif
namespace {
auto
......@@ -45,6 +49,18 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, K},
{milvus::knowhere::IndexParams::nlist, 100},
// {milvus::knowhere::IndexParams::nprobe, 4},
{milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
#ifdef MILVUS_GPU_VERSION
{milvus::knowhere::meta::DEVICEID, DEVICEID},
#endif
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
......@@ -87,7 +103,7 @@ generate_params(const milvus::knowhere::IndexType& index_type, const milvus::kno
}
auto
GenDataset(int64_t N, milvus::knowhere::MetricType metric_type, bool is_binary, int64_t dim = DIM) {
GenDataset(int64_t N, const milvus::knowhere::MetricType& metric_type, bool is_binary, int64_t dim = DIM) {
auto schema = std::make_shared<milvus::Schema>();
auto faiss_metric_type = milvus::knowhere::GetMetricType(metric_type);
if (!is_binary) {
......@@ -165,6 +181,18 @@ TEST(PQ, Build) {
ASSERT_NO_THROW(index->AddWithoutIds(xb_dataset, conf));
}
// Smoke test: a raw knowhere IVF_FLAT index can be trained on a generated float
// dataset and can then accept the same vectors without ids, without throwing.
TEST(IVFFLATNM, Build) {
    const auto kIndexType = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
    const auto kMetricType = milvus::knowhere::Metric::L2;

    auto build_conf = generate_conf(kIndexType, kMetricType);
    auto ivf_flat = milvus::knowhere::VecIndexFactory::GetInstance().CreateVecIndex(kIndexType);

    // NB float vectors are generated, then wrapped into a knowhere dataset.
    auto generated = GenDataset(NB, kMetricType, false);
    auto base_vectors = generated.get_col<float>(0);
    auto base_dataset = milvus::knowhere::GenDataset(NB, DIM, base_vectors.data());

    ASSERT_NO_THROW(ivf_flat->Train(base_dataset, build_conf));
    ASSERT_NO_THROW(ivf_flat->AddWithoutIds(base_dataset, build_conf));
}
TEST(BINFLAT, Build) {
auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
auto metric_type = milvus::knowhere::Metric::JACCARD;
......@@ -211,6 +239,53 @@ TEST(PQWrapper, Build) {
ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
}
// Builds an IVF_FLAT index through IndexWrapper from text-format type/index params
// and checks that BuildWithoutIds() does not throw on a generated float dataset.
TEST(IVFFLATNMWrapper, Build) {
    auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
    auto metric_type = milvus::knowhere::Metric::L2;
    indexcgo::TypeParams type_params;
    indexcgo::IndexParams index_params;
    std::tie(type_params, index_params) = generate_params(index_type, metric_type);

    // Use gtest assertions rather than assert(): assert() is compiled out under
    // NDEBUG, which would leave the serialization result unchecked.
    std::string type_params_str, index_params_str;
    ASSERT_TRUE(google::protobuf::TextFormat::PrintToString(type_params, &type_params_str));
    ASSERT_TRUE(google::protobuf::TextFormat::PrintToString(index_params, &index_params_str));

    auto dataset = GenDataset(NB, metric_type, false);
    auto xb_data = dataset.get_col<float>(0);
    auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());

    auto index =
        std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
    ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
}
// Round-trip test for an IVF_FLAT index behind IndexWrapper: build, serialize,
// load the bytes into a second wrapper, then compare dimensions and serialize again.
TEST(IVFFLATNMWrapper, Codec) {
    auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
    auto metric_type = milvus::knowhere::Metric::L2;
    indexcgo::TypeParams type_params;
    indexcgo::IndexParams index_params;
    std::tie(type_params, index_params) = generate_params(index_type, metric_type);

    // Use gtest assertions rather than assert(): assert() is compiled out under
    // NDEBUG, which would leave the serialization result unchecked.
    std::string type_params_str, index_params_str;
    ASSERT_TRUE(google::protobuf::TextFormat::PrintToString(type_params, &type_params_str));
    ASSERT_TRUE(google::protobuf::TextFormat::PrintToString(index_params, &index_params_str));

    auto dataset = GenDataset(NB, metric_type, false);
    auto xb_data = dataset.get_col<float>(0);
    auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());

    auto index =
        std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
    ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));

    auto binary = index->Serialize();
    auto copy_index =
        std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
    ASSERT_NO_THROW(copy_index->Load(binary.data, binary.size));

    // Compare the loaded copy against the original index; comparing the copy
    // with itself can never fail.
    ASSERT_EQ(index->dim(), copy_index->dim());

    // Re-serialization is only exercised, not compared with `binary`.
    // NOTE(review): presumably because the serialized bytes of this index type are
    // not stable across a round trip — confirm before adding a byte comparison.
    auto copy_binary = copy_index->Serialize();
}
TEST(BinFlatWrapper, Build) {
auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
auto metric_type = milvus::knowhere::Metric::JACCARD;
......@@ -257,14 +332,14 @@ TEST(BinIdMapWrapper, Build) {
ASSERT_NO_THROW(index->BuildWithIds(xb_dataset));
}
// Index-type / metric-type pairs that every IndexWrapperTest case runs against.
// The suite is instantiated exactly once: a second instantiation with the same
// prefix and test case name would be a redefinition.
INSTANTIATE_TEST_CASE_P(
    IndexTypeParameters,
    IndexWrapperTest,
    ::testing::Values(
        std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ, milvus::knowhere::Metric::L2),
        std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, milvus::knowhere::Metric::L2),
        std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, milvus::knowhere::Metric::JACCARD),
        std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, milvus::knowhere::Metric::JACCARD)));
TEST_P(IndexWrapperTest, Constructor) {
auto index =
......@@ -306,6 +381,9 @@ TEST_P(IndexWrapperTest, Codec) {
ASSERT_NO_THROW(copy_index->Load(binary.data, binary.size));
ASSERT_EQ(copy_index->dim(), copy_index->dim());
auto copy_binary = copy_index->Serialize();
ASSERT_EQ(binary.size, copy_binary.size);
ASSERT_EQ(strcmp(binary.data, copy_binary.data), 0);
if (!milvus::indexbuilder::is_in_nm_list(index_type)) {
// binary may be not same due to uncertain internal map order
ASSERT_EQ(binary.size, copy_binary.size);
ASSERT_EQ(strcmp(binary.data, copy_binary.data), 0);
}
}
......@@ -83,10 +83,16 @@ func (index *CIndex) Load(blobs []*Blob) error {
}
/*
void
CStatus
LoadFromSlicedBuffer(CIndex index, const char* serialized_sliced_blob_buffer, int32_t size);
*/
C.LoadFromSlicedBuffer(index.indexPtr, (*C.char)(unsafe.Pointer(&datas[0])), (C.int32_t)(len(datas)))
status := C.LoadFromSlicedBuffer(index.indexPtr, (*C.char)(unsafe.Pointer(&datas[0])), (C.int32_t)(len(datas)))
errorCode := status.error_code
if errorCode != 0 {
errorMsg := C.GoString(status.error_msg)
defer C.free(unsafe.Pointer(status.error_msg))
return errors.New("BuildFloatVecIndexWithoutIds failed, C runtime error detected, error code = " + strconv.Itoa(int(errorCode)) + ", error msg = " + errorMsg)
}
return nil
}
......
......@@ -8,15 +8,24 @@ import (
)
// Shared constants for the index builder tests. Each identifier is declared
// exactly once; Go rejects a duplicate declaration within a const block.
const (
	// index type
	IvfPq      = "IVF_PQ"
	IvfFlatNM  = "IVF_FLAT"
	BinIvfFlat = "BIN_IVF_FLAT"
	BinFlat    = "BIN_FLAT"

	// metric type
	L2      = "L2"
	IP      = "IP"
	hamming = "HAMMING"
	Jaccard = "JACCARD"

	// index build parameters used by generateParams
	dim   = 8
	nlist = 100
	m     = 4
	nbits = 8
	// nb is 8 * 10000 — NOTE(review): presumably the number of generated vectors; confirm against its users.
	nb = 8 * 10000
	// sliceSize is written to the "SLICE_SIZE" index parameter.
	sliceSize = 4
)
type testCase struct {
......@@ -28,12 +37,18 @@ type testCase struct {
// generateFloatVectorTestCases lists the float-vector test cases: IVF_PQ and
// IVF_FLAT, each with the L2 and IP metrics. The third field is false in every case.
func generateFloatVectorTestCases() []testCase {
	var floatCases []testCase
	floatCases = append(floatCases,
		testCase{IvfPq, L2, false},
		testCase{IvfPq, IP, false},
		testCase{IvfFlatNM, L2, false},
		testCase{IvfFlatNM, IP, false},
	)
	return floatCases
}
// generateBinaryVectorTestCases lists the binary-vector test cases: BIN_FLAT with
// the Jaccard and hamming metrics. The third field is true in every case.
// The BIN_IVF_FLAT cases are currently disabled.
func generateBinaryVectorTestCases() []testCase {
	var binaryCases []testCase
	binaryCases = append(binaryCases,
		//testCase{BinIvfFlat, Jaccard, true},
		//testCase{BinIvfFlat, hamming, true},
		testCase{BinFlat, Jaccard, true},
		testCase{BinFlat, hamming, true},
	)
	return binaryCases
}
......@@ -51,8 +66,20 @@ func generateParams(indexType, metricType string) (map[string]string, map[string
indexParams["nlist"] = strconv.Itoa(nlist)
indexParams["m"] = strconv.Itoa(m)
indexParams["nbits"] = strconv.Itoa(nbits)
indexParams["SLICE_SIZE"] = strconv.Itoa(sliceSize)
} else if indexType == BinIvfFlat {
indexParams["dim"] = strconv.Itoa(dim)
indexParams["nlist"] = strconv.Itoa(nlist)
indexParams["m"] = strconv.Itoa(m)
indexParams["nbits"] = strconv.Itoa(nbits)
indexParams["SLICE_SIZE"] = strconv.Itoa(sliceSize)
} else if indexType == IvfFlatNM {
indexParams["dim"] = strconv.Itoa(dim)
indexParams["nlist"] = strconv.Itoa(nlist)
} else if indexType == BinFlat {
indexParams["dim"] = strconv.Itoa(dim)
} else {
panic("")
}
return typeParams, indexParams
......@@ -143,6 +170,8 @@ func TestCIndex_Codec(t *testing.T) {
assert.Equal(t, err, nil)
copyIndex, err := NewCIndex(typeParams, indexParams)
assert.NotEqual(t, copyIndex, nil)
assert.Equal(t, err, nil)
err = copyIndex.Load(blobs)
assert.Equal(t, err, nil)
copyBlobs, err := copyIndex.Serialize()
......
......@@ -79,8 +79,11 @@ func CreateBuilder(ctx context.Context) (*Builder, error) {
Creds: credentials.NewStaticV4(minIOAccessKeyID, minIOSecretAccessKey, ""),
Secure: minIOUseSSL,
})
b.kv, err = miniokv.NewMinIOKV(b.loopCtx, minIOClient, "milvus-distributed-indexbuilder")
if err != nil {
return nil, err
}
b.kv, err = miniokv.NewMinIOKV(b.loopCtx, minIOClient, "milvus-distributed-indexbuilder")
if err != nil {
return nil, err
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册