提交 6cd8c252 编写于 作者: D dragondriver 提交者: yefu.chen

Add parameter checks for almost all index types

Signed-off-by: Ndragondriver <jiquan.long@zilliz.com>
上级 29b21b4b
......@@ -67,75 +67,65 @@ IndexWrapper::parse() {
config_[key] = value;
}
if (!config_.contains(milvus::knowhere::meta::DIM)) {
// should raise exception here?
PanicInfo("dim must be specific in type params or index params!");
} else {
auto dim = config_[milvus::knowhere::meta::DIM].get<std::string>();
config_[milvus::knowhere::meta::DIM] = std::stoi(dim);
}
auto stoi_closure = [](const std::string& s) -> int { return std::stoi(s); };
if (!config_.contains(milvus::knowhere::meta::TOPK)) {
} else {
auto topk = config_[milvus::knowhere::meta::TOPK].get<std::string>();
config_[milvus::knowhere::meta::TOPK] = std::stoi(topk);
}
/***************************** meta *******************************/
check_parameter<int>(milvus::knowhere::meta::DIM, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::meta::TOPK, stoi_closure, std::nullopt);
if (!config_.contains(milvus::knowhere::IndexParams::nlist)) {
} else {
auto nlist = config_[milvus::knowhere::IndexParams::nlist].get<std::string>();
config_[milvus::knowhere::IndexParams::nlist] = std::stoi(nlist);
}
/***************************** IVF Params *******************************/
check_parameter<int>(milvus::knowhere::IndexParams::nprobe, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::nlist, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::m, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::nbits, stoi_closure, std::nullopt);
if (!config_.contains(milvus::knowhere::IndexParams::nprobe)) {
} else {
auto nprobe = config_[milvus::knowhere::IndexParams::nprobe].get<std::string>();
config_[milvus::knowhere::IndexParams::nprobe] = std::stoi(nprobe);
}
/************************** NSG Parameter **************************/
check_parameter<int>(milvus::knowhere::IndexParams::knng, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::search_length, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::out_degree, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::candidate, stoi_closure, std::nullopt);
if (!config_.contains(milvus::knowhere::IndexParams::nbits)) {
} else {
auto nbits = config_[milvus::knowhere::IndexParams::nbits].get<std::string>();
config_[milvus::knowhere::IndexParams::nbits] = std::stoi(nbits);
}
/************************** HNSW Params *****************************/
check_parameter<int>(milvus::knowhere::IndexParams::efConstruction, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::M, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::ef, stoi_closure, std::nullopt);
if (!config_.contains(milvus::knowhere::IndexParams::m)) {
} else {
auto m = config_[milvus::knowhere::IndexParams::m].get<std::string>();
config_[milvus::knowhere::IndexParams::m] = std::stoi(m);
}
/************************** Annoy Params *****************************/
check_parameter<int>(milvus::knowhere::IndexParams::n_trees, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::search_k, stoi_closure, std::nullopt);
/************************** NSG Parameter **************************/
if (!config_.contains(milvus::knowhere::IndexParams::knng)) {
} else {
auto knng = config_[milvus::knowhere::IndexParams::knng].get<std::string>();
config_[milvus::knowhere::IndexParams::knng] = std::stoi(knng);
}
/************************** PQ Params *****************************/
check_parameter<int>(milvus::knowhere::IndexParams::PQM, stoi_closure, std::nullopt);
if (!config_.contains(milvus::knowhere::IndexParams::search_length)) {
} else {
auto search_length = config_[milvus::knowhere::IndexParams::search_length].get<std::string>();
config_[milvus::knowhere::IndexParams::search_length] = std::stoi(search_length);
}
/************************** NGT Params *****************************/
check_parameter<int>(milvus::knowhere::IndexParams::edge_size, stoi_closure, std::nullopt);
if (!config_.contains(milvus::knowhere::IndexParams::out_degree)) {
} else {
auto out_degree = config_[milvus::knowhere::IndexParams::out_degree].get<std::string>();
config_[milvus::knowhere::IndexParams::out_degree] = std::stoi(out_degree);
}
/************************** NGT Search Params *****************************/
check_parameter<int>(milvus::knowhere::IndexParams::epsilon, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::max_search_edges, stoi_closure, std::nullopt);
if (!config_.contains(milvus::knowhere::IndexParams::candidate)) {
} else {
auto candidate = config_[milvus::knowhere::IndexParams::candidate].get<std::string>();
config_[milvus::knowhere::IndexParams::candidate] = std::stoi(candidate);
}
/************************** NGT_PANNG Params *****************************/
check_parameter<int>(milvus::knowhere::IndexParams::forcedly_pruned_edge_size, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::selectively_pruned_edge_size, stoi_closure, std::nullopt);
/************************** NGT_ONNG Params *****************************/
check_parameter<int>(milvus::knowhere::IndexParams::outgoing_edge_size, stoi_closure, std::nullopt);
check_parameter<int>(milvus::knowhere::IndexParams::incoming_edge_size, stoi_closure, std::nullopt);
/************************** Serialize Params *******************************/
check_parameter<int>(milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, stoi_closure, std::optional{4});
}
/************************** Serialize *******************************/
if (!config_.contains(milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE)) {
config_[milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE] = 4;
template <typename T>
void
IndexWrapper::check_parameter(const std::string& key, std::function<T(std::string)> fn, std::optional<T> default_v) {
if (!config_.contains(key)) {
if (default_v.has_value()) {
config_[key] = default_v.value();
}
} else {
auto slice_size = config_[milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE].get<std::string>();
config_[milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE] = std::stoi(slice_size);
auto value = config_[key];
config_[key] = fn(value);
}
}
......
......@@ -52,6 +52,12 @@ class IndexWrapper {
void
StoreRawData(const knowhere::DatasetPtr& dataset);
template <typename T>
void
check_parameter(const std::string& key,
std::function<T(std::string)> fn,
std::optional<T> default_v = std::nullopt);
public:
void
BuildWithIds(const knowhere::DatasetPtr& dataset);
......
......@@ -25,14 +25,17 @@ NM_List() {
static std::vector<std::string> ret{
milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
milvus::knowhere::IndexEnum::INDEX_NSG,
milvus::knowhere::IndexEnum::INDEX_RHNSWFlat,
};
return ret;
}
std::vector<std::string>
BIN_List() {
static std::vector<std::string> ret{milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT};
static std::vector<std::string> ret{
milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
};
return ret;
}
......@@ -40,7 +43,7 @@ std::vector<std::string>
Need_ID_List() {
static std::vector<std::string> ret{
// milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
// milvus::knowhere::IndexEnum::INDEX_NSG
// milvus::knowhere::IndexEnum::INDEX_NSG,
};
return ret;
......@@ -48,7 +51,9 @@ Need_ID_List() {
std::vector<std::string>
Need_BuildAll_list() {
static std::vector<std::string> ret{milvus::knowhere::IndexEnum::INDEX_NSG};
static std::vector<std::string> ret{
milvus::knowhere::IndexEnum::INDEX_NSG,
};
return ret;
}
......
......@@ -181,15 +181,15 @@ FillTargetEntry(CSegmentBase c_segment, CPlan c_plan, CQueryResult c_result) {
CStatus
UpdateSegmentIndex(CSegmentBase c_segment, CLoadIndexInfo c_load_index_info) {
auto status = CStatus();
try {
auto segment = (milvus::segcore::SegmentBase*)c_segment;
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
auto res = segment->LoadIndexing(*load_index_info);
auto status = CStatus();
status.error_code = Success;
status.error_msg = "";
return status;
} catch (std::exception& e) {
auto status = CStatus();
status.error_code = UnexpectedException;
status.error_msg = strdup(e.what());
return status;
......
......@@ -99,14 +99,105 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
{milvus::knowhere::Metric::TYPE, metric_type},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_NSG) {
return milvus::knowhere::Config{{milvus::knowhere::meta::DIM, DIM},
{milvus::knowhere::IndexParams::nlist, 163},
{milvus::knowhere::IndexParams::nprobe, 8},
{milvus::knowhere::IndexParams::knng, 20},
{milvus::knowhere::IndexParams::search_length, 40},
{milvus::knowhere::IndexParams::out_degree, 30},
{milvus::knowhere::IndexParams::candidate, 100},
{milvus::knowhere::Metric::TYPE, metric_type}};
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
{milvus::knowhere::IndexParams::nlist, 163},
{milvus::knowhere::IndexParams::nprobe, 8},
{milvus::knowhere::IndexParams::knng, 20},
{milvus::knowhere::IndexParams::search_length, 40},
{milvus::knowhere::IndexParams::out_degree, 30},
{milvus::knowhere::IndexParams::candidate, 100},
{milvus::knowhere::Metric::TYPE, metric_type},
};
#ifdef MILVUS_SUPPORT_SPTAG
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_SPTAG_KDT_RNT) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_SPTAG_BKT_RNT) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
};
#endif
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_HNSW) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::IndexParams::M, 16},
{milvus::knowhere::IndexParams::efConstruction, 200},
{milvus::knowhere::IndexParams::ef, 200},
{milvus::knowhere::Metric::TYPE, metric_type},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_ANNOY) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::IndexParams::n_trees, 4},
{milvus::knowhere::IndexParams::search_k, 100},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWFlat) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::IndexParams::M, 16},
{milvus::knowhere::IndexParams::efConstruction, 200},
{milvus::knowhere::IndexParams::ef, 200},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWPQ) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::IndexParams::M, 16},
{milvus::knowhere::IndexParams::efConstruction, 200},
{milvus::knowhere::IndexParams::ef, 200},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
{milvus::knowhere::IndexParams::PQM, 8},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWSQ) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::IndexParams::M, 16},
{milvus::knowhere::IndexParams::efConstruction, 200},
{milvus::knowhere::IndexParams::ef, 200},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTPANNG) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::IndexParams::edge_size, 10},
{milvus::knowhere::IndexParams::epsilon, 0.1},
{milvus::knowhere::IndexParams::max_search_edges, 50},
{milvus::knowhere::IndexParams::forcedly_pruned_edge_size, 60},
{milvus::knowhere::IndexParams::selectively_pruned_edge_size, 30},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
};
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTONNG) {
return milvus::knowhere::Config{
{milvus::knowhere::meta::DIM, DIM},
// {milvus::knowhere::meta::TOPK, 10},
{milvus::knowhere::Metric::TYPE, metric_type},
{milvus::knowhere::IndexParams::edge_size, 20},
{milvus::knowhere::IndexParams::epsilon, 0.1},
{milvus::knowhere::IndexParams::max_search_edges, 50},
{milvus::knowhere::IndexParams::outgoing_edge_size, 5},
{milvus::knowhere::IndexParams::incoming_edge_size, 40},
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
};
}
return milvus::knowhere::Config();
}
......@@ -366,6 +457,17 @@ INSTANTIATE_TEST_CASE_P(
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
milvus::knowhere::Metric::JACCARD),
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, milvus::knowhere::Metric::JACCARD),
#ifdef MILVUS_SUPPORT_SPTAG
std::pair(milvus::knowhere::IndexEnum::INDEX_SPTAG_KDT_RNT, milvus::knowhere::Metric::L2),
std::pair(milvus::knowhere::IndexEnum::INDEX_SPTAG_BKT_RNT, milvus::knowhere::Metric::L2),
#endif
std::pair(milvus::knowhere::IndexEnum::INDEX_HNSW, milvus::knowhere::Metric::L2),
std::pair(milvus::knowhere::IndexEnum::INDEX_ANNOY, milvus::knowhere::Metric::L2),
std::pair(milvus::knowhere::IndexEnum::INDEX_RHNSWFlat, milvus::knowhere::Metric::L2),
std::pair(milvus::knowhere::IndexEnum::INDEX_RHNSWPQ, milvus::knowhere::Metric::L2),
std::pair(milvus::knowhere::IndexEnum::INDEX_RHNSWSQ, milvus::knowhere::Metric::L2),
std::pair(milvus::knowhere::IndexEnum::INDEX_NGTPANNG, milvus::knowhere::Metric::L2),
std::pair(milvus::knowhere::IndexEnum::INDEX_NGTONNG, milvus::knowhere::Metric::L2),
std::pair(milvus::knowhere::IndexEnum::INDEX_NSG, milvus::knowhere::Metric::L2)));
TEST_P(IndexWrapperTest, Constructor) {
......
......@@ -181,6 +181,8 @@ func TestCIndex_Codec(t *testing.T) {
err = index.Delete()
assert.Equal(t, err, nil)
err = copyIndex.Delete()
assert.Equal(t, err, nil)
}
}
......
......@@ -68,6 +68,18 @@ func (scheduler *FlushScheduler) describe() error {
return err
}
for fieldID, data := range mapData {
// check field indexable
segMeta, err := scheduler.metaTable.GetSegmentByID(singleSegmentID)
if err != nil {
return err
}
indexable, err := scheduler.metaTable.IsIndexable(segMeta.CollectionID, fieldID)
if err != nil {
return err
}
if !indexable {
continue
}
info := &IndexBuildInfo{
segmentID: singleSegmentID,
fieldID: fieldID,
......
......@@ -5,6 +5,8 @@ import (
"strconv"
"sync"
"github.com/zilliztech/milvus-distributed/internal/proto/schemapb"
"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
"github.com/zilliztech/milvus-distributed/internal/util/typeutil"
......@@ -678,3 +680,23 @@ func (mt *metaTable) UpdateFieldIndexParams(collName string, fieldName string, i
return fmt.Errorf("can not find field with id %s", fieldName)
}
// IsIndexable reports whether the field fieldID of collection collID can have
// an index built on it.
//
// The lookup runs under the read side of mt.ddLock. It returns an error when
// collID is not present in mt.collID2Meta. Otherwise it returns true only if
// the collection schema contains a field with that ID whose data type is
// VECTOR_BINARY or VECTOR_FLOAT and whose IndexParams is non-empty; in every
// other case (field absent from the schema, e.g. timestamp, or not indexable)
// it returns false with a nil error.
func (mt *metaTable) IsIndexable(collID UniqueID, fieldID UniqueID) (bool, error) {
	mt.ddLock.RLock()
	defer mt.ddLock.RUnlock()

	if _, found := mt.collID2Meta[collID]; !found {
		return false, fmt.Errorf("can not find collection with id %d", collID)
	}

	fields := mt.collID2Meta[collID].Schema.Fields
	for _, field := range fields {
		if field.FieldID != fieldID {
			continue
		}
		// Only vector fields that carry index params are indexable.
		isVector := field.DataType == schemapb.DataType_VECTOR_BINARY ||
			field.DataType == schemapb.DataType_VECTOR_FLOAT
		if isVector && len(field.IndexParams) != 0 {
			return true, nil
		}
	}

	// No matching field, or the matching field is not indexable.
	return false, nil
}
......@@ -5,6 +5,8 @@ import (
"testing"
"time"
"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
"github.com/zilliztech/milvus-distributed/internal/proto/etcdpb"
"github.com/zilliztech/milvus-distributed/internal/proto/schemapb"
......@@ -43,7 +45,7 @@ func TestPersistenceScheduler(t *testing.T) {
Name: "testcoll",
Fields: []*schemapb.FieldSchema{
{FieldID: 1},
{FieldID: 100},
{FieldID: 100, DataType: schemapb.DataType_VECTOR_FLOAT, IndexParams: []*commonpb.KeyValuePair{{Key: "k", Value: "v"}}},
},
},
})
......
......@@ -16,6 +16,23 @@ import (
"github.com/zilliztech/milvus-distributed/internal/querynode/client"
)
//func TestLoadIndexClient_LoadIndex(t *testing.T) {
// pulsarURL := Params.PulsarAddress
// loadIndexChannels := Params.LoadIndexChannelNames
// loadIndexClient := client.NewLoadIndexClient(context.Background(), pulsarURL, loadIndexChannels)
//
// loadIndexPath := "collection0-segment0-field0"
// loadIndexPaths := make([]string, 0)
// loadIndexPaths = append(loadIndexPaths, loadIndexPath)
//
// indexParams := make(map[string]string)
// indexParams["index_type"] = "IVF_PQ"
// indexParams["index_mode"] = "cpu"
//
// loadIndexClient.LoadIndex(loadIndexPaths, 0, 0, "field0", indexParams)
// loadIndexClient.Close()
//}
func TestLoadIndexService(t *testing.T) {
node := newQueryNode()
collectionID := rand.Int63n(1000000)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册