diff --git a/configs/advanced/segcore.yaml b/configs/advanced/segcore.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e8169030fcf24898d16e552551d09d69593b038 --- /dev/null +++ b/configs/advanced/segcore.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2019-2020 Zilliz. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under the License. + +segcore: + chunk_size: 32768 + diff --git a/internal/core/bench/bench_search.cpp b/internal/core/bench/bench_search.cpp index bb7b38852c108f2a3feaf10763f8d5a7267897de..d4ad0b53e317e26876a8c00fd1f0e5cf36299f14 100644 --- a/internal/core/bench/bench_search.cpp +++ b/internal/core/bench/bench_search.cpp @@ -73,7 +73,7 @@ Search_SmallIndex(benchmark::State& state) { segconf.set_size_per_chunk(chunk_size); auto segment = CreateGrowingSegment(schema, segconf); if (!is_small_index) { - segment->debug_disable_small_index(); + segment->disable_small_index(); } segment->PreInsert(N); ColumnBasedRawData raw_data; diff --git a/internal/core/src/common/FieldMeta.h b/internal/core/src/common/FieldMeta.h index 394149d8f5c0acdff1c553ceb60480773f903a74..4d4444591345dd666fc6691665300f86bd1e3cc7 100644 --- a/internal/core/src/common/FieldMeta.h +++ b/internal/core/src/common/FieldMeta.h @@ -23,10 +23,6 @@ datatype_sizeof(DataType data_type, int dim = 1) { switch (data_type) { case DataType::BOOL: return sizeof(bool); - case DataType::DOUBLE: - return sizeof(double); - case DataType::FLOAT: - return sizeof(float); case DataType::INT8: return sizeof(int8_t); case DataType::INT16: @@ -35,6 +31,10 @@ datatype_sizeof(DataType data_type, int dim = 1) { return sizeof(int32_t); case DataType::INT64: return sizeof(int64_t); + case DataType::FLOAT: + return sizeof(float); + case DataType::DOUBLE: + return sizeof(double); case DataType::VECTOR_FLOAT: return sizeof(float) * dim; case DataType::VECTOR_BINARY: { @@ -43,7 +43,6 @@ datatype_sizeof(DataType data_type, int dim = 1) { } default: { throw std::invalid_argument("unsupported data type"); - return 0; } } } @@ -54,10 +53,6 @@ datatype_name(DataType data_type) { switch (data_type) { case DataType::BOOL: return "bool"; - case DataType::DOUBLE: - return "double"; - case DataType::FLOAT: - return "float"; case DataType::INT8: return "int8_t"; case DataType::INT16: @@ -66,6 +61,10 @@ datatype_name(DataType data_type) { return "int32_t"; case DataType::INT64: return "int64_t"; + case DataType::FLOAT: + return "float"; + case DataType::DOUBLE: + return "double"; case DataType::VECTOR_FLOAT: return "vector_float"; case DataType::VECTOR_BINARY: { @@ -84,7 +83,7 @@ datatype_is_vector(DataType datatype) { } inline bool -datatype_is_interger(DataType datatype) { +datatype_is_integer(DataType datatype) { switch (datatype) { case DataType::INT8: case DataType::INT16: @@ -99,8 +98,8 @@ datatype_is_interger(DataType datatype) { inline bool datatype_is_floating(DataType datatype) { switch (datatype) { - case DataType::DOUBLE: case DataType::FLOAT: + case DataType::DOUBLE: return true; default: return false; @@ -166,7 +165,7 @@ class FieldMeta { if (is_vector()) { return datatype_sizeof(type_, get_dim()); } else { - return datatype_sizeof(type_, 1); + return datatype_sizeof(type_); } } diff --git a/internal/core/src/common/Span.h b/internal/core/src/common/Span.h index f0d34779d3c0cffd89a2d4707389bde7caba8d23..de664156fd92ef21b195928139b50b995cbba561 100644 --- a/internal/core/src/common/Span.h +++ b/internal/core/src/common/Span.h @@ -23,11 +23,6 @@ class SpanBase { : data_(data), row_count_(row_count), element_sizeof_(element_sizeof) { } - int64_t - get_element_sizeof() const { - return element_sizeof_; - } - int64_t row_count() const { return row_count_; diff --git a/internal/core/src/common/VectorTrait.h b/internal/core/src/common/VectorTrait.h index 65b96a83ba8e8b3188f96652a43f1888ec562c2b..301e5e419eddab16d59d0cc9b16c662677a233fa 100644 --- a/internal/core/src/common/VectorTrait.h +++ b/internal/core/src/common/VectorTrait.h @@ -30,7 +30,7 @@ class BinaryVector : public VectorTrait { template inline constexpr int64_t -get_element_sizeof(int64_t dim) { +element_sizeof(int64_t dim) { static_assert(std::is_base_of_v); if constexpr (std::is_same_v) { return dim * sizeof(float); diff --git a/internal/core/src/segcore/SegcoreConfig.cpp b/internal/core/src/segcore/SegcoreConfig.cpp index 8bc72ee627f6efcbd1367f1b673717bf04f75db6..68b9da6e8bccb0f734e7aa8dab67d98aa4823d22 100644 --- a/internal/core/src/segcore/SegcoreConfig.cpp +++ b/internal/core/src/segcore/SegcoreConfig.cpp @@ -42,16 +42,16 @@ apply_parser(const YAML::Node& node, Func func) { return results; } -SegcoreConfig +void SegcoreConfig::parse_from(const std::string& config_path) { try { - SegcoreConfig result; YAML::Node top_config = YAML::LoadFile(config_path); Assert(top_config.IsMap()); auto seg_config = subnode(top_config, "segcore"); auto chunk_size = subnode(seg_config, "chunk_size").as(); - result.size_per_chunk_ = chunk_size; + this->size_per_chunk_ = chunk_size; +#if 0 auto index_list = subnode(seg_config, "small_index"); Assert(index_list.IsSequence()); @@ -92,16 +92,16 @@ SegcoreConfig::parse_from(const std::string& config_path) { } for (auto metric_type : metric_types) { - Assert(result.table_.count(metric_type)); + Assert(result.table_.count(metric_type) == 0); result.table_[metric_type] = conf; } } - return result; +#endif } catch (const SegcoreError& e) { - // re-throw throw e; } catch (const std::exception& e) { - PanicInfo(std::string("Invalid Yaml:\n") + e.what()); + std::string str = std::string("Invalid Yaml: ") + config_path + ", err: " + e.what(); + PanicInfo(str); } } diff --git a/internal/core/src/segcore/SegcoreConfig.h b/internal/core/src/segcore/SegcoreConfig.h index 21cd0414a34325133814de9e48074e33b751ee00..c2b820250e6af020029d693b79054ae0c0fb042c 100644 --- a/internal/core/src/segcore/SegcoreConfig.h +++ b/internal/core/src/segcore/SegcoreConfig.h @@ -10,10 +10,12 @@ // or implied. See the License for the specific language governing permissions and limitations under the License #pragma once -#include "common/Types.h" -#include "utils/Json.h" + #include #include +#include "common/Types.h" +#include "exceptions/EasyAssert.h" +#include "utils/Json.h" namespace milvus::segcore { @@ -24,23 +26,28 @@ struct SmallIndexConf { }; class SegcoreConfig { - public: - static SegcoreConfig - parse_from(const std::string& string_path); - static SegcoreConfig - default_config() { - // TODO: remove this when go side is ready - SegcoreConfig config; - config.set_size_per_chunk(32 * 1024); + private: + SegcoreConfig() { + // hard code configurations for small index SmallIndexConf sub_conf; sub_conf.build_params["nlist"] = 100; sub_conf.search_params["nprobe"] = 4; sub_conf.index_type = "IVF"; - config.table_[MetricType::METRIC_L2] = sub_conf; - config.table_[MetricType::METRIC_INNER_PRODUCT] = sub_conf; + table_[MetricType::METRIC_L2] = sub_conf; + table_[MetricType::METRIC_INNER_PRODUCT] = sub_conf; + } + + public: + static SegcoreConfig& + default_config() { + // TODO: remove this when go side is ready + static SegcoreConfig config; return config; } + void + parse_from(const std::string& string_path); + const SmallIndexConf& at(MetricType metric_type) const { Assert(table_.count(metric_type)); @@ -62,11 +69,8 @@ class SegcoreConfig { table_[metric_type] = small_index_conf; } - protected: - SegcoreConfig() = default; - private: - int64_t size_per_chunk_ = -1; + int64_t size_per_chunk_ = 32768; std::map table_; }; diff --git a/internal/core/src/segcore/SegmentGrowing.h b/internal/core/src/segcore/SegmentGrowing.h index e8e09cefddaf64bd734300d058006c7c6db41a0c..7207a660ec5260e81ced397ae569321e63bdf724 100644 --- a/internal/core/src/segcore/SegmentGrowing.h +++ b/internal/core/src/segcore/SegmentGrowing.h @@ -41,7 +41,7 @@ TestABI(); class SegmentGrowing : public SegmentInternalInterface { public: virtual void - debug_disable_small_index() = 0; + disable_small_index() = 0; virtual int64_t PreInsert(int64_t size) = 0; diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 95e85ff30b368a38bd2956287c7c7f7bbf913491..acb7f3d0af058c34f336e46c02ade80c5f68cdbf 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -202,9 +202,9 @@ SegmentGrowingImpl::do_insert(int64_t reserved_begin, } record_.ack_responder_.AddSegment(reserved_begin, reserved_begin + size); - if (!debug_disable_small_index_) { - indexing_record_.UpdateResourceAck(record_.ack_responder_.GetAck() / segcore_config_.get_size_per_chunk(), - record_); + if (enable_small_index_) { + int64_t chunk_size = segcore_config_.get_size_per_chunk(); + indexing_record_.UpdateResourceAck(record_.ack_responder_.GetAck() / chunk_size, record_); } } diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index cf19b85ba3d15c411895f3605d17046cd2345a73..c2baf26b87c895e67647ffdd57afb065d44035ef 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -115,8 +115,8 @@ class SegmentGrowingImpl : public SegmentGrowing { public: // only for debug void - debug_disable_small_index() override { - debug_disable_small_index_ = true; + disable_small_index() override { + enable_small_index_ = false; } ssize_t @@ -217,7 +217,7 @@ class SegmentGrowingImpl : public SegmentGrowing { tbb::concurrent_unordered_multimap uid2offset_; private: - bool debug_disable_small_index_ = false; + bool enable_small_index_ = true; }; } // namespace milvus::segcore diff --git a/internal/core/src/segcore/segcore_init_c.cpp b/internal/core/src/segcore/segcore_init_c.cpp index 1f3281e247df42c6e5f029a4e9ceaf4ee9c568bf..c565600cb1fb3e70bce79f4f2924c7bdf1333386 100644 --- a/internal/core/src/segcore/segcore_init_c.cpp +++ b/internal/core/src/segcore/segcore_init_c.cpp @@ -9,15 +9,17 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License -#include "index/thirdparty/faiss/FaissHook.h" -#include "segcore/segcore_init_c.h" -#include "knowhere/archive/KnowhereConfig.h" #include + +#include "exceptions/EasyAssert.h" +#include "knowhere/archive/KnowhereConfig.h" +#include "segcore/segcore_init_c.h" +#include "segcore/SegcoreConfig.h" #include "utils/Log.h" namespace milvus::segcore { static void -SegcoreInitImpl() { +SegcoreInitImpl(const char* config_dir) { namespace eg = milvus::engine; eg::KnowhereConfig::SetSimdType(eg::KnowhereConfig::SimdType::AUTO); eg::KnowhereConfig::SetBlasThreshold(16384); @@ -26,10 +28,23 @@ SegcoreInitImpl() { eg::KnowhereConfig::SetStatisticsLevel(0); el::Configurations el_conf; el_conf.setGlobally(el::ConfigurationType::Enabled, std::to_string(false)); + + // initializing segcore config + try { + SegcoreConfig& config = SegcoreConfig::default_config(); + if (config_dir != NULL) { + std::string config_file = std::string(config_dir) + "advanced/segcore.yaml"; + config.parse_from(config_file); + std::cout << "Parse config file: " << config_file << ", chunk_size: " << config.get_size_per_chunk() + << std::endl; + } + } catch (std::exception& e) { + PanicInfo("parse config fail: " + std::string(e.what())); + } } } // namespace milvus::segcore extern "C" void -SegcoreInit() { - milvus::segcore::SegcoreInitImpl(); +SegcoreInit(const char* config_dir) { + milvus::segcore::SegcoreInitImpl(config_dir); } diff --git a/internal/core/src/segcore/segcore_init_c.h b/internal/core/src/segcore/segcore_init_c.h index 6d22ab1b5ef2ea0e9b3d8cdc2f22f18cda8e621b..3ceb63860a4b90416b158c94a005d95bedfe3fbe 100644 --- a/internal/core/src/segcore/segcore_init_c.h +++ b/internal/core/src/segcore/segcore_init_c.h @@ -8,12 +8,15 @@ // Unless required by applicable law or agreed to in writing, software distributed under the License // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License + +#pragma once + #ifdef __cplusplus extern "C" { #endif void -SegcoreInit(); +SegcoreInit(const char* config_dir); #ifdef __cplusplus } diff --git a/internal/core/unittest/test_init.cpp b/internal/core/unittest/test_init.cpp index 30a77c565cc1c2e44a939863e413af8b0dbb7f79..4866541fba59369f1e7e0a663c5f66d4bfceda7b 100644 --- a/internal/core/unittest/test_init.cpp +++ b/internal/core/unittest/test_init.cpp @@ -19,5 +19,5 @@ TEST(Init, Naive) { using namespace milvus; using namespace milvus::segcore; - SegcoreInit(); + SegcoreInit(NULL); } \ No newline at end of file diff --git a/internal/core/unittest/test_plan_proto.cpp b/internal/core/unittest/test_plan_proto.cpp index e77bb58d14cab3ee42d8e170100b5bc5882059ba..a4a2217de04a5fa7d8b3535adf967fbae0c0ff08 100644 --- a/internal/core/unittest/test_plan_proto.cpp +++ b/internal/core/unittest/test_plan_proto.cpp @@ -71,7 +71,7 @@ TEST_P(PlanProtoTest, Range) { string value_tag = "bool_val"; if (datatype_is_floating((DataType)data_type)) { value_tag = "float_val"; - } else if (datatype_is_interger((DataType)data_type)) { + } else if (datatype_is_integer((DataType)data_type)) { value_tag = "int64_val"; } @@ -152,7 +152,7 @@ TEST_P(PlanProtoTest, TermExpr) { string value_tag = "bool_val"; if (datatype_is_floating((DataType)data_type)) { value_tag = "float_val"; - } else if (datatype_is_interger((DataType)data_type)) { + } else if (datatype_is_integer((DataType)data_type)) { value_tag = "int64_val"; } @@ -239,7 +239,7 @@ TEST(PlanProtoTest, NotExpr) { string value_tag = "bool_val"; if (datatype_is_floating((DataType)data_type)) { value_tag = "float_val"; - } else if (datatype_is_interger((DataType)data_type)) { + } else if (datatype_is_integer((DataType)data_type)) { value_tag = "int64_val"; } @@ -330,7 +330,7 @@ TEST(PlanProtoTest, AndOrExpr) { string value_tag = "bool_val"; if (datatype_is_floating((DataType)data_type)) { value_tag = "float_val"; - } else if (datatype_is_interger((DataType)data_type)) { + } else if (datatype_is_integer((DataType)data_type)) { value_tag = "int64_val"; } diff --git a/internal/querynode/query_node.go b/internal/querynode/query_node.go index 69f3239abf76183d2417871b1282da7d762660b5..f02c734f63304967a6ef349a87f8264614045317 100644 --- a/internal/querynode/query_node.go +++ b/internal/querynode/query_node.go @@ -29,6 +29,7 @@ import ( "errors" "strconv" "sync/atomic" + "unsafe" "go.uber.org/zap" @@ -123,7 +124,9 @@ func (node *QueryNode) Init() error { node.etcdKV) node.streaming = newStreaming(node.queryNodeLoopCtx, node.msFactory, node.etcdKV) - C.SegcoreInit() + cConfigDir := C.CString(Params.BaseTable.GetConfigDir()) + C.SegcoreInit(cConfigDir) + C.free(unsafe.Pointer(cConfigDir)) if node.rootCoord == nil { log.Error("null root coordinator detected") diff --git a/internal/querynode/segment.go b/internal/querynode/segment.go index 8c38582861f0e76cb547b017f3a8734e08af7f45..fde1e99c39ef287d0d89cda007c10b845c18a462 100644 --- a/internal/querynode/segment.go +++ b/internal/querynode/segment.go @@ -186,7 +186,7 @@ func newSegment(collection *Collection, segmentID int64, partitionID UniqueID, c return nil } - log.Debug("create segment", zap.Int64("segmentID", segmentID)) + log.Debug("create segment", zap.Int64("segmentID", segmentID), zap.Int32("segmentType", int32(segType))) var segment = &Segment{ segmentPtr: segmentPtr, diff --git a/internal/util/paramtable/basetable.go b/internal/util/paramtable/basetable.go index 720bb2b58cb43d99700a3cabf60157625fa53d59..0c24a0c25dc6d22205d651ffcc4a3ffd47d29fd5 100644 --- a/internal/util/paramtable/basetable.go +++ b/internal/util/paramtable/basetable.go @@ -12,16 +12,17 @@ package paramtable import ( - "log" "os" "path" "runtime" "strconv" "strings" - "github.com/milvus-io/milvus/internal/proto/commonpb" + "go.uber.org/zap" memkv "github.com/milvus-io/milvus/internal/kv/mem" + "github.com/milvus-io/milvus/internal/log" + "github.com/milvus-io/milvus/internal/proto/commonpb" "github.com/milvus-io/milvus/internal/util/typeutil" "github.com/spf13/cast" "github.com/spf13/viper" @@ -39,29 +40,42 @@ type Base interface { } type BaseTable struct { - params *memkv.MemoryKV + params *memkv.MemoryKV + configDir string } func (gp *BaseTable) Init() { gp.params = memkv.NewMemoryKV() - err := gp.LoadYaml("milvus.yaml") - if err != nil { - panic(err) + _, fpath, _, _ := runtime.Caller(0) + configDir := path.Dir(fpath) + "/../../../configs/" + if _, err := os.Stat(configDir); err != nil { + log.Warn("cannot access config directory", zap.String("configDir", configDir), zap.Error(err)) + if runPath, err1 := os.Getwd(); err1 != nil { + panic(err1.Error()) + } else { + configDir = runPath + "/configs/" + } } + gp.configDir = configDir + log.Debug("config directory", zap.String("configDir", gp.configDir)) - err = gp.LoadYaml("advanced/common.yaml") - if err != nil { + if err := gp.LoadYaml("milvus.yaml"); err != nil { panic(err) } - - err = gp.LoadYaml("advanced/channel.yaml") - if err != nil { + if err := gp.LoadYaml("advanced/common.yaml"); err != nil { + panic(err) + } + if err := gp.LoadYaml("advanced/channel.yaml"); err != nil { panic(err) } gp.tryloadFromEnv() } +func (gp *BaseTable) GetConfigDir() string { + return gp.configDir +} + func (gp *BaseTable) LoadFromKVPair(kvPairs []*commonpb.KeyValuePair) error { for _, pair := range kvPairs { err := gp.Save(pair.Key, pair.Value) @@ -215,15 +229,9 @@ func (gp *BaseTable) LoadRange(key, endKey string, limit int) ([]string, []strin func (gp *BaseTable) LoadYaml(fileName string) error { config := viper.New() - _, fpath, _, _ := runtime.Caller(0) - configFile := path.Dir(fpath) + "/../../../configs/" + fileName - _, err := os.Stat(configFile) - if os.IsNotExist(err) { - runPath, err := os.Getwd() - if err != nil { - panic(err) - } - configFile = runPath + "/configs/" + fileName + configFile := gp.configDir + fileName + if _, err := os.Stat(configFile); err != nil { + panic("cannot access config file: " + configFile) } config.SetConfigFile(configFile) @@ -241,7 +249,7 @@ func (gp *BaseTable) LoadYaml(fileName string) error { for _, v := range val { ss, err := cast.ToStringE(v) if err != nil { - log.Panic(err) + panic(err) } if len(str) == 0 { str = ss @@ -251,7 +259,7 @@ func (gp *BaseTable) LoadYaml(fileName string) error { } default: - log.Panicf("undefine config type, key=%s", key) + panic("undefined config type, key=" + key) } } err = gp.params.Save(strings.ToLower(key), str)