diff --git a/CHANGELOG.md b/CHANGELOG.md index c6af48271bd86e4e0c0008550726cb4ba5ee6413..d7a7078dfdd9fa86ec0f15dc245bcd1b869d1402 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Please mark all change in change log and use the issue from GitHub - \#766 - If partition tag is similar, wrong partition is searched - \#771 - Add server build commit info interface - \#759 - Put C++ sdk out of milvus/core +- \#813 - Add push mode for prometheus monitor - \#815 - Support MinIO storage - \#910 - Change Milvus c++ standard to c++17 diff --git a/core/conf/demo/server_config.yaml b/core/conf/demo/server_config.yaml index e26ac8413bba670776cac84d535ae16ede6f5865..fa839a1c495eacdea1584911929ea62cdeb6f55c 100644 --- a/core/conf/demo/server_config.yaml +++ b/core/conf/demo/server_config.yaml @@ -20,7 +20,7 @@ version: 0.1 #----------------------+------------------------------------------------------------+------------+-----------------+ # Server Config | Description | Type | Default | #----------------------+------------------------------------------------------------+------------+-----------------+ -# address | IP address that Milvus server monitors. | String | 0.0.0.0 | +# address | IP address that Milvus server monitors. | IP | 0.0.0.0 | #----------------------+------------------------------------------------------------+------------+-----------------+ # port | Port that Milvus server monitors. Port range (1024, 65535) | Integer | 19530 | #----------------------+------------------------------------------------------------+------------+-----------------+ @@ -68,7 +68,7 @@ db_config: #----------------------+------------------------------------------------------------+------------+-----------------+ # minio_enable | Enable MinIO storage or not. | Boolean | false | #----------------------+------------------------------------------------------------+------------+-----------------+ -# minio_address | MinIO storage service IP address. 
| String | 127.0.0.1 | +# minio_address | MinIO storage service IP address. | IP | 127.0.0.1 | #----------------------+------------------------------------------------------------+------------+-----------------+ # minio_port | MinIO storage service port. Port range (1024, 65535) | Integer | 9000 | #----------------------+------------------------------------------------------------+------------+-----------------+ @@ -95,13 +95,16 @@ storage_config: #----------------------+------------------------------------------------------------+------------+-----------------+ # collector | Connected monitoring system to collect metrics. | String | Prometheus | #----------------------+------------------------------------------------------------+------------+-----------------+ -# port | Port to visit Prometheus, port range (1024, 65535) | Integer | 8080 | +# address | Pushgateway address | IP | 127.0.0.1 | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# port | Pushgateway port, port range (1024, 65535) | Integer | 9091 | #----------------------+------------------------------------------------------------+------------+-----------------+ metric_config: enable_monitor: false collector: prometheus prometheus_config: - port: 8080 + address: 127.0.0.1 + port: 9091 #----------------------+------------------------------------------------------------+------------+-----------------+ # Cache Config | Description | Type | Default | diff --git a/core/conf/server_cpu_config.template b/core/conf/server_cpu_config.template index 0f564367745101964d428f801483b695d18fadf5..60d31a1e54dd0f48f9b483397a5f13bd7396cd99 100644 --- a/core/conf/server_cpu_config.template +++ b/core/conf/server_cpu_config.template @@ -20,7 +20,7 @@ version: 0.1 #----------------------+------------------------------------------------------------+------------+-----------------+ # Server Config | Description | Type | Default | 
#----------------------+------------------------------------------------------------+------------+-----------------+ -# address | IP address that Milvus server monitors. | String | 0.0.0.0 | +# address | IP address that Milvus server monitors. | IP | 0.0.0.0 | #----------------------+------------------------------------------------------------+------------+-----------------+ # port | Port that Milvus server monitors. Port range (1024, 65535) | Integer | 19530 | #----------------------+------------------------------------------------------------+------------+-----------------+ @@ -68,7 +68,7 @@ db_config: #----------------------+------------------------------------------------------------+------------+-----------------+ # minio_enable | Enable MinIO storage or not. | Boolean | false | #----------------------+------------------------------------------------------------+------------+-----------------+ -# minio_address | MinIO storage service IP address. | String | 127.0.0.1 | +# minio_address | MinIO storage service IP address. | IP | 127.0.0.1 | #----------------------+------------------------------------------------------------+------------+-----------------+ # minio_port | MinIO storage service port. Port range (1024, 65535) | Integer | 9000 | #----------------------+------------------------------------------------------------+------------+-----------------+ @@ -95,13 +95,16 @@ storage_config: #----------------------+------------------------------------------------------------+------------+-----------------+ # collector | Connected monitoring system to collect metrics. 
| String | Prometheus | #----------------------+------------------------------------------------------------+------------+-----------------+ -# port | Port to visit Prometheus, port range (1024, 65535) | Integer | 8080 | +# address | Pushgateway address | IP | 127.0.0.1 | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# port | Pushgateway port, port range (1024, 65535) | Integer | 9091 | #----------------------+------------------------------------------------------------+------------+-----------------+ metric_config: enable_monitor: false collector: prometheus prometheus_config: - port: 8080 + address: 127.0.0.1 + port: 9091 #----------------------+------------------------------------------------------------+------------+-----------------+ # Cache Config | Description | Type | Default | diff --git a/core/conf/server_gpu_config.template b/core/conf/server_gpu_config.template index e622310a97af569eb88b31b673565f7e8249ce24..abc4b4a099345fdd9f7ee1a1825ea2f587c049af 100644 --- a/core/conf/server_gpu_config.template +++ b/core/conf/server_gpu_config.template @@ -20,7 +20,7 @@ version: 0.1 #----------------------+------------------------------------------------------------+------------+-----------------+ # Server Config | Description | Type | Default | #----------------------+------------------------------------------------------------+------------+-----------------+ -# address | IP address that Milvus server monitors. | String | 0.0.0.0 | +# address | IP address that Milvus server monitors. | IP | 0.0.0.0 | #----------------------+------------------------------------------------------------+------------+-----------------+ # port | Port that Milvus server monitors. 
Port range (1024, 65535) | Integer | 19530 | #----------------------+------------------------------------------------------------+------------+-----------------+ @@ -68,7 +68,7 @@ db_config: #----------------------+------------------------------------------------------------+------------+-----------------+ # minio_enable | Enable MinIO storage or not. | Boolean | false | #----------------------+------------------------------------------------------------+------------+-----------------+ -# minio_address | MinIO storage service IP address. | String | 127.0.0.1 | +# minio_address | MinIO storage service IP address. | IP | 127.0.0.1 | #----------------------+------------------------------------------------------------+------------+-----------------+ # minio_port | MinIO storage service port. Port range (1024, 65535) | Integer | 9000 | #----------------------+------------------------------------------------------------+------------+-----------------+ @@ -95,13 +95,16 @@ storage_config: #----------------------+------------------------------------------------------------+------------+-----------------+ # collector | Connected monitoring system to collect metrics. 
| String | Prometheus | #----------------------+------------------------------------------------------------+------------+-----------------+ -# port | Port to visit Prometheus, port range (1024, 65535) | Integer | 8080 | +# address | Pushgateway address | IP | 127.0.0.1 | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# port | Pushgateway port, port range (1024, 65535) | Integer | 9091 | #----------------------+------------------------------------------------------------+------------+-----------------+ metric_config: enable_monitor: false collector: prometheus prometheus_config: - port: 8080 + address: 127.0.0.1 + port: 9091 #----------------------+------------------------------------------------------------+------------+-----------------+ # Cache Config | Description | Type | Default | diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index 7b3799550ed8679ba7a9f8ac2bf6d0f99c572284..1199479f88683eb714ab748eb5d1c0d0626a8202 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -137,6 +137,7 @@ set(prometheus_lib prometheus-cpp-push prometheus-cpp-pull prometheus-cpp-core + curl ) set(boost_lib diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 1e51368fded7ab2a3405c0792dbe4de72fb40967..ceac415eda85855bcf323603423ef7d30121086c 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -631,6 +631,7 @@ DBImpl::StartMetricTask() { server::Metrics::GetInstance().CPUCoreUsagePercentSet(); server::Metrics::GetInstance().GPUTemperature(); server::Metrics::GetInstance().CPUTemperature(); + server::Metrics::GetInstance().PushToGateway(); // ENGINE_LOG_TRACE << "Metric task finished"; } diff --git a/core/src/metrics/MetricBase.h b/core/src/metrics/MetricBase.h index eeca45e78923bb60678a0b464f78f0090422b82d..1da4f2726a25cc0a0fddb86c57e0e6af556b50f6 100644 --- a/core/src/metrics/MetricBase.h +++ b/core/src/metrics/MetricBase.h @@ -18,7 +18,7 @@ #pragma 
once #include "SystemInfo.h" -#include "utils/Error.h" +#include "utils/Status.h" #include @@ -32,8 +32,9 @@ class MetricsBase { return instance; } - virtual ErrorCode + virtual Status Init() { + return Status::OK(); } virtual void @@ -203,6 +204,10 @@ class MetricsBase { virtual void CPUTemperature() { } + + virtual void + PushToGateway() { + } }; } // namespace server diff --git a/core/src/metrics/prometheus/PrometheusMetrics.cpp b/core/src/metrics/prometheus/PrometheusMetrics.cpp index 19b2683280c721f4d0a6f964f70d5a03c49f6708..c27cf1feb8d334c1bc8092dca6984f04099e10a1 100644 --- a/core/src/metrics/prometheus/PrometheusMetrics.cpp +++ b/core/src/metrics/prometheus/PrometheusMetrics.cpp @@ -27,39 +27,48 @@ namespace milvus { namespace server { -ErrorCode +Status PrometheusMetrics::Init() { try { Config& config = Config::GetInstance(); Status s = config.GetMetricConfigEnableMonitor(startup_); if (!s.ok()) { - return s.code(); + return s; } if (!startup_) { - return SERVER_SUCCESS; + return Status::OK(); } // Following should be read from config file. 
- std::string bind_address; - s = config.GetMetricConfigPrometheusPort(bind_address); + std::string push_port, push_address; + s = config.GetMetricConfigPrometheusPort(push_port); if (!s.ok()) { - return s.code(); + return s; + } + s = config.GetMetricConfigPrometheusAddress(push_address); + if (!s.ok()) { + return s; } const std::string uri = std::string("/metrics"); const std::size_t num_threads = 2; + auto labels = prometheus::Gateway::GetInstanceLabel("pushgateway"); + + // Init pushgateway + gateway_ = std::make_shared(push_address, push_port, "milvus_metrics", labels); + // Init Exposer - exposer_ptr_ = std::make_shared(bind_address, uri, num_threads); + // exposer_ptr_ = std::make_shared(bind_address, uri, num_threads); - // Exposer Registry - exposer_ptr_->RegisterCollectable(registry_); + // Pushgateway Registry + gateway_->RegisterCollectable(registry_); } catch (std::exception& ex) { SERVER_LOG_ERROR << "Failed to connect prometheus server: " << std::string(ex.what()); - return SERVER_UNEXPECTED_ERROR; + return Status(SERVER_UNEXPECTED_ERROR, ex.what()); } - return SERVER_SUCCESS; + return Status::OK(); } void diff --git a/core/src/metrics/prometheus/PrometheusMetrics.h b/core/src/metrics/prometheus/PrometheusMetrics.h index 5a452ca02c7b59639acc4e303ab7ace655ef39b4..f2b56dc36214a57d3788c494c96e0b5b753f29b8 100644 --- a/core/src/metrics/prometheus/PrometheusMetrics.h +++ b/core/src/metrics/prometheus/PrometheusMetrics.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include #include #include @@ -25,7 +26,8 @@ #include #include "metrics/MetricBase.h" -#include "utils/Error.h" +#include "utils/Log.h" +#include "utils/Status.h" #define METRICS_NOW_TIME std::chrono::system_clock::now() //#define server::Metrics::GetInstance() server::GetInstance() @@ -42,11 +44,11 @@ class PrometheusMetrics : public MetricsBase { return instance; } - ErrorCode - Init(); + Status + Init() override; private: - std::shared_ptr exposer_ptr_; + std::shared_ptr gateway_; 
std::shared_ptr registry_ = std::make_shared(); bool startup_ = false; @@ -293,9 +295,18 @@ class PrometheusMetrics : public MetricsBase { void CPUTemperature() override; - std::shared_ptr& - exposer_ptr() { - return exposer_ptr_; + void + PushToGateway() override { + if (startup_ && gateway_ != nullptr) { /* Init() can return early with monitoring enabled but no gateway built */ + if (gateway_->Push() != 200) { /* a non-200 result is only logged, never raised */ + ENGINE_LOG_WARNING << "Metrics pushgateway failed"; + } + } + } + + std::shared_ptr& + gateway() { + return gateway_; } // prometheus::Exposer& exposer() { return exposer_;} diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index cfc3959fb4c41cece66f75100dc053d2df6143af..f5e9a42a82df4cad3bdf1aeb55b6977d8369630f 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -134,6 +134,9 @@ Config::ValidateConfig() { std::string metric_collector; CONFIG_CHECK(GetMetricConfigCollector(metric_collector)); + std::string metric_prometheus_address; + CONFIG_CHECK(GetMetricConfigPrometheusAddress(metric_prometheus_address)); + std::string metric_prometheus_port; CONFIG_CHECK(GetMetricConfigPrometheusPort(metric_prometheus_port)); @@ -214,6 +217,7 @@ Config::ResetDefaultConfig() { /* metric config */ CONFIG_CHECK(SetMetricConfigEnableMonitor(CONFIG_METRIC_ENABLE_MONITOR_DEFAULT)); CONFIG_CHECK(SetMetricConfigCollector(CONFIG_METRIC_COLLECTOR_DEFAULT)); + CONFIG_CHECK(SetMetricConfigPrometheusAddress(CONFIG_METRIC_PROMETHEUS_ADDRESS_DEFAULT)); CONFIG_CHECK(SetMetricConfigPrometheusPort(CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT)); /* cache config */ @@ -556,6 +560,16 @@ Config::CheckMetricConfigCollector(const std::string& value) { return Status::OK(); } +Status +Config::CheckMetricConfigPrometheusAddress(const std::string& value) { + if (!ValidationUtil::ValidateIpAddress(value).ok()) { + std::string msg = + "Invalid metric ip: " + value + ". 
Possible reason: metric_config.prometheus_config.ip is invalid."; + return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config prometheus_ip: " + value); + } + return Status::OK(); +} + Status Config::CheckMetricConfigPrometheusPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { @@ -999,6 +1013,12 @@ Config::GetMetricConfigCollector(std::string& value) { return Status::OK(); } +Status +Config::GetMetricConfigPrometheusAddress(std::string& value) { + value = GetConfigStr(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_ADDRESS, CONFIG_METRIC_PROMETHEUS_ADDRESS_DEFAULT); + return Status::OK(); +} + Status Config::GetMetricConfigPrometheusPort(std::string& value) { value = GetConfigStr(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_PORT, CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT); @@ -1272,6 +1292,12 @@ Config::SetMetricConfigCollector(const std::string& value) { return SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_COLLECTOR, value); } +Status +Config::SetMetricConfigPrometheusAddress(const std::string& value) { + CONFIG_CHECK(CheckMetricConfigPrometheusAddress(value)); + SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_ADDRESS, value); +} + Status Config::SetMetricConfigPrometheusPort(const std::string& value) { CONFIG_CHECK(CheckMetricConfigPrometheusPort(value)); diff --git a/core/src/server/Config.h b/core/src/server/Config.h index ebc121d4e397c981d6bd02fb7d66511f4ba8328e..e37bc82647e79fce7ebc0dacf2bf7cd97a3412a0 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -98,8 +98,10 @@ static const char* CONFIG_METRIC_ENABLE_MONITOR_DEFAULT = "false"; static const char* CONFIG_METRIC_COLLECTOR = "collector"; static const char* CONFIG_METRIC_COLLECTOR_DEFAULT = "prometheus"; static const char* CONFIG_METRIC_PROMETHEUS = "prometheus_config"; +static const char* CONFIG_METRIC_PROMETHEUS_ADDRESS = "address"; +static const char* CONFIG_METRIC_PROMETHEUS_ADDRESS_DEFAULT = "127.0.0.1"; static const char* 
CONFIG_METRIC_PROMETHEUS_PORT = "port"; -static const char* CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT = "8080"; +static const char* CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT = "9091"; /* engine config */ static const char* CONFIG_ENGINE = "engine_config"; @@ -212,6 +214,8 @@ class Config { Status CheckMetricConfigCollector(const std::string& value); Status + CheckMetricConfigPrometheusAddress(const std::string& value); + Status CheckMetricConfigPrometheusPort(const std::string& value); /* cache config */ @@ -300,6 +304,8 @@ class Config { Status GetMetricConfigCollector(std::string& value); Status + GetMetricConfigPrometheusAddress(std::string& value); + Status GetMetricConfigPrometheusPort(std::string& value); /* cache config */ @@ -382,6 +388,8 @@ class Config { Status SetMetricConfigCollector(const std::string& value); Status + SetMetricConfigPrometheusAddress(const std::string& value); + Status SetMetricConfigPrometheusPort(const std::string& value); /* cache config */ diff --git a/core/src/server/grpc_impl/GrpcServer.cpp b/core/src/server/grpc_impl/GrpcServer.cpp index 088eb71be0991adb4f450bfb978d6df1d5addc4d..52cc48b95e4eb8565e4f8dabd08a1922fe7cd2ad 100644 --- a/core/src/server/grpc_impl/GrpcServer.cpp +++ b/core/src/server/grpc_impl/GrpcServer.cpp @@ -55,6 +55,7 @@ class NoReusePortOption : public ::grpc::ServerBuilderOption { void UpdateArguments(::grpc::ChannelArguments* args) override { args->SetInt(GRPC_ARG_ALLOW_REUSEPORT, 0); + args->SetInt(GRPC_ARG_MAX_CONCURRENT_STREAMS, 20); } void diff --git a/core/unittest/metrics/test_metricbase.cpp b/core/unittest/metrics/test_metricbase.cpp index 73c9954e7285a3ab0f4e35a215272462ed8da8fb..f4fd38431086c7095f80facd8ebbe71ba7d6cd9e 100644 --- a/core/unittest/metrics/test_metricbase.cpp +++ b/core/unittest/metrics/test_metricbase.cpp @@ -62,5 +62,6 @@ TEST(MetricbaseTest, METRICBASE_TEST) { instance.ConnectionGaugeIncrement(); instance.ConnectionGaugeDecrement(); instance.KeepingAliveCounterIncrement(); + 
instance.PushToGateway(); instance.OctetsSet(); } diff --git a/core/unittest/metrics/test_prometheus.cpp b/core/unittest/metrics/test_prometheus.cpp index 6e339b73b43ef30e74bf3ef1a32fa537ce2b018c..d4da5aaa641a5c8066b6b9342b5c2f61b277f97a 100644 --- a/core/unittest/metrics/test_prometheus.cpp +++ b/core/unittest/metrics/test_prometheus.cpp @@ -67,6 +67,7 @@ TEST(PrometheusTest, PROMETHEUS_TEST) { instance.ConnectionGaugeIncrement(); instance.ConnectionGaugeDecrement(); instance.KeepingAliveCounterIncrement(); + instance.PushToGateway(); instance.OctetsSet(); instance.CPUCoreUsagePercentSet(); diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index c669ffa7e19090249e5f0a3df518d0d934a82470..69259484ace4e4882eb4b38d4649d2f904475e52 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -218,6 +218,10 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { ASSERT_TRUE(config.GetMetricConfigCollector(str_val).ok()); ASSERT_TRUE(str_val == metric_collector); + std::string metric_prometheus_address = "127.0.0.1"; + ASSERT_TRUE(config.GetMetricConfigPrometheusAddress(str_val).ok()); + ASSERT_TRUE(str_val == metric_prometheus_address); + std::string metric_prometheus_port = "2222"; ASSERT_TRUE(config.SetMetricConfigPrometheusPort(metric_prometheus_port).ok()); ASSERT_TRUE(config.GetMetricConfigPrometheusPort(str_val).ok()); @@ -298,12 +302,14 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { #endif } -std::string gen_get_command(const std::string& parent_node, const std::string& child_node) { +std::string +gen_get_command(const std::string& parent_node, const std::string& child_node) { std::string cmd = "get_config " + parent_node + ms::CONFIG_NODE_DELIMITER + child_node; return cmd; } -std::string gen_set_command(const std::string& parent_node, const std::string& child_node, const std::string& value) { +std::string +gen_set_command(const std::string& parent_node, const std::string& child_node, const 
std::string& value) { std::string cmd = "set_config " + parent_node + ms::CONFIG_NODE_DELIMITER + child_node + " " + value; return cmd; } @@ -519,6 +525,8 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { ASSERT_FALSE(config.SetMetricConfigCollector("zilliz").ok()); + ASSERT_FALSE(config.SetMetricConfigPrometheusAddress("127.0.0").ok()); + ASSERT_FALSE(config.SetMetricConfigPrometheusPort("0xff").ok()); /* cache config */