提交 a92a43e2 编写于 作者: Y yukun 提交者: Jin Hai

Add push mode for prometheus monitor (#905)

* Add push mode for prometheus monitor

* format code

* fix for comments

* fix test_MetricBase bug

* Change ip to address in config
上级 28e61ee4
......@@ -18,6 +18,7 @@ Please mark all change in change log and use the issue from GitHub
- \#766 - If partition tag is similar, wrong partition is searched
- \#771 - Add server build commit info interface
- \#759 - Put C++ sdk out of milvus/core
- \#813 - Add push mode for prometheus monitor
- \#815 - Support MinIO storage
- \#910 - Change Milvus c++ standard to c++17
......
......@@ -20,7 +20,7 @@ version: 0.1
#----------------------+------------------------------------------------------------+------------+-----------------+
# Server Config | Description | Type | Default |
#----------------------+------------------------------------------------------------+------------+-----------------+
# address | IP address that Milvus server monitors. | String | 0.0.0.0 |
# address | IP address that Milvus server monitors. | IP | 0.0.0.0 |
#----------------------+------------------------------------------------------------+------------+-----------------+
# port | Port that Milvus server monitors. Port range (1024, 65535) | Integer | 19530 |
#----------------------+------------------------------------------------------------+------------+-----------------+
......@@ -68,7 +68,7 @@ db_config:
#----------------------+------------------------------------------------------------+------------+-----------------+
# minio_enable | Enable MinIO storage or not. | Boolean | false |
#----------------------+------------------------------------------------------------+------------+-----------------+
# minio_address | MinIO storage service IP address. | String | 127.0.0.1 |
# minio_address | MinIO storage service IP address. | IP | 127.0.0.1 |
#----------------------+------------------------------------------------------------+------------+-----------------+
# minio_port | MinIO storage service port. Port range (1024, 65535) | Integer | 9000 |
#----------------------+------------------------------------------------------------+------------+-----------------+
......@@ -95,13 +95,16 @@ storage_config:
#----------------------+------------------------------------------------------------+------------+-----------------+
# collector | Connected monitoring system to collect metrics. | String | Prometheus |
#----------------------+------------------------------------------------------------+------------+-----------------+
# port | Port to visit Prometheus, port range (1024, 65535) | Integer | 8080 |
# address | Pushgateway address | IP | 127.0.0.1 |
#----------------------+------------------------------------------------------------+------------+-----------------+
# port | Pushgateway port, port range (1024, 65535) | Integer | 9091 |
#----------------------+------------------------------------------------------------+------------+-----------------+
metric_config:
enable_monitor: false
collector: prometheus
prometheus_config:
port: 8080
address: 127.0.0.1
port: 9091
#----------------------+------------------------------------------------------------+------------+-----------------+
# Cache Config | Description | Type | Default |
......
......@@ -20,7 +20,7 @@ version: 0.1
#----------------------+------------------------------------------------------------+------------+-----------------+
# Server Config | Description | Type | Default |
#----------------------+------------------------------------------------------------+------------+-----------------+
# address | IP address that Milvus server monitors. | String | 0.0.0.0 |
# address | IP address that Milvus server monitors. | IP | 0.0.0.0 |
#----------------------+------------------------------------------------------------+------------+-----------------+
# port | Port that Milvus server monitors. Port range (1024, 65535) | Integer | 19530 |
#----------------------+------------------------------------------------------------+------------+-----------------+
......@@ -68,7 +68,7 @@ db_config:
#----------------------+------------------------------------------------------------+------------+-----------------+
# minio_enable | Enable MinIO storage or not. | Boolean | false |
#----------------------+------------------------------------------------------------+------------+-----------------+
# minio_address | MinIO storage service IP address. | String | 127.0.0.1 |
# minio_address | MinIO storage service IP address. | IP | 127.0.0.1 |
#----------------------+------------------------------------------------------------+------------+-----------------+
# minio_port | MinIO storage service port. Port range (1024, 65535) | Integer | 9000 |
#----------------------+------------------------------------------------------------+------------+-----------------+
......@@ -95,13 +95,16 @@ storage_config:
#----------------------+------------------------------------------------------------+------------+-----------------+
# collector | Connected monitoring system to collect metrics. | String | Prometheus |
#----------------------+------------------------------------------------------------+------------+-----------------+
# port | Port to visit Prometheus, port range (1024, 65535) | Integer | 8080 |
# address | Pushgateway address | IP | 127.0.0.1 |
#----------------------+------------------------------------------------------------+------------+-----------------+
# port | Pushgateway port, port range (1024, 65535) | Integer | 9091 |
#----------------------+------------------------------------------------------------+------------+-----------------+
metric_config:
enable_monitor: false
collector: prometheus
prometheus_config:
port: 8080
address: 127.0.0.1
port: 9091
#----------------------+------------------------------------------------------------+------------+-----------------+
# Cache Config | Description | Type | Default |
......
......@@ -20,7 +20,7 @@ version: 0.1
#----------------------+------------------------------------------------------------+------------+-----------------+
# Server Config | Description | Type | Default |
#----------------------+------------------------------------------------------------+------------+-----------------+
# address | IP address that Milvus server monitors. | String | 0.0.0.0 |
# address | IP address that Milvus server monitors. | IP | 0.0.0.0 |
#----------------------+------------------------------------------------------------+------------+-----------------+
# port | Port that Milvus server monitors. Port range (1024, 65535) | Integer | 19530 |
#----------------------+------------------------------------------------------------+------------+-----------------+
......@@ -68,7 +68,7 @@ db_config:
#----------------------+------------------------------------------------------------+------------+-----------------+
# minio_enable | Enable MinIO storage or not. | Boolean | false |
#----------------------+------------------------------------------------------------+------------+-----------------+
# minio_address | MinIO storage service IP address. | String | 127.0.0.1 |
# minio_address | MinIO storage service IP address. | IP | 127.0.0.1 |
#----------------------+------------------------------------------------------------+------------+-----------------+
# minio_port | MinIO storage service port. Port range (1024, 65535) | Integer | 9000 |
#----------------------+------------------------------------------------------------+------------+-----------------+
......@@ -95,13 +95,16 @@ storage_config:
#----------------------+------------------------------------------------------------+------------+-----------------+
# collector | Connected monitoring system to collect metrics. | String | Prometheus |
#----------------------+------------------------------------------------------------+------------+-----------------+
# port | Port to visit Prometheus, port range (1024, 65535) | Integer | 8080 |
# address | Pushgateway address | IP | 127.0.0.1 |
#----------------------+------------------------------------------------------------+------------+-----------------+
# port | Pushgateway port, port range (1024, 65535) | Integer | 9091 |
#----------------------+------------------------------------------------------------+------------+-----------------+
metric_config:
enable_monitor: false
collector: prometheus
prometheus_config:
port: 8080
address: 127.0.0.1
port: 9091
#----------------------+------------------------------------------------------------+------------+-----------------+
# Cache Config | Description | Type | Default |
......
......@@ -137,6 +137,7 @@ set(prometheus_lib
prometheus-cpp-push
prometheus-cpp-pull
prometheus-cpp-core
curl
)
set(boost_lib
......
......@@ -631,6 +631,7 @@ DBImpl::StartMetricTask() {
server::Metrics::GetInstance().CPUCoreUsagePercentSet();
server::Metrics::GetInstance().GPUTemperature();
server::Metrics::GetInstance().CPUTemperature();
server::Metrics::GetInstance().PushToGateway();
// ENGINE_LOG_TRACE << "Metric task finished";
}
......
......@@ -18,7 +18,7 @@
#pragma once
#include "SystemInfo.h"
#include "utils/Error.h"
#include "utils/Status.h"
#include <string>
......@@ -32,8 +32,9 @@ class MetricsBase {
return instance;
}
virtual ErrorCode
virtual Status
Init() {
return Status::OK();
}
virtual void
......@@ -203,6 +204,10 @@ class MetricsBase {
// Hook for reporting CPU temperature. The base implementation does nothing;
// concrete collectors (e.g. PrometheusMetrics) override it.
virtual void
CPUTemperature() {
}
// Hook for pushing collected metrics to a gateway. The base implementation
// does nothing, so calling it on a plain MetricsBase is a no-op; concrete
// collectors (e.g. PrometheusMetrics) override it.
virtual void
PushToGateway() {
}
};
} // namespace server
......
......@@ -27,39 +27,48 @@
namespace milvus {
namespace server {
ErrorCode
Status
PrometheusMetrics::Init() {
try {
Config& config = Config::GetInstance();
Status s = config.GetMetricConfigEnableMonitor(startup_);
if (!s.ok()) {
return s.code();
return s;
}
if (!startup_) {
return SERVER_SUCCESS;
return Status::OK();
}
// Following should be read from config file.
std::string bind_address;
s = config.GetMetricConfigPrometheusPort(bind_address);
std::string push_port, push_address;
s = config.GetMetricConfigPrometheusPort(push_port);
if (!s.ok()) {
return s.code();
return s;
}
s = config.GetMetricConfigPrometheusAddress(push_address);
if (!s.ok()) {
return s;
}
const std::string uri = std::string("/metrics");
const std::size_t num_threads = 2;
auto labels = prometheus::Gateway::GetInstanceLabel("pushgateway");
// Init pushgateway
gateway_ = std::make_shared<prometheus::Gateway>(push_address, push_port, "milvus_metrics", labels);
// Init Exposer
exposer_ptr_ = std::make_shared<prometheus::Exposer>(bind_address, uri, num_threads);
// exposer_ptr_ = std::make_shared<prometheus::Exposer>(bind_address, uri, num_threads);
// Exposer Registry
exposer_ptr_->RegisterCollectable(registry_);
// Pushgateway Registry
gateway_->RegisterCollectable(registry_);
} catch (std::exception& ex) {
SERVER_LOG_ERROR << "Failed to connect prometheus server: " << std::string(ex.what());
return SERVER_UNEXPECTED_ERROR;
return Status(SERVER_UNEXPECTED_ERROR, ex.what());
}
return SERVER_SUCCESS;
return Status::OK();
}
void
......
......@@ -18,6 +18,7 @@
#pragma once
#include <prometheus/exposer.h>
#include <prometheus/gateway.h>
#include <prometheus/registry.h>
#include <iostream>
#include <memory>
......@@ -25,7 +26,8 @@
#include <vector>
#include "metrics/MetricBase.h"
#include "utils/Error.h"
#include "utils/Log.h"
#include "utils/Status.h"
#define METRICS_NOW_TIME std::chrono::system_clock::now()
//#define server::Metrics::GetInstance() server::GetInstance()
......@@ -42,11 +44,11 @@ class PrometheusMetrics : public MetricsBase {
return instance;
}
ErrorCode
Init();
Status
Init() override;
private:
std::shared_ptr<prometheus::Exposer> exposer_ptr_;
std::shared_ptr<prometheus::Gateway> gateway_;
std::shared_ptr<prometheus::Registry> registry_ = std::make_shared<prometheus::Registry>();
bool startup_ = false;
......@@ -293,9 +295,18 @@ class PrometheusMetrics : public MetricsBase {
void
CPUTemperature() override;
std::shared_ptr<prometheus::Exposer>&
exposer_ptr() {
return exposer_ptr_;
// Pushes the registered metrics through the Pushgateway client and logs a
// warning when the push does not report HTTP 200.
//
// Does nothing when monitoring is disabled (startup_ == false) or when the
// gateway client was never created.
void
PushToGateway() override {
    // startup_ is assigned from the config before gateway_ is constructed in
    // Init(); if Init() bails out in between (bad port/address, exception) or
    // is never called, gateway_ is still null and must not be dereferenced.
    if (!startup_ || gateway_ == nullptr) {
        return;
    }

    if (gateway_->Push() != 200) {
        ENGINE_LOG_WARNING << "Metrics pushgateway failed";
    }
}
// Returns a mutable reference to the shared pointer that holds the
// Pushgateway client. The pointer is only assigned in Init(), so it may be
// null if Init() has not created the client.
std::shared_ptr<prometheus::Gateway>&
gateway() {
return gateway_;
}
// prometheus::Exposer& exposer() { return exposer_;}
......
......@@ -134,6 +134,9 @@ Config::ValidateConfig() {
std::string metric_collector;
CONFIG_CHECK(GetMetricConfigCollector(metric_collector));
std::string metric_prometheus_address;
CONFIG_CHECK(GetMetricConfigPrometheusAddress(metric_prometheus_address));
std::string metric_prometheus_port;
CONFIG_CHECK(GetMetricConfigPrometheusPort(metric_prometheus_port));
......@@ -214,6 +217,7 @@ Config::ResetDefaultConfig() {
/* metric config */
CONFIG_CHECK(SetMetricConfigEnableMonitor(CONFIG_METRIC_ENABLE_MONITOR_DEFAULT));
CONFIG_CHECK(SetMetricConfigCollector(CONFIG_METRIC_COLLECTOR_DEFAULT));
CONFIG_CHECK(SetMetricConfigPrometheusAddress(CONFIG_METRIC_PROMETHEUS_ADDRESS_DEFAULT));
CONFIG_CHECK(SetMetricConfigPrometheusPort(CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT));
/* cache config */
......@@ -556,6 +560,16 @@ Config::CheckMetricConfigCollector(const std::string& value) {
return Status::OK();
}
// Validates the Pushgateway address configured for the Prometheus collector.
//
// @param value  Candidate address string.
// @return Status::OK() when ValidationUtil::ValidateIpAddress accepts `value`;
//         otherwise a SERVER_INVALID_ARGUMENT status whose message names the
//         rejected value and the config key it came from.
Status
Config::CheckMetricConfigPrometheusAddress(const std::string& value) {
    if (!ValidationUtil::ValidateIpAddress(value).ok()) {
        // Return the detailed message itself: it used to be built and then
        // discarded in favour of a shorter one. The key is "address" (not
        // "ip"), matching CONFIG_METRIC_PROMETHEUS_ADDRESS.
        const std::string msg = "Invalid metric config prometheus address: " + value +
                                ". Possible reason: metric_config.prometheus_config.address is invalid.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
Status
Config::CheckMetricConfigPrometheusPort(const std::string& value) {
if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
......@@ -999,6 +1013,12 @@ Config::GetMetricConfigCollector(std::string& value) {
return Status::OK();
}
// Fetches the Prometheus Pushgateway address into `value`.
//
// The lookup goes through GetConfigStr with the metric section, the
// "address" key and CONFIG_METRIC_PROMETHEUS_ADDRESS_DEFAULT (presumably the
// fallback when the key is absent -- confirm against GetConfigStr).
// Always returns Status::OK().
Status
Config::GetMetricConfigPrometheusAddress(std::string& value) {
    const auto configured_address =
        GetConfigStr(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_ADDRESS, CONFIG_METRIC_PROMETHEUS_ADDRESS_DEFAULT);
    value = configured_address;
    return Status::OK();
}
Status
Config::GetMetricConfigPrometheusPort(std::string& value) {
value = GetConfigStr(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_PORT, CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT);
......@@ -1272,6 +1292,12 @@ Config::SetMetricConfigCollector(const std::string& value) {
return SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_COLLECTOR, value);
}
// Validates `value` as a Pushgateway address and stores it in the in-memory
// configuration under the metric section's "address" key.
//
// @param value  New Pushgateway address.
// @return The failing status from CheckMetricConfigPrometheusAddress when
//         validation fails (CONFIG_CHECK presumably returns early on a non-OK
//         status -- confirm against the macro), otherwise the status reported
//         by SetConfigValueInMem.
Status
Config::SetMetricConfigPrometheusAddress(const std::string& value) {
    CONFIG_CHECK(CheckMetricConfigPrometheusAddress(value));
    // The result must be returned: flowing off the end of a function that
    // returns Status is undefined behaviour, and it also hid store failures.
    return SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_ADDRESS, value);
}
Status
Config::SetMetricConfigPrometheusPort(const std::string& value) {
CONFIG_CHECK(CheckMetricConfigPrometheusPort(value));
......
......@@ -98,8 +98,10 @@ static const char* CONFIG_METRIC_ENABLE_MONITOR_DEFAULT = "false";
static const char* CONFIG_METRIC_COLLECTOR = "collector";
static const char* CONFIG_METRIC_COLLECTOR_DEFAULT = "prometheus";
static const char* CONFIG_METRIC_PROMETHEUS = "prometheus_config";
static const char* CONFIG_METRIC_PROMETHEUS_ADDRESS = "address";
static const char* CONFIG_METRIC_PROMETHEUS_ADDRESS_DEFAULT = "127.0.0.1";
static const char* CONFIG_METRIC_PROMETHEUS_PORT = "port";
static const char* CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT = "8080";
static const char* CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT = "9091";
/* engine config */
static const char* CONFIG_ENGINE = "engine_config";
......@@ -212,6 +214,8 @@ class Config {
Status
CheckMetricConfigCollector(const std::string& value);
Status
CheckMetricConfigPrometheusAddress(const std::string& value);
Status
CheckMetricConfigPrometheusPort(const std::string& value);
/* cache config */
......@@ -300,6 +304,8 @@ class Config {
Status
GetMetricConfigCollector(std::string& value);
Status
GetMetricConfigPrometheusAddress(std::string& value);
Status
GetMetricConfigPrometheusPort(std::string& value);
/* cache config */
......@@ -382,6 +388,8 @@ class Config {
Status
SetMetricConfigCollector(const std::string& value);
Status
SetMetricConfigPrometheusAddress(const std::string& value);
Status
SetMetricConfigPrometheusPort(const std::string& value);
/* cache config */
......
......@@ -55,6 +55,7 @@ class NoReusePortOption : public ::grpc::ServerBuilderOption {
// Applies this option's channel arguments to the server being built:
// GRPC_ARG_ALLOW_REUSEPORT is set to 0 and GRPC_ARG_MAX_CONCURRENT_STREAMS
// is set to 20.
void
UpdateArguments(::grpc::ChannelArguments* args) override {
    constexpr int kAllowReusePort = 0;
    constexpr int kMaxConcurrentStreams = 20;

    args->SetInt(GRPC_ARG_ALLOW_REUSEPORT, kAllowReusePort);
    args->SetInt(GRPC_ARG_MAX_CONCURRENT_STREAMS, kMaxConcurrentStreams);
}
void
......
......@@ -62,5 +62,6 @@ TEST(MetricbaseTest, METRICBASE_TEST) {
instance.ConnectionGaugeIncrement();
instance.ConnectionGaugeDecrement();
instance.KeepingAliveCounterIncrement();
instance.PushToGateway();
instance.OctetsSet();
}
......@@ -67,6 +67,7 @@ TEST(PrometheusTest, PROMETHEUS_TEST) {
instance.ConnectionGaugeIncrement();
instance.ConnectionGaugeDecrement();
instance.KeepingAliveCounterIncrement();
instance.PushToGateway();
instance.OctetsSet();
instance.CPUCoreUsagePercentSet();
......
......@@ -218,6 +218,10 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) {
ASSERT_TRUE(config.GetMetricConfigCollector(str_val).ok());
ASSERT_TRUE(str_val == metric_collector);
std::string metric_prometheus_address = "127.0.0.1";
ASSERT_TRUE(config.GetMetricConfigPrometheusAddress(str_val).ok());
ASSERT_TRUE(str_val == metric_prometheus_address);
std::string metric_prometheus_port = "2222";
ASSERT_TRUE(config.SetMetricConfigPrometheusPort(metric_prometheus_port).ok());
ASSERT_TRUE(config.GetMetricConfigPrometheusPort(str_val).ok());
......@@ -298,12 +302,14 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) {
#endif
}
std::string gen_get_command(const std::string& parent_node, const std::string& child_node) {
// Builds the text of a "get_config" command for one config entry:
//   "get_config " + parent_node + ms::CONFIG_NODE_DELIMITER + child_node
std::string
gen_get_command(const std::string& parent_node, const std::string& child_node) {
    std::string command("get_config ");
    command += parent_node;
    command += ms::CONFIG_NODE_DELIMITER;
    command += child_node;
    return command;
}
std::string gen_set_command(const std::string& parent_node, const std::string& child_node, const std::string& value) {
// Builds the text of a "set_config" command for one config entry:
//   "set_config " + parent_node + ms::CONFIG_NODE_DELIMITER + child_node + " " + value
std::string
gen_set_command(const std::string& parent_node, const std::string& child_node, const std::string& value) {
    std::string command("set_config ");
    command += parent_node;
    command += ms::CONFIG_NODE_DELIMITER;
    command += child_node;
    command += " ";
    command += value;
    return command;
}
......@@ -519,6 +525,8 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) {
ASSERT_FALSE(config.SetMetricConfigCollector("zilliz").ok());
ASSERT_FALSE(config.SetMetricConfigPrometheusAddress("127.0.0").ok());
ASSERT_FALSE(config.SetMetricConfigPrometheusPort("0xff").ok());
/* cache config */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册