# Patch to run.sh, function run_start_onebox(): every onebox process gets its own
# Prometheus port, substituted for @PROMETHEUS_PORT@ when config.ini is generated.
#   - meta i     -> 9091 + i
#   - replica j  -> 9091 + META_COUNT + j
#   - collector  -> 9091
diff --git a/run.sh b/run.sh
index 57ec95d7e85c11c64292c334ff91277199e513b2..c0075764ccf6678968395bb8e6b998ccab7470ca 100755
--- a/run.sh
+++ b/run.sh
@@ -670,10 +670,11 @@ function run_start_onebox()
     for i in $(seq ${META_COUNT})
     do
         meta_port=$((34600+i))
+        prometheus_port=$((9091+i))
         mkdir -p meta$i;
         cd meta$i
         ln -s -f ${SERVER_PATH}/pegasus_server pegasus_server
-        sed "s/@META_PORT@/$meta_port/;s/@REPLICA_PORT@/34800/" ${ROOT}/config-server.ini >config.ini
+        sed "s/@META_PORT@/$meta_port/;s/@REPLICA_PORT@/34800/;s/@PROMETHEUS_PORT@/$prometheus_port/" ${ROOT}/config-server.ini >config.ini
         echo "cd `pwd` && $PWD/pegasus_server config.ini -app_list meta &>result &"
         $PWD/pegasus_server config.ini -app_list meta &>result &
         PID=$!
@@ -682,11 +683,12 @@ function run_start_onebox()
     done
     for j in $(seq ${REPLICA_COUNT})
     do
+        prometheus_port=$((9091+${META_COUNT}+j))
         replica_port=$((34800+j))
         mkdir -p replica$j
         cd replica$j
         ln -s -f ${SERVER_PATH}/pegasus_server pegasus_server
-        sed "s/@META_PORT@/34600/;s/@REPLICA_PORT@/$replica_port/" ${ROOT}/config-server.ini >config.ini
+        sed "s/@META_PORT@/34600/;s/@REPLICA_PORT@/$replica_port/;s/@PROMETHEUS_PORT@/$prometheus_port/" ${ROOT}/config-server.ini >config.ini
         echo "cd `pwd` && $PWD/pegasus_server config.ini -app_list replica &>result &"
         $PWD/pegasus_server config.ini -app_list replica &>result &
         PID=$!
@@ -698,7 +700,7 @@ function run_start_onebox()
     mkdir -p collector
     cd collector
     ln -s -f ${SERVER_PATH}/pegasus_server pegasus_server
-    sed "s/@META_PORT@/34600/;s/@REPLICA_PORT@/34800/" ${ROOT}/config-server.ini >config.ini
+    sed "s/@META_PORT@/34600/;s/@REPLICA_PORT@/34800/;s/@PROMETHEUS_PORT@/9091/" ${ROOT}/config-server.ini >config.ini
     echo "cd `pwd` && $PWD/pegasus_server config.ini -app_list collector &>result &"
     $PWD/pegasus_server config.ini -app_list collector &>result &
     PID=$!
# Patch: report perf counters through a Prometheus pull exposer instead of the push
# gateway, and select the sink with a single "perf_counter_sink" option.
# Review amendments relative to the submitted patch (hunk headers are recomputed):
#   * get_hostip(): inet_ntop() is called outside assert(), so it still runs under NDEBUG.
#   * stop(): _gauge_family_map is cleared before the registry is released.
#   * short doc comments on get_hostip() and perf_counter_sink_t.
# NOTE(review): include targets and template arguments are missing in this copy of the
# patch (they appear to have been stripped); restore them from the original before applying.
diff --git a/src/reporter/CMakeLists.txt b/src/reporter/CMakeLists.txt
index 55dedbb731b8c5e84169f9af58603b392b56c1b0..ea325e539e15257751f3fc847f95c6861a4efece 100644
--- a/src/reporter/CMakeLists.txt
+++ b/src/reporter/CMakeLists.txt
@@ -16,12 +16,12 @@ find_package(prometheus-cpp)#TODO(huangwei5): make it optional
 # the INTERFACE_LINK_LIBRARIES of prometheus contains the absolute path of libcurl
 # when we use the compiled prometheus-cpp libs, the path of libcurl should be our own path
 find_package(CURL)
-get_target_property(_libs prometheus-cpp::push INTERFACE_LINK_LIBRARIES)
+get_target_property(_libs prometheus-cpp::pull INTERFACE_LINK_LIBRARIES)
 string(REGEX REPLACE ";/.*libcurl\.a" ";${CURL_LIBRARIES}" _libs "${_libs}")
-set_target_properties(prometheus-cpp::push PROPERTIES INTERFACE_LINK_LIBRARIES "${_libs}")
+set_target_properties(prometheus-cpp::pull PROPERTIES INTERFACE_LINK_LIBRARIES "${_libs}")
 dsn_add_static_library()
 target_link_libraries(${MY_PROJ_NAME} PUBLIC
     pegasus_base
-    prometheus-cpp::push
+    prometheus-cpp::pull
 )
 # TODO(huangwei5): dsn_add_static_library doesnt link libs, need fix
diff --git a/src/reporter/pegasus_counter_reporter.cpp b/src/reporter/pegasus_counter_reporter.cpp
index e6aea2c63c84542627d429f14b2cfc1325637292..1344fbdadbc88782ca3f1ba54611abe6f3333b49 100644
--- a/src/reporter/pegasus_counter_reporter.cpp
+++ b/src/reporter/pegasus_counter_reporter.cpp
@@ -8,7 +8,6 @@
 #include
 #include
 #include
-
 #include
 #include
 
@@ -21,10 +20,11 @@
 #include
 #include
 #include
+#include
 
 using namespace ::dsn;
 
-static std::string GetHostName()
+static std::string get_hostname()
 {
     char hostname[1024];
 
@@ -34,9 +34,26 @@ static std::string GetHostName()
     return hostname;
 }
 
-static void change_metrics_name(std::string &metrics_name)
+// Returns, in dotted-decimal text form, the IPv4 address obtained from
+// dsn::rpc_address::ipv4_from_network_interface("").
+static std::string get_hostip()
+{
+    uint32_t ip = dsn::rpc_address::ipv4_from_network_interface("");
+    uint32_t ipnet = htonl(ip);
+    char buffer[512];
+    memset(buffer, 0, sizeof(buffer));
+    // Keep the inet_ntop() call outside assert(): assert() expands to nothing
+    // when NDEBUG is defined, which would leave `buffer` empty.
+    const char *text = inet_ntop(AF_INET, &ipnet, buffer, sizeof(buffer));
+    assert(text != nullptr);
+    (void)text; // only read by assert(); avoids an unused warning under NDEBUG
+    return buffer;
+}
+
+static void format_metrics_name(std::string &metrics_name)
 {
     replace(metrics_name.begin(), metrics_name.end(), '@', ':');
+    replace(metrics_name.begin(), metrics_name.end(), '#', ':');
     replace(metrics_name.begin(), metrics_name.end(), '.', '_');
     replace(metrics_name.begin(), metrics_name.end(), '*', '_');
     replace(metrics_name.begin(), metrics_name.end(), '(', '_');
@@ -65,8 +82,7 @@ pegasus_counter_reporter::pegasus_counter_reporter()
       _update_interval_seconds(0),
       _last_report_time_ms(0),
       _enable_logging(false),
-      _enable_falcon(false),
-      _enable_prometheus(false),
+      _perf_counter_sink(perf_counter_sink_t::INVALID),
       _falcon_port(0),
       _prometheus_port(0)
 {
@@ -76,17 +92,13 @@ pegasus_counter_reporter::~pegasus_counter_reporter() { stop(); }
 
 void pegasus_counter_reporter::prometheus_initialize()
 {
-    _prometheus_host = dsn_config_get_value_string(
-        "pegasus.server", "prometheus_host", "127.0.0.1", "prometheus gateway host");
     _prometheus_port = (uint16_t)dsn_config_get_value_uint64(
         "pegasus.server", "prometheus_port", 9091, "prometheus gateway port");
 
-    ddebug("prometheus initialize: host:port(%s:%d)", _prometheus_host.c_str(), _prometheus_port);
-    const auto &labels = prometheus::Gateway::GetInstanceLabel(GetHostName());
-    _gateway = std::make_shared(
-        _prometheus_host, std::to_string(_prometheus_port), "pegasus", labels);
     _registry = std::make_shared();
-    _gateway->RegisterCollectable(_registry);
+    _exposer = dsn::make_unique(
+        fmt::format("{}:{}", get_hostip().c_str(), _prometheus_port));
+    _exposer->RegisterCollectable(_registry);
 }
 
 void pegasus_counter_reporter::falcon_initialize()
@@ -148,18 +160,22 @@ void pegasus_counter_reporter::start()
 
     _enable_logging = dsn_config_get_value_bool(
         "pegasus.server", "perf_counter_enable_logging", true, "perf_counter_enable_logging");
-    _enable_falcon = dsn_config_get_value_bool(
-        "pegasus.server", "perf_counter_enable_falcon", false, "perf_counter_enable_falcon");
-    _enable_prometheus = dsn_config_get_value_bool("pegasus.server",
-                                                   "perf_counter_enable_prometheus",
-                                                   false,
-                                                   "perf_counter_enable_prometheus");
-
-    if (_enable_falcon) {
+
+    std::string perf_counter_sink =
+        dsn_config_get_value_string("pegasus.server", "perf_counter_sink", "", "perf_counter_sink");
+    if ("prometheus" == perf_counter_sink) {
+        _perf_counter_sink = perf_counter_sink_t::PROMETHEUS;
+    } else if ("falcon" == perf_counter_sink) {
+        _perf_counter_sink = perf_counter_sink_t::FALCON;
+    } else {
+        _perf_counter_sink = perf_counter_sink_t::INVALID;
+    }
+
+    if (perf_counter_sink_t::FALCON == _perf_counter_sink) {
         falcon_initialize();
     }
 
-    if (_enable_prometheus) {
+    if (perf_counter_sink_t::PROMETHEUS == _perf_counter_sink) {
         prometheus_initialize();
     }
 
@@ -178,6 +194,12 @@ void pegasus_counter_reporter::stop()
     if (_report_timer != nullptr) {
         _report_timer->cancel();
     }
+    // The prometheus::Family pointers cached in _gauge_family_map are presumably
+    // owned by _registry; drop them before the registry is released so that no
+    // dangling pointer is left behind if the reporter is started again.
+    _gauge_family_map.clear();
+    _exposer = nullptr;
+    _registry = nullptr;
 }
 
 void pegasus_counter_reporter::update_counters_to_falcon(const std::string &result,
@@ -207,7 +229,7 @@ void pegasus_counter_reporter::update()
         ddebug("%s", oss.str().c_str());
     }
 
-    if (_enable_falcon) {
+    if (perf_counter_sink_t::FALCON == _perf_counter_sink) {
         std::stringstream oss;
         oss << "[";
 
@@ -229,20 +251,21 @@ void pegasus_counter_reporter::update()
         update_counters_to_falcon(oss.str(), timestamp);
     }
 
-    if (_enable_prometheus) {
-        perf_counters::instance().iterate_snapshot([this](
+    if (perf_counter_sink_t::PROMETHEUS == _perf_counter_sink) {
+        const std::string hostname = get_hostname();
+        perf_counters::instance().iterate_snapshot([&hostname, this](
             const dsn::perf_counters::counter_snapshot &cs) {
             std::string metrics_name = cs.name;
 
             // prometheus metric_name don't support characters like .*()@, it only support ":"
             // and "_"
             // so change the name to make it all right
-            change_metrics_name(metrics_name);
+            format_metrics_name(metrics_name);
 
             // split metric_name like "collector_app_pegasus_app_stat_multi_put_qps:1_0_p999" or
             // "collector_app_pegasus_app_stat_multi_put_qps:1_0"
-            // app[0] = "1" which is the app_id
-            // app[1] = "0" which is the partition_cout
+            // app[0] = "1" which is the app(app name or app id)
+            // app[1] = "0" which is the partition_index
             // app[2] = "p999" or "" which represent the percent
             std::string app[3] = {"", "", ""};
             std::list lv;
@@ -256,14 +279,25 @@ void pegasus_counter_reporter::update()
                     i++;
                 }
             }
+            /**
+             * deal with corner case, for example:
+             *  replica*eon.replica*table.level.RPC_RRDB_RRDB_GET.latency(ns)@${table_name}.p999
+             * in this case, app[0] = app name, app[1] = p999, app[2] = ""
+             **/
+            if ("p999" == app[1]) {
+                app[2] = app[1];
+                app[1].clear();
+            }
 
             // create metrics that prometheus support to report data
+            metrics_name = lv.front() + app[2];
             std::map *>::iterator it =
                 _gauge_family_map.find(metrics_name);
             if (it == _gauge_family_map.end()) {
                 auto &add_gauge_family = prometheus::BuildGauge()
                                              .Name(metrics_name)
                                              .Labels({{"service", "pegasus"},
+                                                      {"host_name", hostname},
                                                       {"cluster", _cluster_name},
                                                       {"pegasus_job", _app_name},
                                                       {"port", std::to_string(_local_port)}})
@@ -274,13 +308,9 @@ void pegasus_counter_reporter::update()
                          .first;
             }
 
-            auto &second_gauge = it->second->Add(
-                {{"app_id", app[0]}, {"partition_count", app[1]}, {"percent", app[2]}});
+            auto &second_gauge = it->second->Add({{"app", app[0]}, {"partition", app[1]}});
             second_gauge.Set(cs.value);
         });
-
-        // report data to pushgateway
-        _gateway->Push();
     }
 
     ddebug("update now_ms(%lld), last_report_time_ms(%lld)", now, _last_report_time_ms);
diff --git a/src/reporter/pegasus_counter_reporter.h b/src/reporter/pegasus_counter_reporter.h
index 48a9ff33dee288ed20dce19a6142662b0db23c1c..6b4d14f6ed4e0c957e9806a1aa026bdbea0308ec 100644
--- a/src/reporter/pegasus_counter_reporter.h
+++ b/src/reporter/pegasus_counter_reporter.h
@@ -16,7 +16,7 @@
 #include
 #include
 
-#include
+#include
 
 namespace pegasus {
 namespace server {
@@ -35,6 +35,15 @@ struct falcon_metric
     DEFINE_JSON_SERIALIZATION(endpoint, metric, timestamp, step, value, counterType, tags)
 };
 
+// Destination that perf counters are reported to, selected by the
+// "perf_counter_sink" option of [pegasus.server]; INVALID means no sink.
+enum class perf_counter_sink_t
+{
+    FALCON,
+    PROMETHEUS,
+    INVALID
+};
+
 class pegasus_counter_reporter : public ::dsn::utils::singleton
 {
 public:
@@ -72,8 +81,7 @@ private:
 
     // perf counter flags
     bool _enable_logging;
-    bool _enable_falcon;
-    bool _enable_prometheus;
+    perf_counter_sink_t _perf_counter_sink;
 
     // falcon relates
     std::string _falcon_host;
@@ -82,10 +90,9 @@ private:
     falcon_metric _falcon_metric;
 
     // prometheus relates
-    std::string _prometheus_host;
     uint16_t _prometheus_port;
     std::shared_ptr _registry;
-    std::shared_ptr _gateway;
+    std::unique_ptr _exposer;
     std::map *> _gauge_family_map;
 };
 }
diff --git a/src/server/config.ini b/src/server/config.ini
index aef8a94694cd00a59dd2dea8be3cbcd298e301cf..dd8d79b8dc073d0ba0ffde534ece5fda2f5f8a78 100644
--- a/src/server/config.ini
+++ b/src/server/config.ini
@@ -289,8 +289,11 @@
 
   perf_counter_update_interval_seconds = 10
   perf_counter_enable_logging = false
-  perf_counter_enable_falcon = false
-  perf_counter_enable_prometheus = false
+  # Where the metrics are collected. If no value is given, no sink is used.
+  # Options:
+  # - falcon
+  # - prometheus
+  perf_counter_sink =
 
   perf_counter_read_capacity_unit_size = 4096
   perf_counter_write_capacity_unit_size = 4096
@@ -298,7 +301,7 @@
   falcon_port = 1988
   falcon_path = /v1/push
 
-  prometheus_host = 127.0.0.1
+  # The HTTP port exposed to Prometheus for pulling metrics from pegasus server.
   prometheus_port = 9091
 
 [pegasus.collector]
diff --git a/src/server/config.min.ini b/src/server/config.min.ini
index 0fa029eb9d96ffdca44d859336aa733c790a0e14..f46bd60167f32f4b49d03ec2cacc10034297ec43 100644
--- a/src/server/config.min.ini
+++ b/src/server/config.min.ini
@@ -114,6 +114,13 @@
 
 [pegasus.server]
   perf_counter_enable_logging = false
+  # Where the metrics are collected. If no value is given, no sink is used.
+  # Options:
+  # - falcon
+  # - prometheus
+  perf_counter_sink =
+  # The HTTP port exposed to Prometheus for pulling metrics from pegasus server.
+  prometheus_port = @PROMETHEUS_PORT@
 
 [pegasus.collector]
   available_detect_app = @APP_NAME@