diff --git a/pkg/models/metrics/metrics.go b/pkg/models/metrics/metrics.go index 9bb65c98a43c4acd6bf2de7ed467e8bfc91e39a2..a250ab582eff00e72ba26a2f546d34a41a1d2261 100644 --- a/pkg/models/metrics/metrics.go +++ b/pkg/models/metrics/metrics.go @@ -750,7 +750,39 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour go func(metricName string) { queryType, params := AssembleComponentRequestInfo(monitoringRequest, metricName) metricsStr := client.SendMonitoringRequest(client.SecondaryPrometheusEndpoint, queryType, params) - ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": monitoringRequest.ComponentName}) + formattedJson := ReformatJson(metricsStr, metricName, map[string]string{"resource_name": monitoringRequest.ComponentName}) + + if metricName == "etcd_server_list" { + + nodeMap := make(map[string]string, 0) + + nodeAddress := GetNodeAddressInfo() + for nodeName, nodeInfo := range *nodeAddress { + + var nodeIp string + for _, item := range nodeInfo { + if item.Type == v1.NodeInternalIP { + nodeIp = item.Address + break + } + } + + nodeMap[nodeIp] = nodeName + } + + // add node_name label to metrics + for i := 0; i < len(formattedJson.Data.Result); i++ { + metricDesc := formattedJson.Data.Result[i][ResultItemMetric] + metricDescMap, ensure := metricDesc.(map[string]interface{}) + if ensure { + if nodeIp, exist := metricDescMap[ResultItemMetricNodeIp]; exist { + metricDescMap[ResultItemMetricNodeName] = nodeMap[nodeIp.(string)] + } + } + } + } + + ch <- formattedJson wg.Done() }(metricName) } diff --git a/pkg/models/metrics/metricsruleconst.go b/pkg/models/metrics/metricsruleconst.go index 2d95dba17a23ecf8744e858eacb1eac35221e9f5..10fc80516186af72a123739a07811899ab9a962a 100644 --- a/pkg/models/metrics/metricsruleconst.go +++ b/pkg/models/metrics/metricsruleconst.go @@ -21,6 +21,8 @@ const ( MetricStatusSuccess = "success" ResultItemMetric = "metric" ResultItemMetricResource = "resource" + ResultItemMetricNodeIp = "node_ip" + ResultItemMetricNodeName = "node_name" ResultItemValue = "value" ResultItemValues = "values" ResultSortTypeDesc = "desc" @@ -670,26 +672,26 @@ var RulePromQLTmplMap = MetricMap{ "workspace_pod_abnormal_ratio": `sum(kube_pod_status_phase{phase=~"Failed|Pending|Unknown", namespace!="", namespace$1}) / sum(kube_pod_status_phase{phase!~"Succeeded", namespace!="", namespace$1})`, // component - "etcd_server_list": `label_replace(up{job="etcd"}, "ip", "$1", "instance", "(.*):.*")`, + "etcd_server_list": `label_replace(up{job="etcd"}, "node_ip", "$1", "instance", "(.*):.*")`, "etcd_server_total": `count(up{job="etcd"})`, "etcd_server_up_total": `etcd:up:sum`, - "etcd_server_has_leader": `etcd_server_has_leader`, - "etcd_server_leader_changes": `etcd:etcd_server_leader_changes_seen:sum_changes`, - "etcd_server_proposals_failed_rate": `etcd:etcd_server_proposals_failed:sum_irate`, - "etcd_server_proposals_applied_rate": `etcd:etcd_server_proposals_applied:sum_irate`, - "etcd_server_proposals_committed_rate": `etcd:etcd_server_proposals_committed:sum_irate`, - "etcd_server_proposals_pending_count": `etcd:etcd_server_proposals_pending:sum`, - "etcd_mvcc_db_size": `etcd:etcd_debugging_mvcc_db_total_size:sum`, - "etcd_network_client_grpc_received_bytes": `etcd:etcd_network_client_grpc_received_bytes:sum_irate`, - "etcd_network_client_grpc_sent_bytes": `etcd:etcd_network_client_grpc_sent_bytes:sum_irate`, - "etcd_grpc_call_rate": `etcd:grpc_server_started:sum_irate`, - "etcd_grpc_call_failed_rate": `etcd:grpc_server_handled:sum_irate`, - "etcd_grpc_server_msg_received_rate": `etcd:grpc_server_msg_received:sum_irate`, - "etcd_grpc_server_msg_sent_rate": `etcd:grpc_server_msg_sent:sum_irate`, - "etcd_disk_wal_fsync_duration": `etcd:etcd_disk_wal_fsync_duration:avg`, - "etcd_disk_wal_fsync_duration_quantile": `etcd:etcd_disk_wal_fsync_duration:histogram_quantile`, - "etcd_disk_backend_commit_duration": `etcd:etcd_disk_backend_commit_duration:avg`, - "etcd_disk_backend_commit_duration_quantile": `etcd:etcd_disk_backend_commit_duration:histogram_quantile`, + "etcd_server_has_leader": `label_replace(etcd_server_has_leader, "node_ip", "$1", "instance", "(.*):.*")`, + "etcd_server_leader_changes": `label_replace(etcd:etcd_server_leader_changes_seen:sum_changes, "node_ip", "$1", "node", "(.*)")`, + "etcd_server_proposals_failed_rate": `avg(etcd:etcd_server_proposals_failed:sum_irate)`, + "etcd_server_proposals_applied_rate": `avg(etcd:etcd_server_proposals_applied:sum_irate)`, + "etcd_server_proposals_committed_rate": `avg(etcd:etcd_server_proposals_committed:sum_irate)`, + "etcd_server_proposals_pending_count": `avg(etcd:etcd_server_proposals_pending:sum)`, + "etcd_mvcc_db_size": `avg(etcd:etcd_debugging_mvcc_db_total_size:sum)`, + "etcd_network_client_grpc_received_bytes": `sum(etcd:etcd_network_client_grpc_received_bytes:sum_irate)`, + "etcd_network_client_grpc_sent_bytes": `sum(etcd:etcd_network_client_grpc_sent_bytes:sum_irate)`, + "etcd_grpc_call_rate": `sum(etcd:grpc_server_started:sum_irate)`, + "etcd_grpc_call_failed_rate": `sum(etcd:grpc_server_handled:sum_irate)`, + "etcd_grpc_server_msg_received_rate": `sum(etcd:grpc_server_msg_received:sum_irate)`, + "etcd_grpc_server_msg_sent_rate": `sum(etcd:grpc_server_msg_sent:sum_irate)`, + "etcd_disk_wal_fsync_duration": `avg(etcd:etcd_disk_wal_fsync_duration:avg)`, + "etcd_disk_wal_fsync_duration_quantile": `avg(etcd:etcd_disk_wal_fsync_duration:histogram_quantile) by (quantile)`, + "etcd_disk_backend_commit_duration": `avg(etcd:etcd_disk_backend_commit_duration:avg)`, + "etcd_disk_backend_commit_duration_quantile": `avg(etcd:etcd_disk_backend_commit_duration:histogram_quantile) by (quantile)`, "apiserver_up_sum": `apiserver:up:sum`, "apiserver_request_rate": `apiserver:apiserver_request_count:sum_irate`,