diff --git a/pkg/apis/monitoring/v1alpha2/register.go b/pkg/apis/monitoring/v1alpha2/register.go index ac939ffe6fbbbe0ac7a9a9f96c4d38e88d3adbc3..9862b4f5ea8f1e093890e34ec5b95057ca577153 100644 --- a/pkg/apis/monitoring/v1alpha2/register.go +++ b/pkg/apis/monitoring/v1alpha2/register.go @@ -247,6 +247,13 @@ func addWebService(c *restful.Container) error { Consumes(restful.MIME_JSON, restful.MIME_XML). Produces(restful.MIME_JSON) + ws.Route(ws.GET("/components/{component}").To(monitoring.MonitorComponent). + Doc("monitor component level metrics"). + Param(ws.QueryParameter("metrics_filter", "metrics names in re2 regex").DataType("string").Required(false).DefaultValue("")). + Metadata(restfulspec.KeyOpenAPITags, tags)). + Consumes(restful.MIME_JSON, restful.MIME_XML). + Produces(restful.MIME_JSON) + c.Add(ws) return nil } diff --git a/pkg/apiserver/monitoring/monitoring.go b/pkg/apiserver/monitoring/monitoring.go index ca9f9729421e840f6443421f29f9856df46d09b1..d1a9887b33b626000368ea0e83e95cbbf86c5c59 100644 --- a/pkg/apiserver/monitoring/monitoring.go +++ b/pkg/apiserver/monitoring/monitoring.go @@ -32,7 +32,7 @@ func MonitorPod(request *restful.Request, response *restful.Response) { queryType, params, nullRule := metrics.AssemblePodMetricRequestInfo(requestParams, metricName) var res *metrics.FormatedMetric if !nullRule { - metricsStr := prometheus.SendMonitoringRequest(queryType, params) + metricsStr := prometheus.SendMonitoringRequest(prometheus.PrometheusEndpoint, queryType, params) res = metrics.ReformatJson(metricsStr, metricName, map[string]string{"pod_name": ""}) } response.WriteAsJson(res) @@ -154,7 +154,7 @@ func MonitorCluster(request *restful.Request, response *restful.Response) { if metricName != "" { // single queryType, params := metrics.AssembleClusterMetricRequestInfo(requestParams, metricName) - metricsStr := prometheus.SendMonitoringRequest(queryType, params) + metricsStr := prometheus.SendMonitoringRequest(prometheus.PrometheusEndpoint, queryType, params) res := metrics.ReformatJson(metricsStr, metricName, map[string]string{"cluster": "local"}) response.WriteAsJson(res) @@ -172,7 +172,7 @@ func MonitorNode(request *restful.Request, response *restful.Response) { if metricName != "" { // single queryType, params := metrics.AssembleNodeMetricRequestInfo(requestParams, metricName) - metricsStr := prometheus.SendMonitoringRequest(queryType, params) + metricsStr := prometheus.SendMonitoringRequest(prometheus.PrometheusEndpoint, queryType, params) res := metrics.ReformatJson(metricsStr, metricName, map[string]string{"node": ""}) // The raw node-exporter result doesn't include ip address information // Thereby, append node ip address to .data.result[].metric @@ -198,3 +198,15 @@ func MonitorNode(request *restful.Request, response *restful.Response) { response.WriteAsJson(pagedMetrics) } } + +func MonitorComponent(request *restful.Request, response *restful.Response) { + requestParams := prometheus.ParseMonitoringRequestParams(request) + + if requestParams.MetricsFilter == "" { + requestParams.MetricsFilter = requestParams.ComponentName + "_.*" + } + + rawMetrics := metrics.MonitorAllMetrics(requestParams, metrics.MetricLevelComponent) + + response.WriteAsJson(rawMetrics) +} diff --git a/pkg/models/metrics/metrics.go b/pkg/models/metrics/metrics.go index db3d7ab1ff09e4cc7ad8d8cb38cf590b14598fef..9bb65c98a43c4acd6bf2de7ed467e8bfc91e39a2 100644 --- a/pkg/models/metrics/metrics.go +++ b/pkg/models/metrics/metrics.go @@ -135,7 +135,7 @@ func getAllWorkspaces() map[string]int { paramValues := make(url.Values) paramValues.Set("query", WorkspaceNamespaceLabelRule) params := paramValues.Encode() - res := client.SendMonitoringRequest(client.DefaultQueryType, params) + res := client.SendMonitoringRequest(client.PrometheusEndpoint, client.DefaultQueryType, params) metric := ReformatJson(res, "", map[string]string{"workspace": "workspace"}) @@ -240,7 +240,7 @@ func AssembleSpecificWorkloadMetricRequestInfo(monitoringRequest *client.Monitor paramValues := monitoringRequest.Params params := makeRequestParamString(rule, paramValues) - res := client.SendMonitoringRequest(client.DefaultQueryType, params) + res := client.SendMonitoringRequest(client.PrometheusEndpoint, client.DefaultQueryType, params) podNamesFilter := getPodNameRegexInWorkload(res, podsFilter) @@ -306,7 +306,7 @@ func AddNodeAddressMetric(nodeMetric *FormatedMetric, nodeAddress *map[string][] func MonitorContainer(monitoringRequest *client.MonitoringRequestParams, metricName string) *FormatedMetric { queryType, params := AssembleContainerMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) res := ReformatJson(metricsStr, metricName, map[string]string{"container_name": ""}) return res } @@ -480,7 +480,7 @@ func collectWorkspaceMetric(monitoringRequest *client.MonitoringRequestParams, w go func(metricName string) { queryType, params := AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest, namespaceArray, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": ws}) // It's adding "resource_name" field wg.Done() @@ -526,7 +526,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour wg.Add(1) go func(metricName string) { queryType, params := AssembleClusterMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) ch <- ReformatJson(metricsStr, metricName, map[string]string{"cluster": "local"}) wg.Done() }(metricName) @@ -541,7 +541,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour wg.Add(1) go func(metricName string) { queryType, params := AssembleNodeMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) ch <- ReformatJson(metricsStr, metricName, map[string]string{"node": ""}) wg.Done() }(metricName) @@ -580,7 +580,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour go func(metricName string, namespace string) { queryType, params := AssembleNamespaceMetricRequestInfoByNamesapce(monitoringRequest, namespace, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) chForOneMetric <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": namespace}) wgForOneMetric.Done() }(metricName, ns) @@ -624,7 +624,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour wg.Add(1) go func(metricName string, workspace string) { queryType, params := AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest, namespaceArray, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": workspace}) wg.Done() }(metricName, workspace) @@ -642,7 +642,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour go func(metricName string) { queryType, params := AssembleAllWorkspaceMetricRequestInfo(monitoringRequest, nil, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) ch <- ReformatJson(metricsStr, metricName, map[string]string{"workspace": "workspaces"}) wg.Done() @@ -660,7 +660,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour go func(metricName string) { queryType, params := AssembleNamespaceMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) rawResult := ReformatJson(metricsStr, metricName, map[string]string{"namespace": ""}) ch <- rawResult @@ -679,7 +679,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour wg.Add(1) go func(metricName string) { queryType, params := AssembleAllWorkloadMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) reformattedResult := ReformatJson(metricsStr, metricName, map[string]string{"workload": ""}) // no need to append a null result ch <- reformattedResult @@ -696,7 +696,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour metricName = strings.TrimLeft(metricName, "workload_") queryType, params, nullRule := AssembleSpecificWorkloadMetricRequestInfo(monitoringRequest, metricName) if !nullRule { - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) fmtMetrics := ReformatJson(metricsStr, metricName, map[string]string{"pod_name": ""}) unifyMetricHistoryTimeRange(fmtMetrics) ch <- fmtMetrics @@ -716,7 +716,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour go func(metricName string) { queryType, params, nullRule := AssemblePodMetricRequestInfo(monitoringRequest, metricName) if !nullRule { - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) ch <- ReformatJson(metricsStr, metricName, map[string]string{"pod_name": ""}) } else { ch <- nil @@ -734,13 +734,28 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour wg.Add(1) go func(metricName string) { queryType, params := AssembleContainerMetricRequestInfo(monitoringRequest, metricName) - metricsStr := client.SendMonitoringRequest(queryType, params) + metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params) ch <- ReformatJson(metricsStr, metricName, map[string]string{"container_name": ""}) wg.Done() }(metricName) } } } + case MetricLevelComponent: + { + for _, metricName := range ComponentMetricsNames { + matched, err := regexp.MatchString(metricsFilter, metricName) + if err == nil && matched { + wg.Add(1) + go func(metricName string) { + queryType, params := AssembleComponentRequestInfo(monitoringRequest, metricName) + metricsStr := client.SendMonitoringRequest(client.SecondaryPrometheusEndpoint, queryType, params) + ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": monitoringRequest.ComponentName}) + wg.Done() + }(metricName) + } + } + } } wg.Wait() @@ -928,3 +943,12 @@ func AssembleNodeMetricRequestInfo(monitoringRequest *client.MonitoringRequestPa return queryType, params } + +func AssembleComponentRequestInfo(monitoringRequest *client.MonitoringRequestParams, metricName string) (string, string) { + queryType := monitoringRequest.QueryType + paramValues := monitoringRequest.Params + rule := MakeComponentRule(metricName) + params := makeRequestParamString(rule, paramValues) + + return queryType, params +} diff --git a/pkg/models/metrics/metricsrule.go b/pkg/models/metrics/metricsrule.go index 0bd9475766385c2b8fa7b060a13f627d8c3e3478..832751e8cf4cfb811f4f95ab7eaf9eec382fa6a1 100644 --- a/pkg/models/metrics/metricsrule.go +++ b/pkg/models/metrics/metricsrule.go @@ -238,3 +238,8 @@ func MakeNodeRule(nodeID string, nodesFilter string, metricsName string) string return rule } + +func MakeComponentRule(metricsName string) string { + var rule = RulePromQLTmplMap[metricsName] + return rule +} diff --git a/pkg/models/metrics/metricsruleconst.go b/pkg/models/metrics/metricsruleconst.go index 26833bac1008912445c1f8814555e92965ee6324..b97eb14748c26aa8770fde40bd66cb71359af45d 100644 --- a/pkg/models/metrics/metricsruleconst.go +++ b/pkg/models/metrics/metricsruleconst.go @@ -65,6 +65,7 @@ const ( MetricLevelContainer = "container" MetricLevelContainerName = "container_name" MetricLevelWorkload = "workload" + MetricLevelComponent = "component" ) const ( @@ -319,6 +320,58 @@ var ContainerMetricsNames = []string{ //"container_net_bytes_received", } +var ComponentMetricsNames = []string{ + "etcd_server_deployed_sum", + "etcd_server_up_sum", + "etcd_server_has_leader", + "etcd_server_leader_changes", + "etcd_server_proposals_failed_rate", + "etcd_server_proposals_applied_rate", + "etcd_server_proposals_committed_rate", + "etcd_server_proposals_pending_count", + "etcd_mvcc_db_size", + "etcd_network_client_grpc_received_bytes", + "etcd_network_client_grpc_sent_bytes", + "etcd_grpc_call_rate", + "etcd_grpc_call_failed_rate", + "etcd_grpc_server_msg_received_rate", + "etcd_grpc_server_msg_sent_rate", + "etcd_disk_wal_fsync_duration", + "etcd_disk_wal_fsync_duration_quantile", + "etcd_disk_backend_commit_duration", + "etcd_disk_backend_commit_duration_quantile", + + "apiserver_up_sum", + "apiserver_request_rate", + "apiserver_request_by_verb_rate", + "apiserver_request_latencies", + "apiserver_request_by_verb_latencies", + + "scheduler_up_sum", + "scheduler_schedule_attempts", + "scheduler_schedule_attempt_rate", + "scheduler_e2e_scheduling_latency", + "scheduler_e2e_scheduling_latency_quantile", + + "controller_manager_up_sum", + + "coredns_up_sum", + "coredns_cache_hits", + "coredns_cache_misses", + "coredns_dns_request_rate", + "coredns_dns_request_duration", + "coredns_dns_request_duration_quantile", + "coredns_dns_request_by_type_rate", + "coredns_dns_request_by_rcode_rate", + "coredns_panic_rate", + "coredns_proxy_request_rate", + "coredns_proxy_request_duration", + "coredns_proxy_request_duration_quantile", + + "prometheus_up_sum", + "prometheus_tsdb_head_samples_appended_rate", +} + var RulePromQLTmplMap = MetricMap{ //cluster "cluster_cpu_utilisation": ":node_cpu_utilisation:avg1m", @@ -614,4 +667,55 @@ var RulePromQLTmplMap = MetricMap{ // New in ks 2.0 "workspace_pod_abnormal_ratio": `sum(kube_pod_status_phase{phase=~"Failed|Pending|Unknown", namespace!="", namespace$1}) / sum(kube_pod_status_phase{phase!~"Succeeded", namespace!="", namespace$1})`, + + // component + "etcd_server_deployed_sum": `count(up{job="etcd"})`, + "etcd_server_up_sum": `etcd:up:sum`, + "etcd_server_has_leader": `etcd_server_has_leader`, + "etcd_server_leader_changes": `etcd:etcd_server_leader_changes_seen:sum_changes`, + "etcd_server_proposals_failed_rate": `etcd:etcd_server_proposals_failed:sum_irate`, + "etcd_server_proposals_applied_rate": `etcd:etcd_server_proposals_applied:sum_irate`, + "etcd_server_proposals_committed_rate": `etcd:etcd_server_proposals_committed:sum_irate`, + "etcd_server_proposals_pending_count": `etcd:etcd_server_proposals_pending:sum`, + "etcd_mvcc_db_size": `etcd:etcd_debugging_mvcc_db_total_size:sum`, + "etcd_network_client_grpc_received_bytes": `etcd:etcd_network_client_grpc_received_bytes:sum_irate`, + "etcd_network_client_grpc_sent_bytes": `etcd:etcd_network_client_grpc_sent_bytes:sum_irate`, + "etcd_grpc_call_rate": `etcd:grpc_server_started:sum_irate`, + "etcd_grpc_call_failed_rate": `etcd:grpc_server_handled:sum_irate`, + "etcd_grpc_server_msg_received_rate": `etcd:grpc_server_msg_received:sum_irate`, + "etcd_grpc_server_msg_sent_rate": `etcd:grpc_server_msg_sent:sum_irate`, + "etcd_disk_wal_fsync_duration": `etcd:etcd_disk_wal_fsync_duration:avg`, + "etcd_disk_wal_fsync_duration_quantile": `etcd:etcd_disk_wal_fsync_duration:histogram_quantile`, + "etcd_disk_backend_commit_duration": `etcd:etcd_disk_backend_commit_duration:avg`, + "etcd_disk_backend_commit_duration_quantile": `etcd:etcd_disk_backend_commit_duration:histogram_quantile`, + + "apiserver_up_sum": `apiserver:up:sum`, + "apiserver_request_rate": `apiserver:apiserver_request_count:sum_irate`, + "apiserver_request_by_verb_rate": `apiserver:apiserver_request_count:sum_verb_irate`, + "apiserver_request_latencies": `apiserver:apiserver_request_latencies:avg`, + "apiserver_request_by_verb_latencies": `apiserver:apiserver_request_latencies:avg_by_verb`, + + "scheduler_up_sum": `scheduler:up:sum`, + "scheduler_schedule_attempts": `scheduler:scheduler_schedule_attempts:sum`, + "scheduler_schedule_attempt_rate": `scheduler:scheduler_schedule_attempts:sum_rate`, + "scheduler_e2e_scheduling_latency": `scheduler:scheduler_e2e_scheduling_latency:avg`, + "scheduler_e2e_scheduling_latency_quantile": `scheduler:scheduler_e2e_scheduling_latency:histogram_quantile`, + + "controller_manager_up_sum": `controller_manager:up:sum`, + + "coredns_up_sum": `coredns:up:sum`, + "coredns_cache_hits": `coredns:coredns_cache_hits_total:sum_irate`, + "coredns_cache_misses": `coredns:coredns_cache_misses:sum_irate`, + "coredns_dns_request_rate": `coredns:coredns_dns_request_count:sum_irate`, + "coredns_dns_request_duration": `coredns:coredns_dns_request_duration:avg`, + "coredns_dns_request_duration_quantile": `coredns:coredns_dns_request_duration:histogram_quantile`, + "coredns_dns_request_by_type_rate": `coredns:coredns_dns_request_type_count:sum_irate`, + "coredns_dns_request_by_rcode_rate": `coredns:coredns_dns_response_rcode_count:sum_irate`, + "coredns_panic_rate": `coredns:coredns_panic_count:sum_irate`, + "coredns_proxy_request_rate": `coredns:coredns_proxy_request_count:sum_irate`, + "coredns_proxy_request_duration": `coredns:coredns_proxy_request_duration:avg`, + "coredns_proxy_request_duration_quantile": `coredns:coredns_proxy_request_duration:histogram_quantile`, + + "prometheus_up_sum": `prometheus:up:sum`, + "prometheus_tsdb_head_samples_appended_rate": `prometheus:prometheus_tsdb_head_samples_appended:sum_rate`, } diff --git a/pkg/simple/client/prometheus/prometheusclient.go b/pkg/simple/client/prometheus/prometheusclient.go index 9324ff6d2f68ed4d42d7e21e491025fcb6e60ab7..ea3b159dcf6adf2c86dde020b355fe8bd550a877 100644 --- a/pkg/simple/client/prometheus/prometheusclient.go +++ b/pkg/simple/client/prometheus/prometheusclient.go @@ -66,12 +66,13 @@ type MonitoringRequestParams struct { PodName string ContainerName string WorkloadKind string + ComponentName string } var client = &http.Client{} -func SendMonitoringRequest(queryType string, params string) string { - epurl := PrometheusEndpoint + queryType + params +func SendMonitoringRequest(prometheusEndpoint string, queryType string, params string) string { + epurl := prometheusEndpoint + queryType + params response, err := client.Get(epurl) if err != nil { glog.Error(err) @@ -113,6 +114,7 @@ func ParseMonitoringRequestParams(request *restful.Request) *MonitoringRequestPa podName := strings.Trim(request.PathParameter("pod"), " ") containerName := strings.Trim(request.PathParameter("container"), " ") workloadKind := strings.Trim(request.PathParameter("workload_kind"), " ") + componentName := strings.Trim(request.PathParameter("component"), " ") var requestParams = MonitoringRequestParams{ SortMetricName: sortMetricName, @@ -130,6 +132,7 @@ func ParseMonitoringRequestParams(request *restful.Request) *MonitoringRequestPa PodName: podName, ContainerName: containerName, WorkloadKind: workloadKind, + ComponentName: componentName, } if timeout == "" {