提交 3a3e4954 编写于 作者: H huanggze 提交者: zryfish

add component monitoring

Signed-off-by: huanggze <loganhuang@yunify.com>
上级 17e09190
......@@ -247,6 +247,13 @@ func addWebService(c *restful.Container) error {
Consumes(restful.MIME_JSON, restful.MIME_XML).
Produces(restful.MIME_JSON)
ws.Route(ws.GET("/components/{component}").To(monitoring.MonitorComponent).
Doc("monitor component level metrics").
Param(ws.QueryParameter("metrics_filter", "metrics names in re2 regex").DataType("string").Required(false).DefaultValue("")).
Metadata(restfulspec.KeyOpenAPITags, tags)).
Consumes(restful.MIME_JSON, restful.MIME_XML).
Produces(restful.MIME_JSON)
c.Add(ws)
return nil
}
......@@ -32,7 +32,7 @@ func MonitorPod(request *restful.Request, response *restful.Response) {
queryType, params, nullRule := metrics.AssemblePodMetricRequestInfo(requestParams, metricName)
var res *metrics.FormatedMetric
if !nullRule {
metricsStr := prometheus.SendMonitoringRequest(queryType, params)
metricsStr := prometheus.SendMonitoringRequest(prometheus.PrometheusEndpoint, queryType, params)
res = metrics.ReformatJson(metricsStr, metricName, map[string]string{"pod_name": ""})
}
response.WriteAsJson(res)
......@@ -154,7 +154,7 @@ func MonitorCluster(request *restful.Request, response *restful.Response) {
if metricName != "" {
// single
queryType, params := metrics.AssembleClusterMetricRequestInfo(requestParams, metricName)
metricsStr := prometheus.SendMonitoringRequest(queryType, params)
metricsStr := prometheus.SendMonitoringRequest(prometheus.PrometheusEndpoint, queryType, params)
res := metrics.ReformatJson(metricsStr, metricName, map[string]string{"cluster": "local"})
response.WriteAsJson(res)
......@@ -172,7 +172,7 @@ func MonitorNode(request *restful.Request, response *restful.Response) {
if metricName != "" {
// single
queryType, params := metrics.AssembleNodeMetricRequestInfo(requestParams, metricName)
metricsStr := prometheus.SendMonitoringRequest(queryType, params)
metricsStr := prometheus.SendMonitoringRequest(prometheus.PrometheusEndpoint, queryType, params)
res := metrics.ReformatJson(metricsStr, metricName, map[string]string{"node": ""})
// The raw node-exporter result doesn't include ip address information
// Thereby, append node ip address to .data.result[].metric
......@@ -198,3 +198,15 @@ func MonitorNode(request *restful.Request, response *restful.Response) {
response.WriteAsJson(pagedMetrics)
}
}
// MonitorComponent serves the component-level monitoring route. It parses the
// monitoring parameters from the request, collects every component metric that
// matches the metrics filter, and writes the result to the response as JSON.
//
// When the caller supplies no metrics filter, the filter defaults to the
// component name followed by "_.*".
func MonitorComponent(request *restful.Request, response *restful.Response) {
	params := prometheus.ParseMonitoringRequestParams(request)

	// Fall back to a filter derived from the component name.
	if len(params.MetricsFilter) == 0 {
		params.MetricsFilter = params.ComponentName + "_.*"
	}

	response.WriteAsJson(metrics.MonitorAllMetrics(params, metrics.MetricLevelComponent))
}
......@@ -135,7 +135,7 @@ func getAllWorkspaces() map[string]int {
paramValues := make(url.Values)
paramValues.Set("query", WorkspaceNamespaceLabelRule)
params := paramValues.Encode()
res := client.SendMonitoringRequest(client.DefaultQueryType, params)
res := client.SendMonitoringRequest(client.PrometheusEndpoint, client.DefaultQueryType, params)
metric := ReformatJson(res, "", map[string]string{"workspace": "workspace"})
......@@ -240,7 +240,7 @@ func AssembleSpecificWorkloadMetricRequestInfo(monitoringRequest *client.Monitor
paramValues := monitoringRequest.Params
params := makeRequestParamString(rule, paramValues)
res := client.SendMonitoringRequest(client.DefaultQueryType, params)
res := client.SendMonitoringRequest(client.PrometheusEndpoint, client.DefaultQueryType, params)
podNamesFilter := getPodNameRegexInWorkload(res, podsFilter)
......@@ -306,7 +306,7 @@ func AddNodeAddressMetric(nodeMetric *FormatedMetric, nodeAddress *map[string][]
// MonitorContainer fetches a single container-level metric. It builds the
// query type and parameters with AssembleContainerMetricRequestInfo, sends the
// request through client.SendMonitoringRequest, and returns the response after
// it has been passed through ReformatJson with a "container_name" entry.
//
// NOTE(review): the two consecutive metricsStr declarations below look like the
// before/after lines of a diff hunk; only the second one passes an endpoint
// argument — confirm which line belongs in the final file.
func MonitorContainer(monitoringRequest *client.MonitoringRequestParams, metricName string) *FormatedMetric {
queryType, params := AssembleContainerMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
res := ReformatJson(metricsStr, metricName, map[string]string{"container_name": ""})
return res
}
......@@ -480,7 +480,7 @@ func collectWorkspaceMetric(monitoringRequest *client.MonitoringRequestParams, w
go func(metricName string) {
queryType, params := AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest, namespaceArray, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": ws}) // It's adding "resource_name" field
wg.Done()
......@@ -526,7 +526,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
wg.Add(1)
go func(metricName string) {
queryType, params := AssembleClusterMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"cluster": "local"})
wg.Done()
}(metricName)
......@@ -541,7 +541,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
wg.Add(1)
go func(metricName string) {
queryType, params := AssembleNodeMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"node": ""})
wg.Done()
}(metricName)
......@@ -580,7 +580,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
go func(metricName string, namespace string) {
queryType, params := AssembleNamespaceMetricRequestInfoByNamesapce(monitoringRequest, namespace, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
chForOneMetric <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": namespace})
wgForOneMetric.Done()
}(metricName, ns)
......@@ -624,7 +624,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
wg.Add(1)
go func(metricName string, workspace string) {
queryType, params := AssembleSpecificWorkspaceMetricRequestInfo(monitoringRequest, namespaceArray, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": workspace})
wg.Done()
}(metricName, workspace)
......@@ -642,7 +642,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
go func(metricName string) {
queryType, params := AssembleAllWorkspaceMetricRequestInfo(monitoringRequest, nil, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"workspace": "workspaces"})
wg.Done()
......@@ -660,7 +660,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
go func(metricName string) {
queryType, params := AssembleNamespaceMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
rawResult := ReformatJson(metricsStr, metricName, map[string]string{"namespace": ""})
ch <- rawResult
......@@ -679,7 +679,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
wg.Add(1)
go func(metricName string) {
queryType, params := AssembleAllWorkloadMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
reformattedResult := ReformatJson(metricsStr, metricName, map[string]string{"workload": ""})
// no need to append a null result
ch <- reformattedResult
......@@ -696,7 +696,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
metricName = strings.TrimLeft(metricName, "workload_")
queryType, params, nullRule := AssembleSpecificWorkloadMetricRequestInfo(monitoringRequest, metricName)
if !nullRule {
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
fmtMetrics := ReformatJson(metricsStr, metricName, map[string]string{"pod_name": ""})
unifyMetricHistoryTimeRange(fmtMetrics)
ch <- fmtMetrics
......@@ -716,7 +716,7 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
go func(metricName string) {
queryType, params, nullRule := AssemblePodMetricRequestInfo(monitoringRequest, metricName)
if !nullRule {
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"pod_name": ""})
} else {
ch <- nil
......@@ -734,13 +734,28 @@ func MonitorAllMetrics(monitoringRequest *client.MonitoringRequestParams, resour
wg.Add(1)
go func(metricName string) {
queryType, params := AssembleContainerMetricRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(queryType, params)
metricsStr := client.SendMonitoringRequest(client.PrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"container_name": ""})
wg.Done()
}(metricName)
}
}
}
case MetricLevelComponent:
{
for _, metricName := range ComponentMetricsNames {
matched, err := regexp.MatchString(metricsFilter, metricName)
if err == nil && matched {
wg.Add(1)
go func(metricName string) {
queryType, params := AssembleComponentRequestInfo(monitoringRequest, metricName)
metricsStr := client.SendMonitoringRequest(client.SecondaryPrometheusEndpoint, queryType, params)
ch <- ReformatJson(metricsStr, metricName, map[string]string{"resource_name": monitoringRequest.ComponentName})
wg.Done()
}(metricName)
}
}
}
}
wg.Wait()
......@@ -928,3 +943,12 @@ func AssembleNodeMetricRequestInfo(monitoringRequest *client.MonitoringRequestPa
return queryType, params
}
// AssembleComponentRequestInfo prepares the two pieces needed to query a
// component metric: the query type taken from the monitoring request, and the
// parameter string that makeRequestParamString produces from the metric's rule
// (looked up via MakeComponentRule) and the request's Params.
func AssembleComponentRequestInfo(monitoringRequest *client.MonitoringRequestParams, metricName string) (string, string) {
	promRule := MakeComponentRule(metricName)
	query := makeRequestParamString(promRule, monitoringRequest.Params)
	return monitoringRequest.QueryType, query
}
......@@ -238,3 +238,8 @@ func MakeNodeRule(nodeID string, nodesFilter string, metricsName string) string
return rule
}
// MakeComponentRule returns the RulePromQLTmplMap entry stored under
// metricsName. A name with no entry yields the map's zero value.
func MakeComponentRule(metricsName string) string {
	return RulePromQLTmplMap[metricsName]
}
......@@ -65,6 +65,7 @@ const (
MetricLevelContainer = "container"
MetricLevelContainerName = "container_name"
MetricLevelWorkload = "workload"
MetricLevelComponent = "component"
)
const (
......@@ -319,6 +320,58 @@ var ContainerMetricsNames = []string{
//"container_net_bytes_received",
}
// ComponentMetricsNames lists the names of the component-level metrics.
// The names are grouped by their component prefix: etcd, apiserver,
// scheduler, controller_manager, coredns and prometheus.
var ComponentMetricsNames = []string{
// etcd
"etcd_server_deployed_sum",
"etcd_server_up_sum",
"etcd_server_has_leader",
"etcd_server_leader_changes",
"etcd_server_proposals_failed_rate",
"etcd_server_proposals_applied_rate",
"etcd_server_proposals_committed_rate",
"etcd_server_proposals_pending_count",
"etcd_mvcc_db_size",
"etcd_network_client_grpc_received_bytes",
"etcd_network_client_grpc_sent_bytes",
"etcd_grpc_call_rate",
"etcd_grpc_call_failed_rate",
"etcd_grpc_server_msg_received_rate",
"etcd_grpc_server_msg_sent_rate",
"etcd_disk_wal_fsync_duration",
"etcd_disk_wal_fsync_duration_quantile",
"etcd_disk_backend_commit_duration",
"etcd_disk_backend_commit_duration_quantile",
// apiserver
"apiserver_up_sum",
"apiserver_request_rate",
"apiserver_request_by_verb_rate",
"apiserver_request_latencies",
"apiserver_request_by_verb_latencies",
// scheduler
"scheduler_up_sum",
"scheduler_schedule_attempts",
"scheduler_schedule_attempt_rate",
"scheduler_e2e_scheduling_latency",
"scheduler_e2e_scheduling_latency_quantile",
// controller manager
"controller_manager_up_sum",
// coredns
"coredns_up_sum",
"coredns_cache_hits",
"coredns_cache_misses",
"coredns_dns_request_rate",
"coredns_dns_request_duration",
"coredns_dns_request_duration_quantile",
"coredns_dns_request_by_type_rate",
"coredns_dns_request_by_rcode_rate",
"coredns_panic_rate",
"coredns_proxy_request_rate",
"coredns_proxy_request_duration",
"coredns_proxy_request_duration_quantile",
// prometheus
"prometheus_up_sum",
"prometheus_tsdb_head_samples_appended_rate",
}
var RulePromQLTmplMap = MetricMap{
//cluster
"cluster_cpu_utilisation": ":node_cpu_utilisation:avg1m",
......@@ -614,4 +667,55 @@ var RulePromQLTmplMap = MetricMap{
// New in ks 2.0
"workspace_pod_abnormal_ratio": `sum(kube_pod_status_phase{phase=~"Failed|Pending|Unknown", namespace!="", namespace$1}) / sum(kube_pod_status_phase{phase!~"Succeeded", namespace!="", namespace$1})`,
// component
"etcd_server_deployed_sum": `count(up{job="etcd"})`,
"etcd_server_up_sum": `etcd:up:sum`,
"etcd_server_has_leader": `etcd_server_has_leader`,
"etcd_server_leader_changes": `etcd:etcd_server_leader_changes_seen:sum_changes`,
"etcd_server_proposals_failed_rate": `etcd:etcd_server_proposals_failed:sum_irate`,
"etcd_server_proposals_applied_rate": `etcd:etcd_server_proposals_applied:sum_irate`,
"etcd_server_proposals_committed_rate": `etcd:etcd_server_proposals_committed:sum_irate`,
"etcd_server_proposals_pending_count": `etcd:etcd_server_proposals_pending:sum`,
"etcd_mvcc_db_size": `etcd:etcd_debugging_mvcc_db_total_size:sum`,
"etcd_network_client_grpc_received_bytes": `etcd:etcd_network_client_grpc_received_bytes:sum_irate`,
"etcd_network_client_grpc_sent_bytes": `etcd:etcd_network_client_grpc_sent_bytes:sum_irate`,
"etcd_grpc_call_rate": `etcd:grpc_server_started:sum_irate`,
"etcd_grpc_call_failed_rate": `etcd:grpc_server_handled:sum_irate`,
"etcd_grpc_server_msg_received_rate": `etcd:grpc_server_msg_received:sum_irate`,
"etcd_grpc_server_msg_sent_rate": `etcd:grpc_server_msg_sent:sum_irate`,
"etcd_disk_wal_fsync_duration": `etcd:etcd_disk_wal_fsync_duration:avg`,
"etcd_disk_wal_fsync_duration_quantile": `etcd:etcd_disk_wal_fsync_duration:histogram_quantile`,
"etcd_disk_backend_commit_duration": `etcd:etcd_disk_backend_commit_duration:avg`,
"etcd_disk_backend_commit_duration_quantile": `etcd:etcd_disk_backend_commit_duration:histogram_quantile`,
"apiserver_up_sum": `apiserver:up:sum`,
"apiserver_request_rate": `apiserver:apiserver_request_count:sum_irate`,
"apiserver_request_by_verb_rate": `apiserver:apiserver_request_count:sum_verb_irate`,
"apiserver_request_latencies": `apiserver:apiserver_request_latencies:avg`,
"apiserver_request_by_verb_latencies": `apiserver:apiserver_request_latencies:avg_by_verb`,
"scheduler_up_sum": `scheduler:up:sum`,
"scheduler_schedule_attempts": `scheduler:scheduler_schedule_attempts:sum`,
"scheduler_schedule_attempt_rate": `scheduler:scheduler_schedule_attempts:sum_rate`,
"scheduler_e2e_scheduling_latency": `scheduler:scheduler_e2e_scheduling_latency:avg`,
"scheduler_e2e_scheduling_latency_quantile": `scheduler:scheduler_e2e_scheduling_latency:histogram_quantile`,
"controller_manager_up_sum": `controller_manager:up:sum`,
"coredns_up_sum": `coredns:up:sum`,
"coredns_cache_hits": `coredns:coredns_cache_hits_total:sum_irate`,
"coredns_cache_misses": `coredns:coredns_cache_misses:sum_irate`,
"coredns_dns_request_rate": `coredns:coredns_dns_request_count:sum_irate`,
"coredns_dns_request_duration": `coredns:coredns_dns_request_duration:avg`,
"coredns_dns_request_duration_quantile": `coredns:coredns_dns_request_duration:histogram_quantile`,
"coredns_dns_request_by_type_rate": `coredns:coredns_dns_request_type_count:sum_irate`,
"coredns_dns_request_by_rcode_rate": `coredns:coredns_dns_response_rcode_count:sum_irate`,
"coredns_panic_rate": `coredns:coredns_panic_count:sum_irate`,
"coredns_proxy_request_rate": `coredns:coredns_proxy_request_count:sum_irate`,
"coredns_proxy_request_duration": `coredns:coredns_proxy_request_duration:avg`,
"coredns_proxy_request_duration_quantile": `coredns:coredns_proxy_request_duration:histogram_quantile`,
"prometheus_up_sum": `prometheus:up:sum`,
"prometheus_tsdb_head_samples_appended_rate": `prometheus:prometheus_tsdb_head_samples_appended:sum_rate`,
}
......@@ -66,12 +66,13 @@ type MonitoringRequestParams struct {
PodName string
ContainerName string
WorkloadKind string
ComponentName string
}
// client is the package-level HTTP client that SendMonitoringRequest uses to
// issue GET requests. It is created from an empty http.Client literal, so no
// Timeout is set on the client here.
var client = &http.Client{}
func SendMonitoringRequest(queryType string, params string) string {
epurl := PrometheusEndpoint + queryType + params
func SendMonitoringRequest(prometheusEndpoint string, queryType string, params string) string {
epurl := prometheusEndpoint + queryType + params
response, err := client.Get(epurl)
if err != nil {
glog.Error(err)
......@@ -113,6 +114,7 @@ func ParseMonitoringRequestParams(request *restful.Request) *MonitoringRequestPa
podName := strings.Trim(request.PathParameter("pod"), " ")
containerName := strings.Trim(request.PathParameter("container"), " ")
workloadKind := strings.Trim(request.PathParameter("workload_kind"), " ")
componentName := strings.Trim(request.PathParameter("component"), " ")
var requestParams = MonitoringRequestParams{
SortMetricName: sortMetricName,
......@@ -130,6 +132,7 @@ func ParseMonitoringRequestParams(request *restful.Request) *MonitoringRequestPa
PodName: podName,
ContainerName: containerName,
WorkloadKind: workloadKind,
ComponentName: componentName,
}
if timeout == "" {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册