From cbb67101b9ec3d9361ed77748d1b90893473a7a0 Mon Sep 17 00:00:00 2001 From: wankai123 Date: Tue, 13 Apr 2021 13:55:22 +0800 Subject: [PATCH] Fix incorrect metrics calculation in K8s monitoring. (#6739) --- CHANGES.md | 2 +- .../src/main/resources/otel-oc-rules/k8s-node.yaml | 6 +++--- .../src/main/resources/otel-oc-rules/k8s-service.yaml | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2fa2e84e0f..414e7e61ba 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,7 +11,7 @@ Release Notes. #### OAP-Backend - +* Fix incorrect metrics calculation in K8s monitoring. #### UI diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml index 4f32102a50..3e8ff0c5b2 100644 --- a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml +++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml @@ -36,7 +36,7 @@ metricsRules: - name: cpu_cores exp: (kube_node_status_capacity * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node']) - name: cpu_usage - exp: (container_cpu_usage_seconds_total * 1000).tagEqual('id' , '/').sum(['cluster' , 'node']).rate('PT1M') + exp: (container_cpu_usage_seconds_total * 1000).tagEqual('id' , '/').sum(['cluster' , 'node']).irate() - name: cpu_cores_allocatable exp: (kube_node_status_allocatable * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node']) - name: cpu_cores_requests @@ -69,6 +69,6 @@ metricsRules: exp: kube_pod_info.sum(['cluster' , 'node']) - name: network_receive - exp: container_network_receive_bytes_total.sum(['cluster' , 'node']).irate() + exp: container_network_receive_bytes_total.tagEqual('id' , '/').sum(['cluster' , 'node']).irate() - name: network_transmit - exp: container_network_transmit_bytes_total.sum(['cluster' , 'node']).irate() + exp: container_network_transmit_bytes_total.tagEqual('id' , '/').sum(['cluster' , 
'node']).irate() diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml index 79b86c2460..63dd500a8f 100644 --- a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml +++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml @@ -54,13 +54,13 @@ metricsRules: exp: kube_pod_container_status_restarts_total.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) - name: pod_cpu_usage - exp: (container_cpu_usage_seconds_total * 1000).tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).rate('PT1M') + exp: (container_cpu_usage_seconds_total * 1000).tagNotEqual('container' , '').tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() - name: pod_memory_usage - exp: container_memory_working_set_bytes.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) + exp: container_memory_working_set_bytes.tagNotEqual('container' , '').tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) - name: pod_network_receive - exp: container_network_receive_bytes_total.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() + exp: container_network_receive_bytes_total.tagNotEqual('container' , '').tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , 
'').sum(['cluster' , 'service' , 'pod']).irate() - name: pod_network_transmit - exp: container_network_transmit_bytes_total.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() + exp: container_network_transmit_bytes_total.tagNotEqual('container' , '').tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() - name: pod_fs_usage - exp: container_fs_usage_bytes.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) + exp: container_fs_usage_bytes.tagNotEqual('container' , '').tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) -- GitLab