diff --git a/CHANGES.md b/CHANGES.md index 2fa2e84e0f29a82f25763107dd53ed313ac014c7..414e7e61ba1fe5e08cce00f3b7fc2c85cbd1f52e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,7 +11,7 @@ Release Notes. #### OAP-Backend - +* Fix incorrect metrics calculation in K8s monitoring. #### UI diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml index 4f32102a50dcbf4975ab0a82dc720ea55fa42054..3e8ff0c5b28ee9e1a21b7617432e92465ab3c895 100644 --- a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml +++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml @@ -36,7 +36,7 @@ metricsRules: - name: cpu_cores exp: (kube_node_status_capacity * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node']) - name: cpu_usage - exp: (container_cpu_usage_seconds_total * 1000).tagEqual('id' , '/').sum(['cluster' , 'node']).rate('PT1M') + exp: (container_cpu_usage_seconds_total * 1000).tagEqual('id' , '/').sum(['cluster' , 'node']).irate() - name: cpu_cores_allocatable exp: (kube_node_status_allocatable * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node']) - name: cpu_cores_requests @@ -69,6 +69,6 @@ metricsRules: exp: kube_pod_info.sum(['cluster' , 'node']) - name: network_receive - exp: container_network_receive_bytes_total.sum(['cluster' , 'node']).irate() + exp: container_network_receive_bytes_total.tagEqual('id' , '/').sum(['cluster' , 'node']).irate() - name: network_transmit - exp: container_network_transmit_bytes_total.sum(['cluster' , 'node']).irate() + exp: container_network_transmit_bytes_total.tagEqual('id' , '/').sum(['cluster' , 'node']).irate() diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml index 79b86c24609398aea021662208a1efa5807bf835..63dd500a8f80e1253260e58c35e462f1e39447fd 100644 --- 
a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml +++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml @@ -54,13 +54,13 @@ metricsRules: exp: kube_pod_container_status_restarts_total.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) - name: pod_cpu_usage - exp: (container_cpu_usage_seconds_total * 1000).tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).rate('PT1M') + exp: (container_cpu_usage_seconds_total * 1000).tagNotEqual('container' , '').tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() - name: pod_memory_usage - exp: container_memory_working_set_bytes.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) + exp: container_memory_working_set_bytes.tagNotEqual('container' , '').tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) - name: pod_network_receive - exp: container_network_receive_bytes_total.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() + exp: container_network_receive_bytes_total.tagNotEqual('container' , '').tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() - name: pod_network_transmit - exp: container_network_transmit_bytes_total.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 
'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() + exp: container_network_transmit_bytes_total.tagNotEqual('container' , '').tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() - name: pod_fs_usage - exp: container_fs_usage_bytes.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) + exp: container_fs_usage_bytes.tagNotEqual('container' , '').tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod'])