From f9db9ba2dce4d71f4b0ec3d915e883091e627967 Mon Sep 17 00:00:00 2001 From: yswdqz <74546965+yswdqz@users.noreply.github.com> Date: Fri, 23 Sep 2022 20:31:17 +0800 Subject: [PATCH] Support monitoring PostgreSQL slow SQLs. (#9673) --- docs/en/changes/changes.md | 2 + .../backend/backend-postgresql-monitoring.md | 38 +- .../src/main/resources/application.yml | 2 +- .../src/main/resources/lal/pgsql-slowsql.yaml | 35 + .../postgresql/postgresql-service.json | 1370 +++++++++-------- .../postgresql/expected/db-has-value.yml | 23 + .../postgres-exporter/docker-compose.yml | 44 +- .../postgresql/postgres-exporter/e2e.yaml | 2 +- .../postgres-exporter/fluent-bit-parser.conf | 19 + .../postgres-exporter/fluent-bit-script.lua | 100 ++ .../postgres-exporter/fluent-bit.conf | 43 + .../postgres-exporter/postgresql.conf | 50 + .../cases/postgresql/postgresql-cases.yaml | 3 + 13 files changed, 1055 insertions(+), 676 deletions(-) create mode 100644 oap-server/server-starter/src/main/resources/lal/pgsql-slowsql.yaml create mode 100644 test/e2e-v2/cases/postgresql/expected/db-has-value.yml create mode 100644 test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit-parser.conf create mode 100644 test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit-script.lua create mode 100644 test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit.conf create mode 100644 test/e2e-v2/cases/postgresql/postgres-exporter/postgresql.conf diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md index 5314581b52..1ce03c3648 100644 --- a/docs/en/changes/changes.md +++ b/docs/en/changes/changes.md @@ -19,6 +19,7 @@ * Remove physical index existing check and keep template existing check only to avoid meaningless `retry wait` in `no-init` mode. * Make sure instance list ordered in TTL processor to avoid TTL timer never runs. +* Support monitoring PostgreSQL slow SQLs. #### UI @@ -40,6 +41,7 @@ * Remove `responseCode` fields of all OAL sources, as well as examples to avoid user's confusion. * Remove All from the endpoints selector. * Enhance menu configurations to make it easier to change. +* Update PostgreSQL dashboard to visualize collected slow SQLs. #### Documentation diff --git a/docs/en/setup/backend/backend-postgresql-monitoring.md b/docs/en/setup/backend/backend-postgresql-monitoring.md index 48849b18f5..1956758c5c 100644 --- a/docs/en/setup/backend/backend-postgresql-monitoring.md +++ b/docs/en/setup/backend/backend-postgresql-monitoring.md @@ -1,21 +1,22 @@ # PostgreSQL monitoring +## PostgreSQL server performance from `postgres-exporter` SkyWalking leverages postgres-exporter for collecting metrics data from PostgreSQL. It leverages OpenTelemetry Collector to transfer the metrics to [OpenTelemetry receiver](opentelemetry-receiver.md) and into the [Meter System](./../../concepts-and-designs/meter.md). -## Data flow +### Data flow 1. postgres-exporter collect metrics data from PostgreSQL. 2. OpenTelemetry Collector fetches metrics from postgres-exporter via Prometheus Receiver and pushes metrics to SkyWalking OAP Server via the OpenCensus gRPC Exporter or OpenTelemetry gRPC exporter. 3. The SkyWalking OAP Server parses the expression with [MAL](../../concepts-and-designs/mal.md) to filter/calculate/aggregate and store the results. -## Set up +### Set up 1. Set up [postgres-exporter](https://github.com/prometheus-community/postgres_exporter#quick-start). 2. Set up [OpenTelemetry Collector ](https://opentelemetry.io/docs/collector/getting-started/#docker). For details on Prometheus Receiver in OpenTelemetry Collector, refer to [here](../../../../test/e2e-v2/cases/postgresql/postgres-exporter/otel-collector-config.yaml). 3. Config SkyWalking [OpenTelemetry receiver](opentelemetry-receiver.md). -## PostgreSQL Monitoring +### PostgreSQL Monitoring PostgreSQL monitoring provides monitoring of the status and resources of the PostgreSQL server.PostgreSQL server as a `Service` in OAP, and land on the `Layer: POSTGRESQL`. -### PostgreSQL Supported Metrics +#### Supported Metrics | Monitoring Panel | Unit | Metric Name | Description | Data Source | |-----|------|-----|-----|-----| | Shared Buffers | MB | meter_pg_shared_buffers | The number of shared memory buffers used by the server | postgres-exporter| @@ -44,8 +45,33 @@ PostgreSQL monitoring provides monitoring of the status and resources of the Pos | Idle Sessions | | meter_pg_idle_sessions | The number of connections which state is idle,idle in transaction or idle in transaction (aborted) | postgres-exporter| | Locks Count | | meter_pg_locks_count | Number of locks | postgres-exporter| - -## Customizations +### Customizations You can customize your own metrics/expression/dashboard panel. The metrics definition and expression rules are found in `/config/otel-rules/postgresql.yaml`. The PostgreSQL dashboard panel configurations are found in `/config/ui-initialized-templates/postgresql`. + +## Collect sampled slow SQLs +SkyWalking leverages [fluentbit](https://fluentbit.io/) or other log agents for collecting slow SQL statements from PostgreSQL. + +### Data flow +1. fluentbit agent collects slow sql logs from PostgreSQL. +2. fluentbit agent sends data to SkyWalking OAP Server using native log APIs via HTTP. +3. The SkyWalking OAP Server parses the expression with [LAL](../../concepts-and-designs/lal.md) to parse/extract and store the results. + +### Set up +1. Set up [fluentbit](https://docs.fluentbit.io/manual/installation/docker). +2. Config [fluentbit](../../../../test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit.conf) +3. Config PostgreSQL to enable slow log. [Example](../../../../test/e2e-v2/cases/postgresql/postgres-exporter/postgresql.conf). + +### Slow SQL Monitoring +Slow SQL monitoring provides monitoring of the slow SQL statements of the PostgreSQL server. PostgreSQL server is cataloged as a `Layer: POSTGRESQL` `Service` in OAP. + +#### Supported Metrics +| Monitoring Panel | Unit | Metric Name | Description | Data Source | +|-----|------|-----|-----|-----| +|Slow Statements | ms | top_n_database_statement | The latency and statement of PostgreSQL slow SQLs | fluentbit| + +### Customizations +You can customize your own metrics/expression/dashboard panel. +The slowsql expression rules are found in `/config/lal/pgsql-slowsql.yaml` +The PostgreSQL dashboard panel configurations are found in `/config/ui-initialized-templates/postgresql`. \ No newline at end of file diff --git a/oap-server/server-starter/src/main/resources/application.yml b/oap-server/server-starter/src/main/resources/application.yml index 85f53bb61a..0da69bfc5d 100644 --- a/oap-server/server-starter/src/main/resources/application.yml +++ b/oap-server/server-starter/src/main/resources/application.yml @@ -241,7 +241,7 @@ agent-analyzer: log-analyzer: selector: ${SW_LOG_ANALYZER:default} default: - lalFiles: ${SW_LOG_LAL_FILES:envoy-als,mysql-slowsql,default} + lalFiles: ${SW_LOG_LAL_FILES:envoy-als,mysql-slowsql,pgsql-slowsql,default} malFiles: ${SW_LOG_MAL_FILES:""} event-analyzer: diff --git a/oap-server/server-starter/src/main/resources/lal/pgsql-slowsql.yaml b/oap-server/server-starter/src/main/resources/lal/pgsql-slowsql.yaml new file mode 100644 index 0000000000..be3aeb291d --- /dev/null +++ b/oap-server/server-starter/src/main/resources/lal/pgsql-slowsql.yaml @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +rules: + - name: pgsql-slowsql + layer: POSTGRESQL + dsl: | + filter { + json{ + } + extractor{ + layer parsed.layer as String + service parsed.service as String + timestamp parsed.time as String + if (tag("LOG_KIND") == "SLOW_SQL") { + slowSql { + id parsed.id as String + statement parsed.statement as String + latency parsed.query_time as Long + } + } + } + } diff --git a/oap-server/server-starter/src/main/resources/ui-initialized-templates/postgresql/postgresql-service.json b/oap-server/server-starter/src/main/resources/ui-initialized-templates/postgresql/postgresql-service.json index 385b42235a..85c804a3f8 100644 --- a/oap-server/server-starter/src/main/resources/ui-initialized-templates/postgresql/postgresql-service.json +++ b/oap-server/server-starter/src/main/resources/ui-initialized-templates/postgresql/postgresql-service.json @@ -23,672 +23,718 @@ { "x":0, "y":0, - "w":4, - "h":6, + "w":24, + "h":33, "i":"0", - "type":"Widget", - "metricTypes":[ - "readMetricsValue" - ], - "metrics":[ - "meter_pg_shared_buffers" - ], - "graph":{ - "type":"Card", - "fontSize":40, - "textAlign":"center", - "showUnit":true - }, - "metricConfig":[ + "type":"Tab", + "children":[ { - "calculation":"byteToMB" - } - ], - "widget":{ - "title":"Shared Buffers (MB)" - } - }, - { - "x":4, - "y":0, - "w":5, - "h":6, - "i":"1", - "type":"Widget", - "metricTypes":[ - "readMetricsValue" - ], - "metrics":[ - "meter_pg_effective_cache" - ], - "graph":{ - "type":"Card", - "fontSize":40, - "textAlign":"center", - "showUnit":true - }, - "metricConfig":[ - { - "calculation":"byteToGB" - } - ], - "widget":{ - "title":"Effective Cache (GB)" - } - }, - { - "x":4, - "y":6, - "w":5, - "h":6, - "i":"2", - "type":"Widget", - "metricTypes":[ - "readMetricsValue" - ], - "metrics":[ - "meter_pg_maintenance_work_mem" - ], - "graph":{ - "type":"Card", - "fontSize":40, - "textAlign":"center", - "showUnit":true - }, - "widget":{ - "title":"Maintenance Work Mem (MB)" - }, - "metricConfig":[ + "name":"Inspections", + "children":[ + { + "x":0, + "y":0, + "w":4, + "h":6, + "i":"0", + "type":"Widget", + "metricTypes":[ + "readMetricsValue" + ], + "metrics":[ + "meter_pg_shared_buffers" + ], + "graph":{ + "type":"Card", + "fontSize":40, + "textAlign":"center", + "showUnit":true + }, + "metricConfig":[ + { + "calculation":"byteToMB" + } + ], + "widget":{ + "title":"Shared Buffers (MB)" + } + }, + { + "x":4, + "y":0, + "w":5, + "h":6, + "i":"1", + "type":"Widget", + "metricTypes":[ + "readMetricsValue" + ], + "metrics":[ + "meter_pg_effective_cache" + ], + "graph":{ + "type":"Card", + "fontSize":40, + "textAlign":"center", + "showUnit":true + }, + "metricConfig":[ + { + "calculation":"byteToGB" + } + ], + "widget":{ + "title":"Effective Cache (GB)" + } + }, + { + "x":4, + "y":6, + "w":5, + "h":6, + "i":"2", + "type":"Widget", + "metricTypes":[ + "readMetricsValue" + ], + "metrics":[ + "meter_pg_maintenance_work_mem" + ], + "graph":{ + "type":"Card", + "fontSize":40, + "textAlign":"center", + "showUnit":true + }, + "widget":{ + "title":"Maintenance Work Mem (MB)" + }, + "metricConfig":[ + { + "calculation":"byteToMB" + } + ] + }, + { + "x":14, + "y":6, + "w":5, + "h":6, + "i":"3", + "type":"Widget", + "metricTypes":[ + "readMetricsValue" + ], + "metrics":[ + "meter_pg_work_mem" + ], + "graph":{ + "type":"Card", + "fontSize":40, + "textAlign":"center", + "showUnit":true + }, + "metricConfig":[ + { + "calculation":"byteToMB" + } + ], + "widget":{ + "title":"Work Mem (MB)" + } + }, + { + "x":9, + "y":0, + "w":5, + "h":6, + "i":"4", + "type":"Widget", + "metricTypes":[ + "readMetricsValue" + ], + "metrics":[ + "meter_pg_seq_page_cost" + ], + "graph":{ + "type":"Card", + "fontSize":40, + "textAlign":"center", + "showUnit":true + }, + "widget":{ + "title":"Seq Page Cost" + } + }, + { + "x":14, + "y":0, + "w":5, + "h":6, + "i":"5", + "type":"Widget", + "metricTypes":[ + "readMetricsValue" + ], + "metrics":[ + "meter_pg_random_page_cost" + ], + "graph":{ + "type":"Card", + "fontSize":40, + "textAlign":"center", + "showUnit":true + }, + "widget":{ + "title":"Random Page Cost" + } + }, + { + "x":0, + "y":6, + "w":4, + "h":6, + "i":"6", + "type":"Widget", + "metricTypes":[ + "readMetricsValue" + ], + "metrics":[ + "meter_pg_max_wal_size" + ], + "graph":{ + "type":"Card", + "fontSize":40, + "textAlign":"center", + "showUnit":true + }, + "widget":{ + "title":"Max WAL Size (GB)" + }, + "metricConfig":[ + { + "calculation":"byteToGB" + } + ] + }, + { + "x":9, + "y":6, + "w":5, + "h":6, + "i":"7", + "type":"Widget", + "metricTypes":[ + "readMetricsValue" + ], + "metrics":[ + "meter_pg_max_parallel_workers" + ], + "graph":{ + "type":"Card", + "fontSize":40, + "textAlign":"center", + "showUnit":true + }, + "widget":{ + "title":"Max Parallel Workers" + } + }, + { + "x":19, + "y":0, + "w":5, + "h":6, + "i":"8", + "type":"Widget", + "metricTypes":[ + "readMetricsValue" + ], + "metrics":[ + "meter_pg_max_worker_processes" + ], + "graph":{ + "type":"Card", + "fontSize":40, + "textAlign":"center", + "showUnit":true + }, + "widget":{ + "title":"Max Worker Processes" + } + }, + { + "x":0, + "y":12, + "w":6, + "h":10, + "i":"9", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_fetched_rows_rate" + ], + "graph":{ + "type":"Area", + "opacity":0.4, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Fetched Rows Trend (rows per second)" + }, + "value":"9", + "label":"9" + }, + { + "x":6, + "y":12, + "w":6, + "h":10, + "i":"10", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_inserted_rows_rate" + ], + "graph":{ + "type":"Area", + "opacity":0.4, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Inserted Rows Trend (rows per second)" + }, + "value":"10", + "label":"10" + }, + { + "x":6, + "y":32, + "w":6, + "h":10, + "i":"11", + "type":"Widget", + "metricTypes":[ + "readMetricsValues", + "readMetricsValues", + "readMetricsValues", + "readMetricsValues", + "readMetricsValues" + ], + "metrics":[ + "meter_pg_buffers_clean", + "meter_pg_buffers_backend", + "meter_pg_buffers_checkpoint", + "meter_pg_buffers_backend_fsync", + "meter_pg_buffers_alloc" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Buffers Trend (per second)" + }, + "value":"11", + "label":"11" + }, + { + "x":12, + "y":12, + "w":6, + "h":10, + "i":"12", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_updated_rows_rate" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Updated Rows Trend (rows per second)" + }, + "value":"12", + "label":"12" + }, + { + "x":18, + "y":12, + "w":6, + "h":10, + "i":"13", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_deleted_rows_rate" + ], + "graph":{ + "type":"Area", + "opacity":0.4, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Deleted Rows Trend (rows per second)" + }, + "value":"13", + "label":"13" + }, + { + "x":0, + "y":22, + "w":6, + "h":10, + "i":"14", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_returned_rows_rate" + ], + "graph":{ + "type":"Area", + "opacity":0.4, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Returned Rows Trend (rows per second)" + }, + "value":"14", + "label":"14" + }, + { + "x":6, + "y":42, + "w":6, + "h":10, + "i":"15", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_temporary_files_rate" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "value":"15", + "label":"15", + "widget":{ + "title":"Temporary Files Trend (per second)" + } + }, + { + "x":12, + "y":22, + "w":6, + "h":10, + "i":"16", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_committed_transactions_rate" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Committed Transactions Trend (per second)" + }, + "value":"16", + "label":"16" + }, + { + "x":6, + "y":22, + "w":6, + "h":10, + "i":"17", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_rolled_back_transactions_rate" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Rolled Back Transactions Trend (per second)" + }, + "value":"17", + "label":"17" + }, + { + "x":0, + "y":32, + "w":6, + "h":10, + "i":"18", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_cache_hit_rate" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Cache Hit Rate (%)" + }, + "value":"18", + "label":"18" + }, + { + "x":12, + "y":42, + "w":6, + "h":10, + "i":"19", + "type":"Widget", + "metricTypes":[ + "readMetricsValues", + "readMetricsValues", + "readMetricsValues", + "readMetricsValues" + ], + "metrics":[ + "meter_pg_checkpoint_sync_time_rate", + "meter_pg_checkpoints_timed_rate", + "meter_pg_checkpoint_req_rate", + "meter_pg_checkpoint_write_time_rate" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Checkpoint Stat Trend (per second)" + }, + "value":"19", + "label":"19" + }, + { + "x":18, + "y":22, + "w":6, + "h":10, + "i":"20", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_conflicts_rate" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Conflicts Trend (per second)" + }, + "value":"20", + "label":"20" + }, + { + "x":0, + "y":42, + "w":6, + "h":10, + "i":"21", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_deadlocks_rate" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Deadlocks Trend (per second)" + }, + "value":"21", + "label":"21" + }, + { + "x":12, + "y":32, + "w":6, + "h":10, + "i":"22", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_active_sessions" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Active Sessions" + }, + "value":"22", + "label":"22" + }, + { + "x":18, + "y":42, + "w":6, + "h":10, + "i":"23", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_locks_count" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Locks" + }, + "value":"23", + "label":"23" + }, + { + "x":18, + "y":32, + "w":6, + "h":10, + "i":"24", + "type":"Widget", + "metricTypes":[ + "readLabeledMetricsValues" + ], + "metrics":[ + "meter_pg_idle_sessions" + ], + "graph":{ + "type":"Line", + "step":false, + "smooth":false, + "showSymbol":false, + "showXAxis":true, + "showYAxis":true + }, + "widget":{ + "title":"Idle Sessions" + }, + "value":"24", + "label":"24" + } + ] + }, { - "calculation":"byteToMB" + "name":"Slow Statements", + "children":[ + { + "x":0, + "y":0, + "w":24, + "h":29, + "i":"0", + "type":"Widget", + "metricTypes":[ + "readSampledRecords" + ], + "metrics":[ + "top_n_database_statement" + ], + "graph":{ + "type":"TopList", + "color":"purple" + }, + "widget":{ + "title":"Slow Statements (ms)" + }, + "metricConfig":[ + { + "topN":"50" + } + ] + } + ] } ] - }, - { - "x":14, - "y":6, - "w":5, - "h":6, - "i":"3", - "type":"Widget", - "metricTypes":[ - "readMetricsValue" - ], - "metrics":[ - "meter_pg_work_mem" - ], - "graph":{ - "type":"Card", - "fontSize":40, - "textAlign":"center", - "showUnit":true - }, - "metricConfig":[ - { - "calculation":"byteToMB" - } - ], - "widget":{ - "title":"Work Mem (MB)" - } - }, - { - "x":9, - "y":0, - "w":5, - "h":6, - "i":"4", - "type":"Widget", - "metricTypes":[ - "readMetricsValue" - ], - "metrics":[ - "meter_pg_seq_page_cost" - ], - "graph":{ - "type":"Card", - "fontSize":40, - "textAlign":"center", - "showUnit":true - }, - "widget":{ - "title":"Seq Page Cost" - } - }, - { - "x":14, - "y":0, - "w":5, - "h":6, - "i":"5", - "type":"Widget", - "metricTypes":[ - "readMetricsValue" - ], - "metrics":[ - "meter_pg_random_page_cost" - ], - "graph":{ - "type":"Card", - "fontSize":40, - "textAlign":"center", - "showUnit":true - }, - "widget":{ - "title":"Random Page Cost" - } - }, - { - "x":0, - "y":6, - "w":4, - "h":6, - "i":"6", - "type":"Widget", - "metricTypes":[ - "readMetricsValue" - ], - "metrics":[ - "meter_pg_max_wal_size" - ], - "graph":{ - "type":"Card", - "fontSize":40, - "textAlign":"center", - "showUnit":true - }, - "widget":{ - "title":"Max WAL Size (GB)" - }, - "metricConfig":[ - { - "calculation":"byteToGB" - } - ] - }, - { - "x":9, - "y":6, - "w":5, - "h":6, - "i":"7", - "type":"Widget", - "metricTypes":[ - "readMetricsValue" - ], - "metrics":[ - "meter_pg_max_parallel_workers" - ], - "graph":{ - "type":"Card", - "fontSize":40, - "textAlign":"center", - "showUnit":true - }, - "widget":{ - "title":"Max Parallel Workers" - } - }, - { - "x":19, - "y":0, - "w":5, - "h":6, - "i":"8", - "type":"Widget", - "metricTypes":[ - "readMetricsValue" - ], - "metrics":[ - "meter_pg_max_worker_processes" - ], - "graph":{ - "type":"Card", - "fontSize":40, - "textAlign":"center", - "showUnit":true - }, - "widget":{ - "title":"Max Worker Processes" - } - }, - { - "x":0, - "y":12, - "w":6, - "h":10, - "i":"9", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_fetched_rows_rate" - ], - "graph":{ - "type":"Area", - "opacity":0.4, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Fetched Rows Trend (rows per second)" - }, - "value":"9", - "label":"9" - }, - { - "x":6, - "y":12, - "w":6, - "h":10, - "i":"10", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_inserted_rows_rate" - ], - "graph":{ - "type":"Area", - "opacity":0.4, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Inserted Rows Trend (rows per second)" - }, - "value":"10", - "label":"10" - }, - { - "x":6, - "y":32, - "w":6, - "h":10, - "i":"11", - "type":"Widget", - "metricTypes":[ - "readMetricsValues", - "readMetricsValues", - "readMetricsValues", - "readMetricsValues", - "readMetricsValues" - ], - "metrics":[ - "meter_pg_buffers_clean", - "meter_pg_buffers_backend", - "meter_pg_buffers_checkpoint", - "meter_pg_buffers_backend_fsync", - "meter_pg_buffers_alloc" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Buffers Trend (per second)" - }, - "value":"11", - "label":"11" - }, - { - "x":12, - "y":12, - "w":6, - "h":10, - "i":"12", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_updated_rows_rate" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Updated Rows Trend (rows per second)" - }, - "value":"12", - "label":"12" - }, - { - "x":18, - "y":12, - "w":6, - "h":10, - "i":"13", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_deleted_rows_rate" - ], - "graph":{ - "type":"Area", - "opacity":0.4, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Deleted Rows Trend (rows per second)" - }, - "value":"13", - "label":"13" - }, - { - "x":0, - "y":22, - "w":6, - "h":10, - "i":"14", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_returned_rows_rate" - ], - "graph":{ - "type":"Area", - "opacity":0.4, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Returned Rows Trend (rows per second)" - }, - "value":"14", - "label":"14" - }, - { - "x":6, - "y":42, - "w":6, - "h":10, - "i":"15", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_temporary_files_rate" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "value":"15", - "label":"15", - "widget":{ - "title":"Temporary Files Trend (per second)" - } - }, - { - "x":12, - "y":22, - "w":6, - "h":10, - "i":"16", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_committed_transactions_rate" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Committed Transactions Trend (per second)" - }, - "value":"16", - "label":"16" - }, - { - "x":6, - "y":22, - "w":6, - "h":10, - "i":"17", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_rolled_back_transactions_rate" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Rolled Back Transactions Trend (per second)" - }, - "value":"17", - "label":"17" - }, - { - "x":0, - "y":32, - "w":6, - "h":10, - "i":"18", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_cache_hit_rate" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Cache Hit Rate (%)" - }, - "value":"18", - "label":"18" - }, - { - "x":12, - "y":42, - "w":6, - "h":10, - "i":"19", - "type":"Widget", - "metricTypes":[ - "readMetricsValues", - "readMetricsValues", - "readMetricsValues", - "readMetricsValues" - ], - "metrics":[ - "meter_pg_checkpoint_sync_time_rate", - "meter_pg_checkpoints_timed_rate", - "meter_pg_checkpoint_req_rate", - "meter_pg_checkpoint_write_time_rate" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Checkpoint Stat Trend (per second)" - }, - "value":"19", - "label":"19" - }, - { - "x":18, - "y":22, - "w":6, - "h":10, - "i":"20", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_conflicts_rate" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Conflicts Trend (per second)" - }, - "value":"20", - "label":"20" - }, - { - "x":0, - "y":42, - "w":6, - "h":10, - "i":"21", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_deadlocks_rate" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Deadlocks Trend (per second)" - }, - "value":"21", - "label":"21" - }, - { - "x":12, - "y":32, - "w":6, - "h":10, - "i":"22", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_active_sessions" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Active Sessions" - }, - "value":"22", - "label":"22" - }, - { - "x":18, - "y":42, - "w":6, - "h":10, - "i":"23", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_locks_count" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Locks" - }, - "value":"23", - "label":"23" - }, - { - "x":18, - "y":32, - "w":6, - "h":10, - "i":"24", - "type":"Widget", - "metricTypes":[ - "readLabeledMetricsValues" - ], - "metrics":[ - "meter_pg_idle_sessions" - ], - "graph":{ - "type":"Line", - "step":false, - "smooth":false, - "showSymbol":false, - "showXAxis":true, - "showYAxis":true - }, - "widget":{ - "title":"Idle Sessions" - }, - "value":"24", - "label":"24" } ], "layer":"POSTGRESQL", @@ -697,4 +743,4 @@ "id":"7da9d2e6-7c10-4ab4-ba86-fd73889608ff" } } -] \ No newline at end of file +] diff --git a/test/e2e-v2/cases/postgresql/expected/db-has-value.yml b/test/e2e-v2/cases/postgresql/expected/db-has-value.yml new file mode 100644 index 0000000000..a9ec0add46 --- /dev/null +++ b/test/e2e-v2/cases/postgresql/expected/db-has-value.yml @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- contains . }} +- key: 0 + value: + {{- contains .value }} + - key: {{ notEmpty .key }} + value: {{ notEmpty .value }} + {{- end }} +{{- end }} diff --git a/test/e2e-v2/cases/postgresql/postgres-exporter/docker-compose.yml b/test/e2e-v2/cases/postgresql/postgres-exporter/docker-compose.yml index bf63e9b35b..1c844eb31a 100644 --- a/test/e2e-v2/cases/postgresql/postgres-exporter/docker-compose.yml +++ b/test/e2e-v2/cases/postgresql/postgres-exporter/docker-compose.yml @@ -21,22 +21,40 @@ services: file: ../../../script/docker-compose/base-compose.yml service: oap environment: - SW_OTEL_RECEIVER: default - SW_OTEL_RECEIVER_ENABLED_OC_RULES: postgresql + - TZ=Asia/Shanghai + - SW_STORAGE=postgresql + - SW_JDBC_URL=jdbc:postgresql://postgres:5432/skywalking + - SW_DATA_SOURCE_USER=postgres + - SW_DATA_SOURCE_PASSWORD=123456 + - SW_CORE_TOPN_REPORT_PERIOD=2 ports: - 12800 + depends_on: + postgres: + condition: service_healthy postgres: image: postgres:14.1 environment: - - POSTGRES_PASSWORD=password + - TZ=Asia/Shanghai + - POSTGRES_PASSWORD=123456 + - POSTGRES_DB=skywalking ports: - 5432 + command: postgres -c config_file=/etc/postgresql.conf + volumes: + - ./postgresql.conf:/etc/postgresql.conf + - ./data:/var/lib/postgresql/data networks: - e2e + healthcheck: + test: ["CMD", "bash", "-c", "cat < /dev/null > /dev/tcp/127.0.0.1/5432"] + interval: 5s + timeout: 60s + retries: 120 postgres-load: image: postgres:14.1 environment: - - PGPASSWORD=password + - PGPASSWORD=123456 depends_on: - otel-collector networks: @@ -46,7 +64,7 @@ services: - ../postgres-exporter:/docker command: - -c - - "psql -f /docker/mock.sql -h postgres -p 5432 -U postgres postgres" + - "psql -f /docker/mock.sql -h postgres -p 5432 -U postgres" postgres-exporter: image: quay.io/prometheuscommunity/postgres-exporter:v0.11.0 depends_on: @@ -54,9 +72,23 @@ services: ports: - 9187 environment: - - DATA_SOURCE_NAME=postgresql://postgres:password@postgres:5432/postgres?sslmode=disable + - DATA_SOURCE_NAME=postgresql://postgres:123456@postgres:5432/postgres?sslmode=disable networks: - e2e + fluentbit: + image: fluent/fluent-bit:1.9 + environment: + - TZ=Asia/Shanghai + volumes: + - ./data/log:/tmp/skywalking-logs + - ./fluent-bit.conf:/fluent-bit/etc/fluent-bit.conf + - ./fluent-bit-parser.conf:/fluent-bit/etc/fluent-bit-parser.conf + - ./fluent-bit-script.lua:/fluent-bit/etc/fluent-bit-script.lua + networks: + - e2e + depends_on: + postgres: + condition: service_healthy otel-collector: image: otel/opentelemetry-collector:0.50.0 diff --git a/test/e2e-v2/cases/postgresql/postgres-exporter/e2e.yaml b/test/e2e-v2/cases/postgresql/postgres-exporter/e2e.yaml index a7e128296a..14fcf73f55 100644 --- a/test/e2e-v2/cases/postgresql/postgres-exporter/e2e.yaml +++ b/test/e2e-v2/cases/postgresql/postgres-exporter/e2e.yaml @@ -30,7 +30,7 @@ setup: verify: retry: - count: 20 + count: 60 interval: 3s cases: - includes: diff --git a/test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit-parser.conf b/test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit-parser.conf new file mode 100644 index 0000000000..1a7bb091d5 --- /dev/null +++ b/test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit-parser.conf @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[PARSER] + name my-log-format + format regex + regex \w*-\w*-\w* \w*:\w*:\w*.\w* UTCLOG: duration: \w*.\w* ms statement.* diff --git a/test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit-script.lua b/test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit-script.lua new file mode 100644 index 0000000000..3c1ba54983 --- /dev/null +++ b/test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit-script.lua @@ -0,0 +1,100 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +function rewrite_body(tag, timestamp, record) + log = record["log"] + record["log"] = nil + record["date"] = nil + record["tags"] = {data={{key="LOG_KIND", value="SLOW_SQL"}}} + arr = split(log,"\n") + re1 = {} + + re1["time"] = os.time() + + re1["layer"] = "POSTGRESQL" + record["layer"] = "POSTGRESQL" + _,durationIndex = string.find(log,"duration: ") + msIndex,_ = string.find(log," ms") + duration = string.sub(log,durationIndex+1,msIndex) + _,statementAf = string.find(log,"statement: ") + re1["statement"] = string.sub(log,statementAf+1) + duration = string.sub(log,durationIndex+1,msIndex-1) + d1 = math.floor(tonumber(duration)) + re1["query_time"] = d1 + + record["service"]="postgresql::postgres:5432" + re1["service"]= "postgresql::postgres:5432" + + re1["id"] = uuid() + + jsonstr = table2json(re1) + record["body"]={json={}} + record["body"]["json"]["json"] = jsonstr + return 1, timestamp, record +end +function split(input, delimiter) + input = tostring(input) + delimiter = tostring(delimiter) + if (delimiter == "") then return false end + local pos, arr = 0, {} + for st, sp in function() return string.find(input, delimiter, pos, true) end do + table.insert(arr, string.sub(input, pos, st - 1)) + pos = sp + 1 + end + table.insert(arr, string.sub(input, pos)) + return arr +end + +function uuid() + local seed={'e','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'} + local tb={} + for i=1,32 do + table.insert(tb,seed[math.random(1,16)]) + end + local sid=table.concat(tb) + return string.format('%s-%s-%s-%s-%s', + string.sub(sid,1,8), + string.sub(sid,9,12), + string.sub(sid,13,16), + string.sub(sid,17,20), + string.sub(sid,21,32) + ) +end + +function table2json(t) + local function serialize(tbl) + local tmp = {} + for k, v in pairs(tbl) do + local k_type = type(k) + local v_type = type(v) + local key = (k_type == "string" and '"' .. k .. '":') or (k_type == "number" and "") + local value = + (v_type == "table" and serialize(v)) or (v_type == "boolean" and tostring(v)) or + (v_type == "string" and '"' .. v .. '"') or + (v_type == "number" and v) + tmp[#tmp + 1] = key and value and tostring(key) .. tostring(value) or nil + end + if table.maxn(tbl) == 0 then + return "{" .. table.concat(tmp, ",") .. "}" + else + return "[" .. table.concat(tmp, ",") .. "]" + end + end + assert(type(t) == "table") + return serialize(t) +end + diff --git a/test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit.conf b/test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit.conf new file mode 100644 index 0000000000..4a61d641ba --- /dev/null +++ b/test/e2e-v2/cases/postgresql/postgres-exporter/fluent-bit.conf @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[SERVICE] + flush 1 + log_level info + parsers_File fluent-bit-parser.conf +[INPUT] + name tail + path /tmp/skywalking-logs/slow.log + parser my-log-format +[FILTER] + name grep + match * + regex log \w*-\w*-\w* \w*:\w*:\w*.\w* UTCLOG: duration: \w*.\w* ms statement.* +[FILTER] + name lua + match * + script fluent-bit-script.lua + call rewrite_body +[OUTPUT] + name stdout + match * + format json +[OUTPUT] + name http + match * + host oap + port 12800 + uri /v3/logs + format json diff --git a/test/e2e-v2/cases/postgresql/postgres-exporter/postgresql.conf b/test/e2e-v2/cases/postgresql/postgres-exporter/postgresql.conf new file mode 100644 index 0000000000..2f6d8291b0 --- /dev/null +++ b/test/e2e-v2/cases/postgresql/postgres-exporter/postgresql.conf @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +listen_addresses = '*' +max_connections = 100 + +shared_buffers = 128MB + +max_wal_size = 1GB +min_wal_size = 80MB + +log_timezone = 'Etc/UTC' + +datestyle = 'iso, mdy' + +timezone = 'Etc/UTC' + +lc_messages = 'en_US.utf8' + +lc_monetary = 'en_US.utf8' +lc_numeric = 'en_US.utf8' +lc_time = 'en_US.utf8' + +default_text_search_config = 'pg_catalog.english' + +logging_collector = on +log_directory = 'log' +log_filename = 'slow.log' +log_rotation_age = 1d +log_rotation_size = 10MB +log_min_messages = info + +log_min_duration_statement = 1000 +log_checkpoints = on +log_connections = on +log_disconnections = on +log_duration = on +log_line_prefix = '%m' \ No newline at end of file diff --git a/test/e2e-v2/cases/postgresql/postgresql-cases.yaml b/test/e2e-v2/cases/postgresql/postgresql-cases.yaml index 3cebc63fba..33d0c3a46c 100644 --- a/test/e2e-v2/cases/postgresql/postgresql-cases.yaml +++ b/test/e2e-v2/cases/postgresql/postgresql-cases.yaml @@ -56,3 +56,6 @@ - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics multiple-linear --name=meter_pg_active_sessions --labels=postgres --service-name=postgresql::postgres:5432 |yq e 'to_entries | with(.[] ; .value=(.value | to_entries))' - expected: expected/metrics-has-value-label.yml + # slow sql + - query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics sampled-record --name=top_n_database_statement |yq e 'to_entries | with(.[] ; .value=(.value | to_entries))' - + expected: expected/db-has-value.yml -- GitLab