From b6661b5164fac59e754009fa6ef646aaed8929a7 Mon Sep 17 00:00:00 2001 From: Gao Hongtao Date: Fri, 10 Jul 2020 00:30:43 +0800 Subject: [PATCH] Add health checker module (#5046) --- docs/en/setup/backend/backend-health-check.md | 62 ++++++++++++ oap-server/pom.xml | 1 + .../src/main/resources/application.yml | 5 + .../server/core/query/type/HealthStatus.java | 30 ++++++ .../client/RemoteClientManagerTestCase.java | 5 + oap-server/server-health-checker/pom.xml | 40 ++++++++ .../checker/module/HealthCheckerModule.java | 38 +++++++ .../checker/provider/HealthCheckerConfig.java | 30 ++++++ .../provider/HealthCheckerProvider.java | 98 +++++++++++++++++++ .../checker/provider/HealthQueryService.java | 38 +++++++ ...ing.oap.server.library.module.ModuleDefine | 19 ++++ ...g.oap.server.library.module.ModuleProvider | 19 ++++ .../jdbc/hikaricp/JDBCHikariCPClient.java | 15 +++ .../query-graphql-plugin/pom.xml | 5 + .../query/graphql/GraphQLQueryProvider.java | 3 +- .../query/graphql/resolver/HealthQuery.java | 47 +++++++++ .../src/main/resources/query-protocol | 2 +- .../plugin/jdbc/h2/H2StorageProvider.java | 14 +++ .../server/telemetry/api/GaugeMetrics.java | 5 + .../server/telemetry/api/MetricsCreator.java | 30 ++++++ .../telemetry/none/MetricsCreatorNoop.java | 5 + .../prometheus/PrometheusGaugeMetrics.java | 6 ++ .../PrometheusMetricsCollector.java | 46 +++++++++ .../PrometheusTelemetryProvider.java | 3 +- 24 files changed, 562 insertions(+), 4 deletions(-) create mode 100644 docs/en/setup/backend/backend-health-check.md create mode 100644 oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/query/type/HealthStatus.java create mode 100644 oap-server/server-health-checker/pom.xml create mode 100644 oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/module/HealthCheckerModule.java create mode 100644 
oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthCheckerConfig.java create mode 100644 oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthCheckerProvider.java create mode 100644 oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthQueryService.java create mode 100644 oap-server/server-health-checker/src/main/resources/META-INF/services/org.apache.skywalking.oap.server.library.module.ModuleDefine create mode 100644 oap-server/server-health-checker/src/main/resources/META-INF/services/org.apache.skywalking.oap.server.library.module.ModuleProvider create mode 100644 oap-server/server-query-plugin/query-graphql-plugin/src/main/java/org/apache/skywalking/oap/query/graphql/resolver/HealthQuery.java create mode 100644 oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusMetricsCollector.java diff --git a/docs/en/setup/backend/backend-health-check.md b/docs/en/setup/backend/backend-health-check.md new file mode 100644 index 0000000000..6a663ef09c --- /dev/null +++ b/docs/en/setup/backend/backend-health-check.md @@ -0,0 +1,62 @@ +# Health Check + +Health check intends to provide a unique approach to check the health status of the OAP server. It includes the health status +of modules, and the readiness of GraphQL and gRPC services. + +## Health Checker Module + +The Health Checker module exposes the health status of modules. It can be activated as below: +```yaml +health-checker: + selector: ${SW_HEALTH_CHECKER:default} + default: + checkIntervalSeconds: ${SW_HEALTH_CHECKER_INTERVAL_SECONDS:5} +``` +Note that the `telemetry` module must be enabled at the same time, i.e. its provider must be neither `-` nor `none`. 
+ +After that, we can query the OAP server health status through GraphQL: + +``` +query{ + checkHealth{ + score + details + } +} +``` + +If the OAP server is healthy, the response should be: + +```json +{ + "data": { + "checkHealth": { + "score": 0, + "details": "" + } + } +} +``` + +Once some modules are unhealthy, for instance when storage H2 is down, the result might look like this: + +```json +{ + "data": { + "checkHealth": { + "score": 1, + "details": "storage_h2," + } + } +} +``` +You can refer to the [checkHealth query](https://github.com/apache/skywalking-query-protocol/blob/master/common.graphql) +for more details. + +## The readiness of GraphQL and gRPC + +We can use the query above to check the readiness of GraphQL. + +OAP implements the [gRPC Health Checking Protocol](https://github.com/grpc/grpc/blob/master/doc/health-checking.md). +We can use [grpc-health-probe](https://github.com/grpc-ecosystem/grpc-health-probe) or any other tool to check the +health of OAP gRPC services. diff --git a/oap-server/pom.xml b/oap-server/pom.xml index f7fbf6a1bb..ee05cb482c 100755 --- a/oap-server/pom.xml +++ b/oap-server/pom.xml @@ -46,6 +46,7 @@ server-bootstrap server-tools server-fetcher-plugin + server-health-checker diff --git a/oap-server/server-bootstrap/src/main/resources/application.yml b/oap-server/server-bootstrap/src/main/resources/application.yml index 06f183ed79..0e89003b2c 100755 --- a/oap-server/server-bootstrap/src/main/resources/application.yml +++ b/oap-server/server-bootstrap/src/main/resources/application.yml @@ -270,3 +270,8 @@ exporter: grpc: targetHost: ${SW_EXPORTER_GRPC_HOST:127.0.0.1} targetPort: ${SW_EXPORTER_GRPC_PORT:9870} + +health-checker: + selector: ${SW_HEALTH_CHECKER:-} + default: + checkIntervalSeconds: ${SW_HEALTH_CHECKER_INTERVAL_SECONDS:5} diff --git a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/query/type/HealthStatus.java 
b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/query/type/HealthStatus.java new file mode 100644 index 0000000000..f32ccf73c3 --- /dev/null +++ b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/query/type/HealthStatus.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.skywalking.oap.server.core.query.type; + +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +public class HealthStatus { + // score == 0 means healthy, otherwise it's unhealthy. 
+ private int score; + private String details; +} diff --git a/oap-server/server-core/src/test/java/org/apache/skywalking/oap/server/core/remote/client/RemoteClientManagerTestCase.java b/oap-server/server-core/src/test/java/org/apache/skywalking/oap/server/core/remote/client/RemoteClientManagerTestCase.java index 996286a72b..01858e2a49 100644 --- a/oap-server/server-core/src/test/java/org/apache/skywalking/oap/server/core/remote/client/RemoteClientManagerTestCase.java +++ b/oap-server/server-core/src/test/java/org/apache/skywalking/oap/server/core/remote/client/RemoteClientManagerTestCase.java @@ -90,6 +90,11 @@ public class RemoteClientManagerTestCase { public void setValue(double value) { } + + @Override + public double getValue() { + return 0; + } }); ModuleDefineTesting telemetryModuleDefine = new ModuleDefineTesting(); moduleManager.put(TelemetryModule.NAME, telemetryModuleDefine); diff --git a/oap-server/server-health-checker/pom.xml b/oap-server/server-health-checker/pom.xml new file mode 100644 index 0000000000..aa8c8c4c61 --- /dev/null +++ b/oap-server/server-health-checker/pom.xml @@ -0,0 +1,40 @@ + + + + + + oap-server + org.apache.skywalking + 8.1.0-SNAPSHOT + + 4.0.0 + + server-health-checker + + + + org.apache.skywalking + server-core + ${project.version} + + + + \ No newline at end of file diff --git a/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/module/HealthCheckerModule.java b/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/module/HealthCheckerModule.java new file mode 100644 index 0000000000..4ecf78a41d --- /dev/null +++ b/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/module/HealthCheckerModule.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.skywalking.oap.server.health.checker.module; + +import org.apache.skywalking.oap.server.health.checker.provider.HealthQueryService; +import org.apache.skywalking.oap.server.library.module.ModuleDefine; + +/** + * HealthCheckerModule provides a channel to expose the health status of modules to external consumers; the only service it declares is HealthQueryService. + */ +public class HealthCheckerModule extends ModuleDefine { + public static final String NAME = "health-checker"; + + public HealthCheckerModule() { + super(NAME); + } + + @Override + public Class[] services() { + return new Class[]{HealthQueryService.class}; + } +} diff --git a/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthCheckerConfig.java b/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthCheckerConfig.java new file mode 100644 index 0000000000..14e36c14ad --- /dev/null +++ b/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthCheckerConfig.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.skywalking.oap.server.health.checker.provider;
+
+import lombok.Getter;
+import org.apache.skywalking.oap.server.library.module.ModuleConfig;
+
+/**
+ * The configuration of the health checker module.
+ */
+@Getter
+public class HealthCheckerConfig extends ModuleConfig {
+    private long checkIntervalSeconds = 5;
+}
diff --git a/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthCheckerProvider.java b/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthCheckerProvider.java
new file mode 100644
index 0000000000..b4829d826a
--- /dev/null
+++ b/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthCheckerProvider.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.skywalking.oap.server.health.checker.provider;
+
+import com.google.common.util.concurrent.AtomicDouble;
+import io.vavr.collection.Stream;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.skywalking.oap.server.health.checker.module.HealthCheckerModule;
+import org.apache.skywalking.oap.server.library.module.ModuleConfig;
+import org.apache.skywalking.oap.server.library.module.ModuleDefine;
+import org.apache.skywalking.oap.server.library.module.ModuleProvider;
+import org.apache.skywalking.oap.server.library.module.ModuleServiceHolder;
+import org.apache.skywalking.oap.server.library.module.ModuleStartException;
+import org.apache.skywalking.oap.server.library.module.ServiceNotProvidedException;
+import org.apache.skywalking.oap.server.telemetry.TelemetryModule;
+import org.apache.skywalking.oap.server.telemetry.api.MetricsCollector;
+import org.apache.skywalking.oap.server.telemetry.api.MetricsCreator;
+
+/**
+ * HealthCheckerProvider fetches health check metrics from the telemetry module, then calculates the health score and
+ * generates details that explain the score. External services or users can query the status through HealthQueryService.
+ */
+@Slf4j
+public class HealthCheckerProvider extends ModuleProvider {
+    private final AtomicDouble score = new AtomicDouble();
+    private final AtomicReference<String> details = new AtomicReference<>();
+    private final HealthCheckerConfig config = new HealthCheckerConfig();
+    private MetricsCollector collector;
+    private MetricsCreator metricsCreator;
+    private ScheduledExecutorService ses;
+
+    @Override public String name() {
+        return "default";
+    }
+
+    @Override public Class<? extends ModuleDefine> module() {
+        return HealthCheckerModule.class;
+    }
+
+    @Override public ModuleConfig createConfigBeanIfAbsent() {
+        return config;
+    }
+
+    @Override public void prepare() throws ServiceNotProvidedException, ModuleStartException {
+        // Start from a non-zero (i.e. unhealthy) score until the first evaluation has run.
+        score.set(-1);
+        ses = Executors.newSingleThreadScheduledExecutor();
+        this.registerServiceImplementation(HealthQueryService.class, new HealthQueryService(score, details));
+    }
+
+    @Override public void start() throws ServiceNotProvidedException, ModuleStartException {
+        ModuleServiceHolder telemetry = getManager().find(TelemetryModule.NAME).provider();
+        metricsCreator = telemetry.getService(MetricsCreator.class);
+        collector = telemetry.getService(MetricsCollector.class);
+    }
+
+    @Override public void notifyAfterCompleted() throws ServiceNotProvidedException, ModuleStartException {
+        ses.scheduleAtFixedRate(this::refreshHealthStatus, 2, config.getCheckIntervalSeconds(), TimeUnit.SECONDS);
+    }
+
+    /**
+     * Sums all health check gauges into the score and records the modules whose gauge is positive.
+     * Failures are logged instead of propagated, because an exception escaping a task scheduled by
+     * scheduleAtFixedRate suppresses all subsequent executions and would freeze the reported status.
+     */
+    private void refreshHealthStatus() {
+        try {
+            StringBuilder unhealthyModules = new StringBuilder();
+            double total = Stream.ofAll(collector.collect())
+                                 .flatMap(metricFamily -> metricFamily.samples)
+                                 .filter(sample -> metricsCreator.isHealthCheckerMetrics(sample.name))
+                                 .peek(sample -> {
+                                     if (sample.value > 0.0) {
+                                         unhealthyModules.append(metricsCreator.extractModuleName(sample.name)).append(",");
+                                     }
+                                 })
+                                 .map(sample -> sample.value)
+                                 .collect(Collectors.summingDouble(Double::doubleValue));
+            score.set(total);
+            details.set(unhealthyModules.toString());
+        } catch (Exception e) {
+            log.error("Failed to refresh the health status, the previous status is kept.", e);
+        }
+    }
+
+    @Override public String[] requiredModules() {
+        return new String[]{TelemetryModule.NAME};
+    }
+}
diff --git a/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthQueryService.java b/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthQueryService.java
new file mode 100644
index 0000000000..1899cab30c
--- /dev/null
+++ b/oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthQueryService.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.skywalking.oap.server.health.checker.provider;
+
+import com.google.common.util.concurrent.AtomicDouble;
+import java.util.concurrent.atomic.AtomicReference;
+import lombok.RequiredArgsConstructor;
+import org.apache.skywalking.oap.server.core.query.type.HealthStatus;
+import org.apache.skywalking.oap.server.library.module.Service;
+
+/**
+ * HealthQueryService exposes the score and details holders it is constructed with as a HealthStatus.
+ */
+@RequiredArgsConstructor
+public class HealthQueryService implements Service {
+    private final AtomicDouble score;
+    private final AtomicReference<String> details;
+
+    /**
+     * Builds a snapshot of the current health status.
+     *
+     * @return the status, whose score is 0 when healthy and non-zero otherwise
+     */
+    public HealthStatus checkHealth() {
+        HealthStatus s = new HealthStatus();
+        // Round up rather than truncate, so that a fractional positive score is never reported as 0 (healthy).
+        s.setScore((int) Math.ceil(score.get()));
+        s.setDetails(details.get());
+        return s;
+    }
+}
diff --git a/oap-server/server-health-checker/src/main/resources/META-INF/services/org.apache.skywalking.oap.server.library.module.ModuleDefine b/oap-server/server-health-checker/src/main/resources/META-INF/services/org.apache.skywalking.oap.server.library.module.ModuleDefine
new file mode 100644
index 0000000000..8dc18f0511
--- /dev/null
+++ b/oap-server/server-health-checker/src/main/resources/META-INF/services/org.apache.skywalking.oap.server.library.module.ModuleDefine
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# +# + +org.apache.skywalking.oap.server.health.checker.module.HealthCheckerModule diff --git a/oap-server/server-health-checker/src/main/resources/META-INF/services/org.apache.skywalking.oap.server.library.module.ModuleProvider b/oap-server/server-health-checker/src/main/resources/META-INF/services/org.apache.skywalking.oap.server.library.module.ModuleProvider new file mode 100644 index 0000000000..35238159f0 --- /dev/null +++ b/oap-server/server-health-checker/src/main/resources/META-INF/services/org.apache.skywalking.oap.server.library.module.ModuleProvider @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+#
+
+org.apache.skywalking.oap.server.health.checker.provider.HealthCheckerProvider
diff --git a/oap-server/server-library/library-client/src/main/java/org/apache/skywalking/oap/server/library/client/jdbc/hikaricp/JDBCHikariCPClient.java b/oap-server/server-library/library-client/src/main/java/org/apache/skywalking/oap/server/library/client/jdbc/hikaricp/JDBCHikariCPClient.java
index 95575710d6..f99a6a0527 100644
--- a/oap-server/server-library/library-client/src/main/java/org/apache/skywalking/oap/server/library/client/jdbc/hikaricp/JDBCHikariCPClient.java
+++ b/oap-server/server-library/library-client/src/main/java/org/apache/skywalking/oap/server/library/client/jdbc/hikaricp/JDBCHikariCPClient.java
@@ -26,6 +26,10 @@ import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
 import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
 import org.apache.skywalking.oap.server.library.client.Client;
 import org.apache.skywalking.oap.server.library.client.jdbc.JDBCClientException;
 import org.slf4j.Logger;
@@ -44,6 +48,27 @@ public class JDBCHikariCPClient implements Client {
         hikariConfig = new HikariConfig(properties);
     }
 
+    /**
+     * Probes the data source every 3 seconds and reports the result to the listener.
+     * Note that every call creates a new scheduler, which this method never shuts down.
+     *
+     * @param healthListener receives true when a connection can be obtained, false otherwise
+     */
+    public void setHealthCheckListener(Consumer<Boolean> healthListener) {
+        ScheduledExecutorService asyncHealthScheduler = Executors.newSingleThreadScheduledExecutor();
+        asyncHealthScheduler.scheduleAtFixedRate(() -> {
+            boolean healthy;
+            // Catch every exception, not only SQLException: an unchecked failure (e.g. the data source is
+            // not connected yet) escaping this task would silently cancel all subsequent checks.
+            try (Connection ignored = dataSource.getConnection()) {
+                healthy = true;
+            } catch (Exception e) {
+                healthy = false;
+            }
+            healthListener.accept(healthy);
+        }, 0, 3, TimeUnit.SECONDS);
+    }
+
     @Override
     public void connect() {
         dataSource = new HikariDataSource(hikariConfig);
diff --git a/oap-server/server-query-plugin/query-graphql-plugin/pom.xml b/oap-server/server-query-plugin/query-graphql-plugin/pom.xml
index d814975c00..54f380f523 100644
--- 
a/oap-server/server-query-plugin/query-graphql-plugin/pom.xml +++ b/oap-server/server-query-plugin/query-graphql-plugin/pom.xml @@ -33,6 +33,11 @@ server-core ${project.version} + + org.apache.skywalking + server-health-checker + ${project.version} + com.graphql-java graphql-java diff --git a/oap-server/server-query-plugin/query-graphql-plugin/src/main/java/org/apache/skywalking/oap/query/graphql/GraphQLQueryProvider.java b/oap-server/server-query-plugin/query-graphql-plugin/src/main/java/org/apache/skywalking/oap/query/graphql/GraphQLQueryProvider.java index 7a5ba9136d..145559db11 100644 --- a/oap-server/server-query-plugin/query-graphql-plugin/src/main/java/org/apache/skywalking/oap/query/graphql/GraphQLQueryProvider.java +++ b/oap-server/server-query-plugin/query-graphql-plugin/src/main/java/org/apache/skywalking/oap/query/graphql/GraphQLQueryProvider.java @@ -23,6 +23,7 @@ import graphql.GraphQL; import graphql.schema.GraphQLSchema; import org.apache.skywalking.oap.query.graphql.resolver.AggregationQuery; import org.apache.skywalking.oap.query.graphql.resolver.AlarmQuery; +import org.apache.skywalking.oap.query.graphql.resolver.HealthQuery; import org.apache.skywalking.oap.query.graphql.resolver.LogQuery; import org.apache.skywalking.oap.query.graphql.resolver.MetadataQuery; import org.apache.skywalking.oap.query.graphql.resolver.MetricQuery; @@ -72,7 +73,7 @@ public class GraphQLQueryProvider extends ModuleProvider { public void prepare() throws ServiceNotProvidedException, ModuleStartException { GraphQLSchema schema = SchemaParser.newParser() .file("query-protocol/common.graphqls") - .resolvers(new Query(), new Mutation()) + .resolvers(new Query(), new Mutation(), new HealthQuery(getManager())) .file("query-protocol/metadata.graphqls") .resolvers(new MetadataQuery(getManager())) .file("query-protocol/topology.graphqls") diff --git 
a/oap-server/server-query-plugin/query-graphql-plugin/src/main/java/org/apache/skywalking/oap/query/graphql/resolver/HealthQuery.java b/oap-server/server-query-plugin/query-graphql-plugin/src/main/java/org/apache/skywalking/oap/query/graphql/resolver/HealthQuery.java
new file mode 100644
index 0000000000..0521248202
--- /dev/null
+++ b/oap-server/server-query-plugin/query-graphql-plugin/src/main/java/org/apache/skywalking/oap/query/graphql/resolver/HealthQuery.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.skywalking.oap.query.graphql.resolver;
+
+import com.coxautodev.graphql.tools.GraphQLQueryResolver;
+import lombok.RequiredArgsConstructor;
+import org.apache.skywalking.oap.server.core.query.type.HealthStatus;
+import org.apache.skywalking.oap.server.health.checker.module.HealthCheckerModule;
+import org.apache.skywalking.oap.server.health.checker.provider.HealthQueryService;
+import org.apache.skywalking.oap.server.library.module.ModuleManager;
+
+/**
+ * GraphQL resolver of the checkHealth query, backed by the HealthQueryService of the health checker module.
+ */
+@RequiredArgsConstructor
+public class HealthQuery implements GraphQLQueryResolver {
+
+    private final ModuleManager moduleManager;
+
+    private HealthQueryService service;
+
+    private HealthQueryService getService() {
+        // Resolve the service lazily on first use and cache it for later queries.
+        if (service == null) {
+            service = moduleManager.find(HealthCheckerModule.NAME)
+                                   .provider()
+                                   .getService(HealthQueryService.class);
+        }
+        return service;
+    }
+
+    public HealthStatus checkHealth() {
+        return getService().checkHealth();
+    }
+}
diff --git a/oap-server/server-query-plugin/query-graphql-plugin/src/main/resources/query-protocol b/oap-server/server-query-plugin/query-graphql-plugin/src/main/resources/query-protocol
index bea847b90e..563bb51c71 160000
--- a/oap-server/server-query-plugin/query-graphql-plugin/src/main/resources/query-protocol
+++ b/oap-server/server-query-plugin/query-graphql-plugin/src/main/resources/query-protocol
@@ -1 +1 @@
-Subproject commit bea847b90e08c07a5407c4121fe4cec1eec77a78
+Subproject commit 563bb51c71922f017911345d7cd5c62a7ac8995c
diff --git a/oap-server/server-storage-plugin/storage-jdbc-hikaricp-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/jdbc/h2/H2StorageProvider.java b/oap-server/server-storage-plugin/storage-jdbc-hikaricp-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/jdbc/h2/H2StorageProvider.java
index af5b6e57a2..067bf2ce8d 100644
--- 
a/oap-server/server-storage-plugin/storage-jdbc-hikaricp-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/jdbc/h2/H2StorageProvider.java +++ b/oap-server/server-storage-plugin/storage-jdbc-hikaricp-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/jdbc/h2/H2StorageProvider.java @@ -63,6 +63,10 @@ import org.apache.skywalking.oap.server.storage.plugin.jdbc.h2.dao.H2TopNRecords import org.apache.skywalking.oap.server.storage.plugin.jdbc.h2.dao.H2TopologyQueryDAO; import org.apache.skywalking.oap.server.storage.plugin.jdbc.h2.dao.H2TraceQueryDAO; import org.apache.skywalking.oap.server.storage.plugin.jdbc.h2.dao.H2UITemplateManagementDAO; +import org.apache.skywalking.oap.server.telemetry.TelemetryModule; +import org.apache.skywalking.oap.server.telemetry.api.GaugeMetrics; +import org.apache.skywalking.oap.server.telemetry.api.MetricsCreator; +import org.apache.skywalking.oap.server.telemetry.api.MetricsTag; /** * H2 Storage provider is for demonstration and preview only. 
I will find that haven't implemented several interfaces, @@ -131,6 +135,9 @@ public class H2StorageProvider extends ModuleProvider { @Override public void start() throws ServiceNotProvidedException, ModuleStartException { + MetricsCreator metricCreator = getManager().find(TelemetryModule.NAME).provider().getService(MetricsCreator.class); + GaugeMetrics healthChecker = metricCreator.createHealthCheckerGauge("storage_h2", MetricsTag.EMPTY_KEY, MetricsTag.EMPTY_VALUE); + healthChecker.setValue(1); try { h2Client.connect(); @@ -139,6 +146,13 @@ public class H2StorageProvider extends ModuleProvider { } catch (StorageException e) { throw new ModuleStartException(e.getMessage(), e); } + h2Client.setHealthCheckListener(isHealthy -> { + if (isHealthy) { + healthChecker.setValue(0); + } else { + healthChecker.setValue(1); + } + }); } @Override diff --git a/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/api/GaugeMetrics.java b/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/api/GaugeMetrics.java index 03dfa58469..bd00248af8 100644 --- a/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/api/GaugeMetrics.java +++ b/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/api/GaugeMetrics.java @@ -46,4 +46,9 @@ public interface GaugeMetrics { * Set the given value to the gauge */ void setValue(double value); + + /** + * Get the current value of the gauge + */ + double getValue(); } diff --git a/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/api/MetricsCreator.java b/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/api/MetricsCreator.java index 6e7ff69412..11d14ff9f1 100644 --- 
a/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/api/MetricsCreator.java
+++ b/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/api/MetricsCreator.java
@@ -18,6 +18,8 @@
 
 package org.apache.skywalking.oap.server.telemetry.api;
 
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
 import org.apache.skywalking.oap.server.library.module.Service;
 
 /**
@@ -25,6 +27,8 @@ import org.apache.skywalking.oap.server.library.module.Service;
  * project, and plan to move to openmetrics APIs after it is ready.
  */
 public interface MetricsCreator extends Service {
+
+    String HEALTH_METRIC_PREFIX = "health_check_";
     /**
      * Create a counter type metrics instance.
      */
@@ -42,4 +46,37 @@ public interface MetricsCreator extends Service {
      */
     HistogramMetrics createHistogramMetric(String name, String tips, MetricsTag.Keys tagKeys, MetricsTag.Values tagValues,
         double... buckets);
+
+    /**
+     * Create a Health Check gauge, whose metric name is the given name prefixed with HEALTH_METRIC_PREFIX.
+     */
+    default GaugeMetrics createHealthCheckerGauge(String name, MetricsTag.Keys tagKeys, MetricsTag.Values tagValues) {
+        Preconditions.checkArgument(!Strings.isNullOrEmpty(name), "Require non-null or empty metric name");
+        return createGauge(Strings.lenientFormat("%s%s", HEALTH_METRIC_PREFIX, name),
+            Strings.lenientFormat("%s health check", name),
+            tagKeys, tagValues);
+    }
+
+    /**
+     * Find out whether it's a health check metric, i.e. whether its name starts with HEALTH_METRIC_PREFIX.
+     */
+    default boolean isHealthCheckerMetrics(String name) {
+        Preconditions.checkArgument(!Strings.isNullOrEmpty(name), "Require non-null or empty metric name");
+        return name.startsWith(HEALTH_METRIC_PREFIX);
+    }
+
+    /**
+     * Extract the raw module name by stripping the leading HEALTH_METRIC_PREFIX.
+     *
+     * @param metricName non-empty metric name
+     * @return the name without the leading prefix, or the name itself when it does not start with the prefix
+     */
+    default String extractModuleName(String metricName) {
+        Preconditions.checkArgument(!Strings.isNullOrEmpty(metricName), "Require non-null or empty metric name");
+        // Only strip the leading prefix; String#replace would also drop later occurrences inside the name.
+        if (!metricName.startsWith(HEALTH_METRIC_PREFIX)) {
+            return metricName;
+        }
+        return metricName.substring(HEALTH_METRIC_PREFIX.length());
+    }
 }
diff --git a/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/none/MetricsCreatorNoop.java b/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/none/MetricsCreatorNoop.java
index 6f3efd4717..62509e97ea 100644
--- a/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/none/MetricsCreatorNoop.java
+++ b/oap-server/server-telemetry/telemetry-api/src/main/java/org/apache/skywalking/oap/server/telemetry/none/MetricsCreatorNoop.java
@@ -71,6 +71,11 @@ public class MetricsCreatorNoop implements MetricsCreator {
             public void setValue(double value) {
 
             }
+
+            @Override
+            public double getValue() {
+                return 0;
+            }
         };
     }
 
diff --git a/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusGaugeMetrics.java b/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusGaugeMetrics.java
index bfb0f578b3..194d15bd19 100644
--- a/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusGaugeMetrics.java
+++ b/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusGaugeMetrics.java
@@ -19,6 +19,7 @@
 package org.apache.skywalking.oap.server.telemetry.prometheus;
 
 import 
io.prometheus.client.Gauge; +import java.util.Optional; import org.apache.skywalking.oap.server.telemetry.api.GaugeMetrics; import org.apache.skywalking.oap.server.telemetry.api.MetricsTag; @@ -70,6 +71,11 @@ public class PrometheusGaugeMetrics extends BaseMetrics impl } } + @Override + public double getValue() { + return Optional.ofNullable(this.getMetric()).orElse(new Gauge.Child()).get(); + } + @Override protected Gauge create(String[] labelNames) { return Gauge.build().name(name).help(tips).labelNames(labelNames).register(); diff --git a/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusMetricsCollector.java b/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusMetricsCollector.java new file mode 100644 index 0000000000..382ce83878 --- /dev/null +++ b/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusMetricsCollector.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.skywalking.oap.server.telemetry.prometheus; + +import io.prometheus.client.Collector; +import io.prometheus.client.CollectorRegistry; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.LinkedList; +import java.util.List; +import org.apache.skywalking.oap.server.telemetry.api.MetricFamily; +import org.apache.skywalking.oap.server.telemetry.api.MetricsCollector; + +public class PrometheusMetricsCollector implements MetricsCollector { + @Override public Iterable collect() { + Enumeration mfs = CollectorRegistry.defaultRegistry.metricFamilySamples(); + List result = new LinkedList<>(); + while (mfs.hasMoreElements()) { + Collector.MetricFamilySamples metricFamilySamples = mfs.nextElement(); + List samples = new ArrayList<>(metricFamilySamples.samples.size()); + MetricFamily m = new MetricFamily(metricFamilySamples.name, MetricFamily.Type.valueOf(metricFamilySamples.type + .name()), metricFamilySamples.help, samples); + result.add(m); + for (Collector.MetricFamilySamples.Sample sample : metricFamilySamples.samples) { + samples.add(new MetricFamily.Sample(sample.name, sample.labelNames, sample.labelValues, sample.value, sample.timestampMs)); + } + } + return result; + } +} diff --git a/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusTelemetryProvider.java b/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusTelemetryProvider.java index 5fb6aa2068..47135681cd 100644 --- a/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusTelemetryProvider.java +++ b/oap-server/server-telemetry/telemetry-prometheus/src/main/java/org/apache/skywalking/oap/server/telemetry/prometheus/PrometheusTelemetryProvider.java @@ -29,7 +29,6 @@ import 
org.apache.skywalking.oap.server.library.module.ServiceNotProvidedExcepti import org.apache.skywalking.oap.server.telemetry.TelemetryModule; import org.apache.skywalking.oap.server.telemetry.api.MetricsCollector; import org.apache.skywalking.oap.server.telemetry.api.MetricsCreator; -import org.apache.skywalking.oap.server.telemetry.none.MetricsCollectorNoop; /** * Start the Prometheus @@ -59,7 +58,7 @@ public class PrometheusTelemetryProvider extends ModuleProvider { @Override public void prepare() throws ServiceNotProvidedException, ModuleStartException { this.registerServiceImplementation(MetricsCreator.class, new PrometheusMetricsCreator()); - this.registerServiceImplementation(MetricsCollector.class, new MetricsCollectorNoop()); + this.registerServiceImplementation(MetricsCollector.class, new PrometheusMetricsCollector()); try { new HTTPServer(config.getHost(), config.getPort()); } catch (IOException e) { -- GitLab