未验证 提交 9ca1eb96 编写于 作者: E Eric Gao 提交者: GitHub

[Improvement][Metrics] Add metrics for alert server (#11240)

* [Improvement][Metrics] Add metrics for alert server (#11131)

* Update related docs of metrics

* Add grafana demo dashboards for alert server metrics

* Refactor metric classes with UtilityClass annotation

* Refactor meter names in camelCase for checkstyle
上级 adfec419
......@@ -115,9 +115,10 @@ For example, you can get the master metrics by `curl http://localhost:5679/actua
### Alert Server Related
- Currently, we have not embedded any metrics in Alert Server.
- ds.alert.send.count: (counter) the number of sent alerts, sliced by tag `status`
- ds.alert.pending: (gauge) the number of alerts waiting to get fired
In each server, there are some default system-level metrics related to `database connection`, `JVM`, etc. We list them below for your reference:
**In each server, there are some default system-level metrics related to `database connection`, `JVM`, etc. We list them below for your reference:**
### Database Related Metrics (Default)
......
......@@ -116,9 +116,10 @@ metrics exporter端口`server.port`是在application.yaml里定义的: master: `
### Alert Server指标
- 目前我们尚未提供任何Alert Server指标
- ds.alert.send.count: (counter) 已发送的告警数量,可由标签`status`切分
- ds.alert.pending: (gauge) 等待发送的告警数量
在每个server中都有一些系统层面(如数据库链接、JVM)的默认指标,为了您的检阅方便,我们也将它们列在了这里:
**在每个server中都有一些系统层面(如数据库链接、JVM)的默认指标,为了您的检阅方便,我们也将它们列在了这里:**
### 数据库相关指标(默认)
......
......@@ -78,6 +78,7 @@ public final class AlertSenderService extends Thread {
while (Stopper.isRunning()) {
try {
List<Alert> alerts = alertDao.listPendingAlerts();
AlertServerMetrics.registerPendingAlertGauge(alerts::size);
this.send(alerts);
ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS * 5L);
} catch (Exception e) {
......@@ -117,6 +118,9 @@ public final class AlertSenderService extends Thread {
alertDao.addAlertSendStatus(sendStatus, JSONUtils.toJsonString(alertResult), alertId, instance.getId());
if (sendStatus.equals(AlertStatus.EXECUTION_SUCCESS)) {
sendSuccessCount++;
AlertServerMetrics.incAlertSuccessCount();
} else {
AlertServerMetrics.incAlertFailCount();
}
alertResults.add(alertResult);
}
......
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.alert;
import java.util.function.Supplier;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.Metrics;
import lombok.experimental.UtilityClass;
@UtilityClass
public class AlertServerMetrics {
private final Counter alertSuccessCounter =
Counter.builder("ds.alert.send.count")
.tag("status", "success")
.description("Alert success count")
.register(Metrics.globalRegistry);
private final Counter alertFailCounter =
Counter.builder("ds.alert.send.count")
.tag("status", "fail")
.description("Alert failure count")
.register(Metrics.globalRegistry);
public void registerPendingAlertGauge(final Supplier<Number> supplier) {
Gauge.builder("ds.alert.pending", supplier)
.description("Number of pending alert")
.register(Metrics.globalRegistry);
}
public void incAlertSuccessCount() {
alertSuccessCounter.increment();
}
public void incAlertFailCount() {
alertFailCounter.increment();
}
}
......@@ -19,17 +19,15 @@ package org.apache.dolphinscheduler.server.master.metrics;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Metrics;
import lombok.experimental.UtilityClass;
public final class MasterServerMetrics {
private MasterServerMetrics() {
throw new UnsupportedOperationException("Utility class");
}
@UtilityClass
public class MasterServerMetrics {
/**
* Used to measure the master server is overload.
*/
private static final Counter MASTER_OVERLOAD_COUNTER =
private final Counter masterOverloadCounter =
Counter.builder("ds.master.overload.count")
.description("Master server overload count")
.register(Metrics.globalRegistry);
......@@ -37,17 +35,17 @@ public final class MasterServerMetrics {
/**
* Used to measure the number of process command consumed by master.
*/
private static final Counter MASTER_CONSUME_COMMAND_COUNTER =
private final Counter masterConsumeCommandCounter =
Counter.builder("ds.master.consume.command.count")
.description("Master server consume command count")
.register(Metrics.globalRegistry);
public static void incMasterOverload() {
MASTER_OVERLOAD_COUNTER.increment();
public void incMasterOverload() {
masterOverloadCounter.increment();
}
public static void incMasterConsumeCommand(int commandCount) {
MASTER_CONSUME_COMMAND_COUNTER.increment(commandCount);
public void incMasterConsumeCommand(int commandCount) {
masterConsumeCommandCounter.increment(commandCount);
}
}
......@@ -29,21 +29,19 @@ import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Timer;
import lombok.experimental.UtilityClass;
public final class ProcessInstanceMetrics {
@UtilityClass
public class ProcessInstanceMetrics {
private ProcessInstanceMetrics() {
throw new UnsupportedOperationException("Utility class");
}
private static Map<String, Counter> PROCESS_INSTANCE_COUNTERS = new HashMap<>();
private final Map<String, Counter> processInstanceCounters = new HashMap<>();
private static final Set<String> PROCESS_INSTANCE_STATES = ImmutableSet.of(
private final Set<String> processInstanceStates = ImmutableSet.of(
"submit", "timeout", "finish", "failover", "success", "fail", "stop");
static {
for (final String state : PROCESS_INSTANCE_STATES) {
PROCESS_INSTANCE_COUNTERS.put(
for (final String state : processInstanceStates) {
processInstanceCounters.put(
state,
Counter.builder("ds.workflow.instance.count")
.tag("state", state)
......@@ -54,38 +52,38 @@ public final class ProcessInstanceMetrics {
}
private static final Timer COMMAND_QUERY_TIMETER =
private final Timer commandQueryTimer =
Timer.builder("ds.workflow.command.query.duration")
.description("Command query duration")
.register(Metrics.globalRegistry);
private static final Timer PROCESS_INSTANCE_GENERATE_TIMER =
private final Timer processInstanceGenerateTimer =
Timer.builder("ds.workflow.instance.generate.duration")
.description("Process instance generated duration")
.register(Metrics.globalRegistry);
public static void recordCommandQueryTime(long milliseconds) {
COMMAND_QUERY_TIMETER.record(milliseconds, TimeUnit.MILLISECONDS);
public void recordCommandQueryTime(long milliseconds) {
commandQueryTimer.record(milliseconds, TimeUnit.MILLISECONDS);
}
public static void recordProcessInstanceGenerateTime(long milliseconds) {
PROCESS_INSTANCE_GENERATE_TIMER.record(milliseconds, TimeUnit.MILLISECONDS);
public void recordProcessInstanceGenerateTime(long milliseconds) {
processInstanceGenerateTimer.record(milliseconds, TimeUnit.MILLISECONDS);
}
public static synchronized void registerProcessInstanceRunningGauge(Supplier<Number> function) {
public synchronized void registerProcessInstanceRunningGauge(Supplier<Number> function) {
Gauge.builder("ds.workflow.instance.running", function)
.description("The current running process instance count")
.register(Metrics.globalRegistry);
}
public static synchronized void registerProcessInstanceResubmitGauge(Supplier<Number> function) {
public synchronized void registerProcessInstanceResubmitGauge(Supplier<Number> function) {
Gauge.builder("ds.workflow.instance.resubmit", function)
.description("The current process instance need to resubmit count")
.register(Metrics.globalRegistry);
}
public static void incProcessInstanceByState(final String state) {
PROCESS_INSTANCE_COUNTERS.get(state).increment();
public void incProcessInstanceByState(final String state) {
processInstanceCounters.get(state).increment();
}
}
......@@ -23,25 +23,23 @@ import java.util.Set;
import java.util.function.Supplier;
import com.facebook.presto.jdbc.internal.guava.collect.ImmutableSet;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.Metrics;
import lombok.experimental.UtilityClass;
@UtilityClass
public class TaskMetrics {
public final class TaskMetrics {
private TaskMetrics() {
throw new UnsupportedOperationException("Utility class");
}
private static Map<String, Counter> TASK_INSTANCE_COUNTERS = new HashMap<>();
private final Map<String, Counter> taskInstanceCounters = new HashMap<>();
private static final Set<String> TASK_INSTANCE_STATES = ImmutableSet.of(
private final Set<String> taskInstanceStates = ImmutableSet.of(
"submit", "timeout", "finish", "failover", "retry", "dispatch", "success", "fail", "stop");
static {
for (final String state : TASK_INSTANCE_STATES) {
TASK_INSTANCE_COUNTERS.put(
for (final String state : taskInstanceStates) {
taskInstanceCounters.put(
state,
Counter.builder("ds.task.instance.count")
.tags("state", state)
......@@ -52,41 +50,41 @@ public final class TaskMetrics {
}
private static final Counter TASK_DISPATCH_COUNTER =
private final Counter taskDispatchCounter =
Counter.builder("ds.task.dispatch.count")
.description("Task dispatch count")
.register(Metrics.globalRegistry);
private static final Counter TASK_DISPATCHER_FAILED =
private final Counter taskDispatchFailCounter =
Counter.builder("ds.task.dispatch.failure.count")
.description("Task dispatch failures count, retried ones included")
.register(Metrics.globalRegistry);
private static final Counter TASK_DISPATCH_ERROR =
private final Counter taskDispatchErrorCounter =
Counter.builder("ds.task.dispatch.error.count")
.description("Number of errors during task dispatch")
.register(Metrics.globalRegistry);
public synchronized static void registerTaskPrepared(Supplier<Number> consumer) {
public synchronized void registerTaskPrepared(Supplier<Number> consumer) {
Gauge.builder("ds.task.prepared", consumer)
.description("Task prepared count")
.register(Metrics.globalRegistry);
}
public static void incTaskDispatchFailed(int failedCount) {
TASK_DISPATCHER_FAILED.increment(failedCount);
public void incTaskDispatchFailed(int failedCount) {
taskDispatchFailCounter.increment(failedCount);
}
public static void incTaskDispatchError() {
TASK_DISPATCH_ERROR.increment();
public void incTaskDispatchError() {
taskDispatchErrorCounter.increment();
}
public static void incTaskDispatch() {
TASK_DISPATCH_COUNTER.increment();
public void incTaskDispatch() {
taskDispatchCounter.increment();
}
public static void incTaskInstanceByState(final String state) {
TASK_INSTANCE_COUNTERS.get(state).increment();
public void incTaskInstanceByState(final String state) {
taskInstanceCounters.get(state).increment();
}
}
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"limit": 100,
"name": "Annotations & Alerts",
"showIn": 0,
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"enable": true,
"expr": "resets(process_uptime_seconds{application=\"$application\", instance=\"$instance\"}[1m]) > 0",
"iconColor": "rgba(255, 96, 96, 1)",
"name": "Restart Detection",
"showIn": 0,
"step": "1m",
"tagKeys": "restart-tag",
"textFormat": "uptime reset",
"titleFormat": "Restart"
}
]
},
"description": "Dashboard for Micrometer instrumented applications (Java, Spring Boot, Micronaut)",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 4701,
"graphTooltip": 1,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 126,
"panels": [],
"title": "Alert Server",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 1
},
"id": 152,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "sum(increase(ds_alert_send_count_total{}[1m]))",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "sum(increase(ds_alert_send_count_total{status=\"success\"}[1m]))",
"hide": false,
"legendFormat": "__auto",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "sum(increase(ds_alert_send_count_total{status=\"fail\"}[1m]))",
"hide": false,
"refId": "C"
}
],
"title": "Alerts Sent Count/1m",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 1
},
"id": 144,
"links": [],
"maxDataPoints": 100,
"options": {
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "9.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "builder",
"exemplar": true,
"expr": "ds_alert_pending",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "",
"metric": "",
"range": true,
"refId": "A",
"step": 14400
}
],
"title": "Pending Alerts",
"type": "gauge"
}
],
"refresh": "5s",
"schemaVersion": 36,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "standalone-server",
"value": "standalone-server"
},
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "Application",
"multi": false,
"name": "application",
"options": [],
"query": {
"query": "label_values(application)",
"refId": "Prometheus-application-Variable-Query"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allFormat": "glob",
"current": {
"selected": false,
"text": "host.docker.internal:12345",
"value": "host.docker.internal:12345"
},
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "Instance",
"multi": false,
"multiFormat": "glob",
"name": "instance",
"options": [],
"query": {
"query": "label_values(jvm_memory_used_bytes{application=\"$application\"}, instance)",
"refId": "Prometheus-instance-Variable-Query"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allFormat": "glob",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"definition": "",
"hide": 0,
"includeAll": true,
"label": "JVM Memory Pools Heap",
"multi": false,
"multiFormat": "glob",
"name": "jvm_memory_pool_heap",
"options": [],
"query": {
"query": "label_values(jvm_memory_used_bytes{application=\"$application\", instance=\"$instance\", area=\"heap\"},id)",
"refId": "Prometheus-jvm_memory_pool_heap-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allFormat": "glob",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"definition": "",
"hide": 0,
"includeAll": true,
"label": "JVM Memory Pools Non-Heap",
"multi": false,
"multiFormat": "glob",
"name": "jvm_memory_pool_nonheap",
"options": [],
"query": {
"query": "label_values(jvm_memory_used_bytes{application=\"$application\", instance=\"$instance\", area=\"nonheap\"},id)",
"refId": "Prometheus-jvm_memory_pool_nonheap-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"now": true,
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Alert",
"uid": "7ZgATOcnz",
"version": 3,
"weekStart": ""
}
\ No newline at end of file
......@@ -17,6 +17,7 @@
package org.apache.dolphinscheduler.server.worker.metrics;
import org.apache.dolphinscheduler.plugin.task.api.TaskChannelFactory;
import java.util.HashMap;
......@@ -25,15 +26,13 @@ import java.util.ServiceLoader;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Metrics;
import lombok.experimental.UtilityClass;
public final class TaskMetrics {
private TaskMetrics() {
throw new UnsupportedOperationException("Utility class");
}
@UtilityClass
public class TaskMetrics {
private static Map<String, Counter> TASK_TYPE_EXECUTE_COUNTER = new HashMap<>();
private static final Counter UNKNOWN_TASK_EXECUTE_COUNTER =
private final Map<String, Counter> taskTypeExecutionCounter = new HashMap<>();
private final Counter taskUnknownTypeExecutionCounter =
Counter.builder("ds.task.execution.count.by.type")
.tag("task_type", "unknown")
.description("task execution counter by type")
......@@ -41,7 +40,7 @@ public final class TaskMetrics {
static {
for (TaskChannelFactory taskChannelFactory : ServiceLoader.load(TaskChannelFactory.class)) {
TASK_TYPE_EXECUTE_COUNTER.put(
taskTypeExecutionCounter.put(
taskChannelFactory.getName(),
Counter.builder("ds.task.execution.count.by.type")
.tag("task_type", taskChannelFactory.getName())
......@@ -51,8 +50,8 @@ public final class TaskMetrics {
}
}
public static void incrTaskTypeExecuteCount(String taskType) {
TASK_TYPE_EXECUTE_COUNTER.getOrDefault(taskType, UNKNOWN_TASK_EXECUTE_COUNTER).increment();
public void incrTaskTypeExecuteCount(String taskType) {
taskTypeExecutionCounter.getOrDefault(taskType, taskUnknownTypeExecutionCounter).increment();
}
}
......@@ -30,36 +30,36 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public class WorkerServerMetrics {
private static final Counter WORKER_OVERLOAD_COUNTER =
private final Counter workerOverloadCounter =
Counter.builder("ds.worker.overload.count")
.description("overloaded workers count")
.register(Metrics.globalRegistry);
private static final Counter WORKER_SUBMIT_QUEUE_IS_FULL_COUNTER =
private final Counter workerFullSubmitQueueCounter =
Counter.builder("ds.worker.full.submit.queue.count")
.description("full worker submit queues count")
.register(Metrics.globalRegistry);
private static final Counter WORKER_RESOURCE_DOWNLOAD_SUCCESS_COUNTER =
private final Counter workerResourceDownloadSuccessCounter =
Counter.builder("ds.worker.resource.download.count")
.tag("status", "success")
.description("worker resource download success count")
.register(Metrics.globalRegistry);
private static final Counter WORKER_RESOURCE_DOWNLOAD_FAILURE_COUNTER =
private final Counter workerResourceDownloadFailCounter =
Counter.builder("ds.worker.resource.download.count")
.tag("status", "fail")
.description("worker resource download failure count")
.register(Metrics.globalRegistry);
private static final Timer WORKER_RESOURCE_DOWNLOAD_DURATION_TIMER =
private final Timer workerResourceDownloadDurationTimer =
Timer.builder("ds.worker.resource.download.duration")
.publishPercentiles(0.5, 0.75, 0.95, 0.99)
.publishPercentileHistogram()
.description("time cost of resource download on workers")
.register(Metrics.globalRegistry);
private static final DistributionSummary WORKER_RESOURCE_DOWNLOAD_SIZE_DISTRIBUTION =
private final DistributionSummary workerResourceDownloadSizeDistribution =
DistributionSummary.builder("ds.worker.resource.download.size")
.baseUnit("bytes")
.publishPercentiles(0.5, 0.75, 0.95, 0.99)
......@@ -67,31 +67,31 @@ public class WorkerServerMetrics {
.description("size of downloaded resource files on worker")
.register(Metrics.globalRegistry);
public static void incWorkerOverloadCount() {
WORKER_OVERLOAD_COUNTER.increment();
public void incWorkerOverloadCount() {
workerOverloadCounter.increment();
}
public static void incWorkerSubmitQueueIsFullCount() {
WORKER_SUBMIT_QUEUE_IS_FULL_COUNTER.increment();
public void incWorkerSubmitQueueIsFullCount() {
workerFullSubmitQueueCounter.increment();
}
public static void incWorkerResourceDownloadSuccessCount() {
WORKER_RESOURCE_DOWNLOAD_SUCCESS_COUNTER.increment();
public void incWorkerResourceDownloadSuccessCount() {
workerResourceDownloadSuccessCounter.increment();
}
public static void incWorkerResourceDownloadFailureCount() {
WORKER_RESOURCE_DOWNLOAD_FAILURE_COUNTER.increment();
public void incWorkerResourceDownloadFailureCount() {
workerResourceDownloadFailCounter.increment();
}
public static void recordWorkerResourceDownloadTime(final long milliseconds) {
WORKER_RESOURCE_DOWNLOAD_DURATION_TIMER.record(milliseconds, TimeUnit.MILLISECONDS);
public void recordWorkerResourceDownloadTime(final long milliseconds) {
workerResourceDownloadDurationTimer.record(milliseconds, TimeUnit.MILLISECONDS);
}
public static void recordWorkerResourceDownloadSize(final long size) {
WORKER_RESOURCE_DOWNLOAD_SIZE_DISTRIBUTION.record(size);
public void recordWorkerResourceDownloadSize(final long size) {
workerResourceDownloadSizeDistribution.record(size);
}
public static void registerWorkerRunningTaskGauge(final Supplier<Number> supplier) {
public void registerWorkerRunningTaskGauge(final Supplier<Number> supplier) {
Gauge.builder("ds.task.running", supplier)
.description("number of running tasks on workers")
.register(Metrics.globalRegistry);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册