未验证 提交 7b02fea4 编写于 作者: P Peng Junzhi 提交者: GitHub

Fixing partially concurrent GC's duration statistics and adding a new GC monitor (#10623)

上级 f38f3f6a
...@@ -30,6 +30,7 @@ import org.apache.iotdb.commons.conf.IoTDBConstant; ...@@ -30,6 +30,7 @@ import org.apache.iotdb.commons.conf.IoTDBConstant;
import org.apache.iotdb.commons.exception.StartupException; import org.apache.iotdb.commons.exception.StartupException;
import org.apache.iotdb.commons.service.JMXService; import org.apache.iotdb.commons.service.JMXService;
import org.apache.iotdb.commons.service.RegisterManager; import org.apache.iotdb.commons.service.RegisterManager;
import org.apache.iotdb.commons.service.metric.JvmGcMonitorMetrics;
import org.apache.iotdb.commons.service.metric.MetricService; import org.apache.iotdb.commons.service.metric.MetricService;
import org.apache.iotdb.commons.utils.TestOnly; import org.apache.iotdb.commons.utils.TestOnly;
import org.apache.iotdb.confignode.client.ConfigNodeRequestType; import org.apache.iotdb.confignode.client.ConfigNodeRequestType;
...@@ -255,6 +256,7 @@ public class ConfigNode implements ConfigNodeMBean { ...@@ -255,6 +256,7 @@ public class ConfigNode implements ConfigNodeMBean {
MetricService.getInstance().addMetricSet(new ProcessMetrics()); MetricService.getInstance().addMetricSet(new ProcessMetrics());
MetricService.getInstance().addMetricSet(new DiskMetrics(IoTDBConstant.CN_ROLE)); MetricService.getInstance().addMetricSet(new DiskMetrics(IoTDBConstant.CN_ROLE));
MetricService.getInstance().addMetricSet(new NetMetrics(IoTDBConstant.CN_ROLE)); MetricService.getInstance().addMetricSet(new NetMetrics(IoTDBConstant.CN_ROLE));
MetricService.getInstance().addMetricSet(JvmGcMonitorMetrics.getInstance());
MetricService.getInstance().addMetricSet(ClientManagerMetrics.getInstance()); MetricService.getInstance().addMetricSet(ClientManagerMetrics.getInstance());
MetricService.getInstance().addMetricSet(ThreadPoolMetrics.getInstance()); MetricService.getInstance().addMetricSet(ThreadPoolMetrics.getInstance());
initCpuMetrics(); initCpuMetrics();
......
...@@ -25,6 +25,7 @@ import org.apache.iotdb.commons.concurrent.ThreadName; ...@@ -25,6 +25,7 @@ import org.apache.iotdb.commons.concurrent.ThreadName;
import org.apache.iotdb.commons.concurrent.ThreadPoolMetrics; import org.apache.iotdb.commons.concurrent.ThreadPoolMetrics;
import org.apache.iotdb.commons.conf.CommonDescriptor; import org.apache.iotdb.commons.conf.CommonDescriptor;
import org.apache.iotdb.commons.conf.IoTDBConstant; import org.apache.iotdb.commons.conf.IoTDBConstant;
import org.apache.iotdb.commons.service.metric.JvmGcMonitorMetrics;
import org.apache.iotdb.commons.service.metric.MetricService; import org.apache.iotdb.commons.service.metric.MetricService;
import org.apache.iotdb.commons.service.metric.PerformanceOverviewMetrics; import org.apache.iotdb.commons.service.metric.PerformanceOverviewMetrics;
import org.apache.iotdb.db.conf.IoTDBDescriptor; import org.apache.iotdb.db.conf.IoTDBDescriptor;
...@@ -77,6 +78,9 @@ public class DataNodeMetricsHelper { ...@@ -77,6 +78,9 @@ public class DataNodeMetricsHelper {
// bind performance overview related metrics // bind performance overview related metrics
MetricService.getInstance().addMetricSet(PerformanceOverviewMetrics.getInstance()); MetricService.getInstance().addMetricSet(PerformanceOverviewMetrics.getInstance());
// bind gc metrics
MetricService.getInstance().addMetricSet(JvmGcMonitorMetrics.getInstance());
} }
private static void initSystemMetrics() { private static void initSystemMetrics() {
......
...@@ -45,6 +45,7 @@ import java.lang.management.MemoryUsage; ...@@ -45,6 +45,7 @@ import java.lang.management.MemoryUsage;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
...@@ -52,10 +53,11 @@ import java.util.concurrent.atomic.AtomicLong; ...@@ -52,10 +53,11 @@ import java.util.concurrent.atomic.AtomicLong;
/** This file is modified from io.micrometer.core.instrument.binder.jvm.JvmGcMetrics */ /** This file is modified from io.micrometer.core.instrument.binder.jvm.JvmGcMetrics */
public class JvmGcMetrics implements IMetricSet, AutoCloseable { public class JvmGcMetrics implements IMetricSet, AutoCloseable {
private static final Logger logger = LoggerFactory.getLogger(JvmGcMetrics.class); private static final Logger logger = LoggerFactory.getLogger(JvmGcMetrics.class);
private final List<Runnable> notificationListenerCleanUpRunnables = new CopyOnWriteArrayList<>();
private String youngGenPoolName; private String youngGenPoolName;
private String oldGenPoolName; private String oldGenPoolName;
private String nonGenerationalMemoryPool; private String nonGenerationalMemoryPool;
private final List<Runnable> notificationListenerCleanUpRunnables = new CopyOnWriteArrayList<>(); private final Map<String, AtomicLong> lastGcTotalDurationMap = new ConcurrentHashMap<>();
public JvmGcMetrics() { public JvmGcMetrics() {
for (MemoryPoolMXBean mbean : ManagementFactory.getMemoryPoolMXBeans()) { for (MemoryPoolMXBean mbean : ManagementFactory.getMemoryPoolMXBeans()) {
...@@ -70,6 +72,48 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable { ...@@ -70,6 +72,48 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
} }
} }
private static boolean isPartiallyConcurrentGC(GarbageCollectorMXBean gc) {
switch (gc.getName()) {
// First two are from the 'serial' collector which are not concurrent, obviously.
case "Copy":
case "MarkSweepCompact":
// The following 4 GCs do not contain concurrent execution phase.
case "PS MarkSweep":
case "PS Scavenge":
case "G1 Young Generation":
case "ParNew":
return false;
// The following 2 GCs' execution process consists of concurrent phase, which means they can
// run simultaneously with the user thread in some phases.
// Concurrent mark and concurrent sweep
case "ConcurrentMarkSweep":
// Concurrent mark
case "G1 Old Generation":
return true;
default:
// Assume possibly concurrent if unsure
return true;
}
}
private static boolean isConcurrentPhase(String cause, String name) {
return "No GC".equals(cause) || "Shenandoah Cycles".equals(name);
}
private static boolean isYoungGenPool(String name) {
return name != null && (name.endsWith("Eden Space") || name.endsWith("Survivor Space"));
}
private static boolean isOldGenPool(String name) {
return name != null && (name.endsWith("Old Gen") || name.endsWith("Tenured Gen"));
}
private static boolean isNonGenerationalHeapPool(String name) {
return "Shenandoah".equals(name) || "ZHeap".equals(name);
}
@Override @Override
public void bindTo(AbstractMetricService metricService) { public void bindTo(AbstractMetricService metricService) {
if (!preCheck()) { if (!preCheck()) {
...@@ -98,6 +142,13 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable { ...@@ -98,6 +142,13 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
liveDataSize, liveDataSize,
AtomicLong::get); AtomicLong::get);
AtomicLong heapMemUsedPercentage = new AtomicLong(calculateMemoryUsagePercentage());
metricService.createAutoGauge(
SystemMetric.JVM_GC_MEMORY_USED_PERCENT.toString(),
MetricLevel.CORE,
heapMemUsedPercentage,
AtomicLong::get);
Counter allocatedBytes = Counter allocatedBytes =
metricService.getOrCreateCounter( metricService.getOrCreateCounter(
SystemMetric.JVM_GC_MEMORY_ALLOCATED_BYTES.toString(), MetricLevel.CORE); SystemMetric.JVM_GC_MEMORY_ALLOCATED_BYTES.toString(), MetricLevel.CORE);
...@@ -124,18 +175,69 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable { ...@@ -124,18 +175,69 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
String gcCause = notificationInfo.getGcCause(); String gcCause = notificationInfo.getGcCause();
String gcAction = notificationInfo.getGcAction(); String gcAction = notificationInfo.getGcAction();
GcInfo gcInfo = notificationInfo.getGcInfo(); GcInfo gcInfo = notificationInfo.getGcInfo();
// The duration supplied in the notification info includes more than just
// application stopped time for concurrent GCs (since the concurrent phase is not
// stop-the-world).
// E.g. For mixed GC or full GC in collector 'G1 old generation', the duration collected
// here is more than the actual pause time (the latter can be accessed by GC
// log/-XX:PrintGCDetails)
long duration = gcInfo.getDuration(); long duration = gcInfo.getDuration();
// Try and do a better job coming up with a good stopped time
// value by asking for and tracking cumulative time spent blocked in GC.
if (isPartiallyConcurrentGC(mbean)) {
AtomicLong previousTotal =
lastGcTotalDurationMap.computeIfAbsent(mbean.getName(), k -> new AtomicLong());
long total = mbean.getCollectionTime();
duration = total - previousTotal.get(); // may be zero for a really fast collection
previousTotal.set(total);
}
String timerName; String timerName;
if (isConcurrentPhase(gcCause, notificationInfo.getGcName())) { if (isConcurrentPhase(gcCause, notificationInfo.getGcName())) {
timerName = "jvm_gc_concurrent_phase_time"; timerName = "jvm_gc_concurrent_phase_time";
} else { } else {
timerName = "jvm_gc_pause"; timerName = "jvm_gc_pause";
} }
// create a timer with tags named by gcCause, which binds gcCause with gcDuration
Timer timer = Timer timer =
metricService.getOrCreateTimer( metricService.getOrCreateTimer(
timerName, MetricLevel.CORE, "action", gcAction, "cause", gcCause); timerName, MetricLevel.CORE, "action", gcAction, "cause", gcCause);
timer.update(duration, TimeUnit.MILLISECONDS); timer.update(duration, TimeUnit.MILLISECONDS);
// add support for ZGC
if (mbean.getName().equals("ZGC Cycles")) {
Counter cyclesCount =
metricService.getOrCreateCounter(
SystemMetric.JVM_ZGC_CYCLES_COUNT.toString(), MetricLevel.CORE);
cyclesCount.inc();
} else if (mbean.getName().equals("ZGC Pauses")) {
Counter pausesCount =
metricService.getOrCreateCounter(
SystemMetric.JVM_ZGC_PAUSES_COUNT.toString(), MetricLevel.CORE);
pausesCount.inc();
}
// monitoring old/young GC count, which is helpful for users to locate GC exception.
// Unfortunately, the JMX doesn't seem to provide an api for monitoring mixed gc in G1.
// In fact, JMX may treat mixed GCs as minor GCs.
if (GcGenerationAge.fromName(notificationInfo.getGcName()) == GcGenerationAge.OLD) {
Counter oldGcCounter =
metricService.getOrCreateCounter(
SystemMetric.JVM_GC_YOUNG_GC_COUNT.toString(), MetricLevel.CORE);
oldGcCounter.inc();
} else if (GcGenerationAge.fromName(notificationInfo.getGcName())
== GcGenerationAge.YOUNG) {
Counter youngGcCounter =
metricService.getOrCreateCounter(
SystemMetric.JVM_GC_OLD_GC_COUNT.toString(), MetricLevel.CORE);
youngGcCounter.inc();
}
// update memory usage percentage
heapMemUsedPercentage.set(calculateMemoryUsagePercentage());
// Update promotion and allocation counters // Update promotion and allocation counters
final Map<String, MemoryUsage> before = gcInfo.getMemoryUsageBeforeGc(); final Map<String, MemoryUsage> before = gcInfo.getMemoryUsageBeforeGc();
final Map<String, MemoryUsage> after = gcInfo.getMemoryUsageAfterGc(); final Map<String, MemoryUsage> after = gcInfo.getMemoryUsageAfterGc();
...@@ -153,39 +255,43 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable { ...@@ -153,39 +255,43 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
final long longLivedMaxAfter = after.get(nonGenerationalMemoryPool).getMax(); final long longLivedMaxAfter = after.get(nonGenerationalMemoryPool).getMax();
maxDataSize.set(longLivedMaxAfter); maxDataSize.set(longLivedMaxAfter);
} }
return;
} }
if (oldGenPoolName != null) { // should add `else` here, since there are only two
final long oldBefore = before.get(oldGenPoolName).getUsed(); // cases: generational and non-generational
final long oldAfter = after.get(oldGenPoolName).getUsed(); else {
final long delta = oldAfter - oldBefore; if (oldGenPoolName != null) {
if (delta > 0L && promotedBytes != null) { final long oldBefore = before.get(oldGenPoolName).getUsed();
promotedBytes.inc(delta); final long oldAfter = after.get(oldGenPoolName).getUsed();
final long delta = oldAfter - oldBefore;
if (delta > 0L && promotedBytes != null) {
promotedBytes.inc(delta);
}
// Some GC implementations such as G1 can reduce the old gen size as part of a minor
// GC (since in JMX, a minor GC of G1 may actually represent mixed GC, which collect
// some obj in old gen region). To track the
// live data size we record the value if we see a reduction in the old gen heap size
// or
// after a major GC.
if (oldAfter < oldBefore
|| GcGenerationAge.fromName(notificationInfo.getGcName())
== GcGenerationAge.OLD) {
liveDataSize.set(oldAfter);
final long oldMaxAfter = after.get(oldGenPoolName).getMax();
maxDataSize.set(oldMaxAfter);
}
} }
// Some GC implementations such as G1 can reduce the old gen size as part of a minor if (youngGenPoolName != null) {
// GC. To track the countPoolSizeDelta(
// live data size we record the value if we see a reduction in the old gen heap size gcInfo.getMemoryUsageBeforeGc(),
// or gcInfo.getMemoryUsageAfterGc(),
// after a major GC. allocatedBytes,
if (oldAfter < oldBefore heapPoolSizeAfterGc,
|| GcGenerationAge.fromName(notificationInfo.getGcName()) youngGenPoolName);
== GcGenerationAge.OLD) {
liveDataSize.set(oldAfter);
final long oldMaxAfter = after.get(oldGenPoolName).getMax();
maxDataSize.set(oldMaxAfter);
} }
} }
if (youngGenPoolName != null) {
countPoolSizeDelta(
gcInfo.getMemoryUsageBeforeGc(),
gcInfo.getMemoryUsageAfterGc(),
allocatedBytes,
heapPoolSizeAfterGc,
youngGenPoolName);
}
}; };
NotificationEmitter notificationEmitter = (NotificationEmitter) mbean; NotificationEmitter notificationEmitter = (NotificationEmitter) mbean;
notificationEmitter.addNotificationListener( notificationEmitter.addNotificationListener(
...@@ -216,6 +322,7 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable { ...@@ -216,6 +322,7 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
metricService.remove( metricService.remove(
MetricType.AUTO_GAUGE, SystemMetric.JVM_GC_LIVE_DATA_SIZE_BYTES.toString()); MetricType.AUTO_GAUGE, SystemMetric.JVM_GC_LIVE_DATA_SIZE_BYTES.toString());
metricService.remove(MetricType.COUNTER, SystemMetric.JVM_GC_MEMORY_ALLOCATED_BYTES.toString()); metricService.remove(MetricType.COUNTER, SystemMetric.JVM_GC_MEMORY_ALLOCATED_BYTES.toString());
metricService.remove(MetricType.AUTO_GAUGE, SystemMetric.JVM_GC_MEMORY_USED_PERCENT.toString());
if (oldGenPoolName != null) { if (oldGenPoolName != null) {
metricService.remove( metricService.remove(
...@@ -242,6 +349,20 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable { ...@@ -242,6 +349,20 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
timerName = "jvm_gc_pause"; timerName = "jvm_gc_pause";
} }
metricService.remove(MetricType.TIMER, timerName, "action", gcAction, "cause", gcCause); metricService.remove(MetricType.TIMER, timerName, "action", gcAction, "cause", gcCause);
if (mbean.getName().equals("ZGC Cycles")) {
metricService.remove(
MetricType.COUNTER, SystemMetric.JVM_ZGC_CYCLES_COUNT.toString());
} else if (mbean.getName().equals("ZGC Pauses")) {
metricService.remove(
MetricType.COUNTER, SystemMetric.JVM_ZGC_PAUSES_COUNT.toString());
}
if (GcGenerationAge.fromName(notificationInfo.getGcName()) == GcGenerationAge.OLD) {
metricService.remove(
MetricType.COUNTER, SystemMetric.JVM_GC_YOUNG_GC_COUNT.toString());
} else if (GcGenerationAge.fromName(notificationInfo.getGcName())
== GcGenerationAge.YOUNG) {
metricService.remove(MetricType.COUNTER, SystemMetric.JVM_GC_OLD_GC_COUNT.toString());
}
}; };
NotificationEmitter notificationEmitter = (NotificationEmitter) mbean; NotificationEmitter notificationEmitter = (NotificationEmitter) mbean;
notificationEmitter.addNotificationListener( notificationEmitter.addNotificationListener(
...@@ -305,12 +426,18 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable { ...@@ -305,12 +426,18 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
notificationListenerCleanUpRunnables.forEach(Runnable::run); notificationListenerCleanUpRunnables.forEach(Runnable::run);
} }
public long calculateMemoryUsagePercentage() {
return (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getUsed()
* 100
/ Runtime.getRuntime().maxMemory());
}
enum GcGenerationAge { enum GcGenerationAge {
OLD, OLD,
YOUNG, YOUNG,
UNKNOWN; UNKNOWN;
private static Map<String, GcGenerationAge> knownCollectors = new HashMap<>(); private static final Map<String, GcGenerationAge> knownCollectors = new HashMap<>();
static { static {
knownCollectors.put("ConcurrentMarkSweep", OLD); knownCollectors.put("ConcurrentMarkSweep", OLD);
...@@ -327,20 +454,4 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable { ...@@ -327,20 +454,4 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
return knownCollectors.getOrDefault(name, UNKNOWN); return knownCollectors.getOrDefault(name, UNKNOWN);
} }
} }
private static boolean isConcurrentPhase(String cause, String name) {
return "No GC".equals(cause) || "Shenandoah Cycles".equals(name);
}
private static boolean isYoungGenPool(String name) {
return name != null && name.endsWith("Eden Space");
}
private static boolean isOldGenPool(String name) {
return name != null && (name.endsWith("Old Gen") || name.endsWith("Tenured Gen"));
}
private static boolean isNonGenerationalHeapPool(String name) {
return "Shenandoah".equals(name) || "ZHeap".equals(name);
}
} }
...@@ -81,6 +81,12 @@ public enum SystemMetric { ...@@ -81,6 +81,12 @@ public enum SystemMetric {
JVM_GC_LIVE_DATA_SIZE_BYTES("jvm_gc_live_data_size_bytes"), JVM_GC_LIVE_DATA_SIZE_BYTES("jvm_gc_live_data_size_bytes"),
JVM_GC_MEMORY_ALLOCATED_BYTES("jvm_gc_memory_allocated_bytes"), JVM_GC_MEMORY_ALLOCATED_BYTES("jvm_gc_memory_allocated_bytes"),
JVM_GC_MEMORY_PROMOTED_BYTES("jvm_gc_memory_promoted_bytes"), JVM_GC_MEMORY_PROMOTED_BYTES("jvm_gc_memory_promoted_bytes"),
JVM_GC_MEMORY_USED_PERCENT("jvm_gc_memory_used_percent"),
JVM_ZGC_CYCLES_COUNT("jvm_zgc_cycles_count"),
JVM_ZGC_PAUSES_COUNT("jvm_zgc_pauses_count"),
JVM_GC_YOUNG_GC_COUNT("jvm_gc_old_gc_count"),
JVM_GC_OLD_GC_COUNT("jvm_gc_young_gc_count"),
JVM_GC_ACCUMULATED_TIME_PERCENTAGE("jvm_gc_accumulated_time_percentage"),
// net related // net related
RECEIVED_BYTES("received_bytes"), RECEIVED_BYTES("received_bytes"),
RECEIVED_PACKETS("received_packets"), RECEIVED_PACKETS("received_packets"),
......
...@@ -138,6 +138,7 @@ public enum ThreadName { ...@@ -138,6 +138,7 @@ public enum ThreadName {
// NOTICE: The thread name of jvm cannot be edited here! // NOTICE: The thread name of jvm cannot be edited here!
// We list the thread name here just for distinguishing what module the thread belongs to. // We list the thread name here just for distinguishing what module the thread belongs to.
JVM_PAUSE_MONITOR("JvmPauseMonitor"), JVM_PAUSE_MONITOR("JvmPauseMonitor"),
JVM_GC_STATISTICS_MONITOR("JVM-GC-Statistics-Monitor"),
PARALLEL_GC("GC task thread"), PARALLEL_GC("GC task thread"),
G1_GC("GC Thread"), G1_GC("GC Thread"),
G1_MAIN_MARKER("G1 Main Marker"), G1_MAIN_MARKER("G1 Main Marker"),
...@@ -273,6 +274,7 @@ public enum ThreadName { ...@@ -273,6 +274,7 @@ public enum ThreadName {
new HashSet<>( new HashSet<>(
Arrays.asList( Arrays.asList(
JVM_PAUSE_MONITOR, JVM_PAUSE_MONITOR,
JVM_GC_STATISTICS_MONITOR,
PARALLEL_GC, PARALLEL_GC,
G1_GC, G1_GC,
G1_MAIN_MARKER, G1_MAIN_MARKER,
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.commons.service.metric;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.SimpleDateFormat;
import java.util.Date;
public class GcTimeAlerter implements JvmGcMonitorMetrics.GcTimeAlertHandler {
private static final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
private static final Logger logger = LoggerFactory.getLogger(GcTimeAlerter.class);
/**
* Alert handler func User can tailor their handle logic here
*
* @param gcData
*/
@Override
public void alert(JvmGcMonitorMetrics.GcData gcData) {
logger.warn(
"Error metrics taken time: "
+ sdf.format(new Date(Long.parseLong(String.valueOf(gcData.getTimestamp())))));
logger.warn("Gc Time Percentage: " + gcData.getGcTimePercentage() + "%");
logger.warn(
"Accumulated GC time within current observation window: "
+ gcData.getGcTimeWithinObsWindow()
+ " ms");
logger.warn(
"The observation window is from: "
+ sdf.format(new Date(Long.parseLong(String.valueOf(gcData.getStartObsWindowTs()))))
+ " to: "
+ sdf.format(new Date(Long.parseLong(String.valueOf(gcData.getTimestamp())))));
logger.warn("The observation window time is: " + gcData.getCurrentObsWindowTs() + " ms.");
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.commons.service.metric;
import org.apache.iotdb.commons.concurrent.IoTDBThreadPoolFactory;
import org.apache.iotdb.commons.concurrent.ThreadName;
import org.apache.iotdb.commons.concurrent.threadpool.ScheduledExecutorUtil;
import org.apache.iotdb.metrics.AbstractMetricService;
import org.apache.iotdb.metrics.metricsets.IMetricSet;
import org.apache.iotdb.metrics.utils.MetricLevel;
import org.apache.iotdb.metrics.utils.MetricType;
import org.apache.iotdb.metrics.utils.SystemMetric;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
public class JvmGcMonitorMetrics implements IMetricSet {
// Duration of observation window
public static final long OBSERVATION_WINDOW_MS = TimeUnit.MINUTES.toMillis(1);
// Interval for data collection
public static final long SLEEP_INTERVAL_MS = TimeUnit.SECONDS.toMillis(5);
// Max GC time threshold
public static final long MAX_GC_TIME_PERCENTAGE = 40L;
// The time when IoTDB start running
private static long startTime;
private static final Logger logger = LoggerFactory.getLogger(JvmGcMonitorMetrics.class);
private final ScheduledExecutorService scheduledGCInfoMonitor =
IoTDBThreadPoolFactory.newSingleThreadScheduledExecutor(
ThreadName.JVM_GC_STATISTICS_MONITOR.getName());
private Future<?> scheduledGcMonitorFuture;
// Ring buffers containing GC timings and timestamps when timings were taken
private final TsAndData[] gcDataBuf;
// Buffer size
private final int bufSize;
// Buffer start index
private int startIdx;
// Buffer end index
private int endIdx;
// Container to hold collected GC data
private final GcData curData = new GcData();
// Hook function called with GC exception
private final GcTimeAlertHandler alertHandler;
public JvmGcMonitorMetrics() {
bufSize = (int) (OBSERVATION_WINDOW_MS / SLEEP_INTERVAL_MS + 2);
// Prevent the user from accidentally creating an abnormally big buffer, which will result in
// slow calculations and likely inaccuracy.
Preconditions.checkArgument(bufSize <= 128 * 1024);
gcDataBuf = new TsAndData[bufSize];
for (int i = 0; i < bufSize; i++) {
gcDataBuf[i] = new TsAndData();
}
alertHandler = new GcTimeAlerter();
}
@Override
public void bindTo(AbstractMetricService metricService) {
metricService.createAutoGauge(
SystemMetric.JVM_GC_ACCUMULATED_TIME_PERCENTAGE.toString(),
MetricLevel.CORE,
curData,
GcData::getGcTimePercentage);
startTime = System.currentTimeMillis();
// current collect time: startTime + start delay(50ms)
gcDataBuf[startIdx].setValues(startTime + TimeUnit.MILLISECONDS.toMillis(50), 0);
scheduledGcMonitorFuture =
ScheduledExecutorUtil.safelyScheduleWithFixedDelay(
scheduledGCInfoMonitor,
this::scheduledMonitoring,
TimeUnit.MILLISECONDS.toMillis(50), // to prevent / ZERO exception
SLEEP_INTERVAL_MS,
TimeUnit.MILLISECONDS);
}
@Override
public void unbindFrom(AbstractMetricService metricService) {
metricService.remove(
MetricType.AUTO_GAUGE, SystemMetric.JVM_GC_ACCUMULATED_TIME_PERCENTAGE.toString());
if (scheduledGcMonitorFuture != null) {
scheduledGcMonitorFuture.cancel(false);
scheduledGcMonitorFuture = null;
logger.info("JVM GC scheduled monitor is stopped successfully.");
}
}
private void scheduledMonitoring() {
calculateGCTimePercentageWithinObservedInterval();
if (alertHandler != null && curData.gcTimePercentage.get() > MAX_GC_TIME_PERCENTAGE) {
alertHandler.alert(curData.clone());
}
}
private void calculateGCTimePercentageWithinObservedInterval() {
long prevTotalGcTime = curData.getAccumulatedGcTime();
long totalGcTime = 0;
for (GarbageCollectorMXBean gcBean : ManagementFactory.getGarbageCollectorMXBeans()) {
totalGcTime += gcBean.getCollectionTime();
}
long gcTimeWithinSleepInterval = totalGcTime - prevTotalGcTime;
long curTime = System.currentTimeMillis();
long gcMonitorRunTime = curTime - startTime;
endIdx = (endIdx + 1) % bufSize;
gcDataBuf[endIdx].setValues(curTime, gcTimeWithinSleepInterval);
// Move startIdx forward until we reach the first buffer entry with
// timestamp within the observation window.
long startObsWindowTs = curTime - OBSERVATION_WINDOW_MS;
while (gcDataBuf[startIdx].ts < startObsWindowTs && startIdx != endIdx) {
startIdx = (startIdx + 1) % bufSize;
}
// Calculate total GC time within observationWindowMs.
// We should be careful about GC time that passed before the first timestamp
// in our observation window.
long gcTimeWithinObservationWindow =
Math.min(gcDataBuf[startIdx].gcPause, gcDataBuf[startIdx].ts - startObsWindowTs);
if (startIdx != endIdx) {
for (int i = (startIdx + 1) % bufSize; i != endIdx; i = (i + 1) % bufSize) {
gcTimeWithinObservationWindow += gcDataBuf[i].gcPause;
}
}
curData.update(
curTime,
startObsWindowTs,
totalGcTime,
gcTimeWithinObservationWindow,
(int)
(gcTimeWithinObservationWindow
* 100
/ Math.min(OBSERVATION_WINDOW_MS, gcMonitorRunTime)));
}
/** Encapsulates data about GC pauses measured at the specific timestamp. */
public static class GcData implements Cloneable {
// The time when this object get updated.
private final AtomicLong timestamp = new AtomicLong();
// The theoretical start time of the observation window, usually equal to `timestamp -
// OBSERVATION_WINDOW_MS`
private final AtomicLong startObsWindowTs = new AtomicLong();
// Accumulated GC time since the start of IoTDB.
private final AtomicLong accumulatedGcTime = new AtomicLong();
// The percentage (0..100) of time that the JVM spent in GC pauses within the observation window
private final AtomicLong gcTimePercentage = new AtomicLong();
// Accumulated GC time within the latest observation window.
private final AtomicLong gcTimeWithinObsWindow = new AtomicLong();
/**
* Returns the length of current observation window, usually equal to OBSERVATION_WINDOW_MS. If
* IoTDB is started after the start of the theoretical time window, then IoTDB startup time is
* returned.
*
* @return current observation window time, millisecond.
*/
public long getCurrentObsWindowTs() {
return Math.min(timestamp.get() - startTime, timestamp.get() - startObsWindowTs.get());
}
/**
* Returns the absolute timestamp when this measurement was taken.
*
* @return timestamp.
*/
public long getTimestamp() {
return timestamp.get();
}
/**
* Returns the start timestamp of the latest observation window.
*
* @return the actual start timestamp of the obs window.
*/
public long getStartObsWindowTs() {
return Math.max(startObsWindowTs.get(), startTime);
}
/**
* Returns accumulated GC time since the start of IoTDB.
*
* @return AccumulatedGcTime.
*/
public long getAccumulatedGcTime() {
return accumulatedGcTime.get();
}
/**
* Returns accumulated GC time within the latest observation window.
*
* @return gcTimeWithinObsWindow.
*/
public long getGcTimeWithinObsWindow() {
return gcTimeWithinObsWindow.get();
}
/**
* Returns the percentage (0..100) of time that the JVM spent in GC pauses within the
* observation window of the associated GCTimeMonitor.
*
* @return GcTimePercentage.
*/
public long getGcTimePercentage() {
return gcTimePercentage.get();
}
private synchronized void update(
long inTimestamp,
long inStartObsWindowTs,
long inTotalGcTime,
long inGcTimeWithinObsWindow,
int inGcTimePercentage) {
this.timestamp.set(inTimestamp);
this.startObsWindowTs.set(inStartObsWindowTs);
this.accumulatedGcTime.set(inTotalGcTime);
this.gcTimeWithinObsWindow.set(inGcTimeWithinObsWindow);
this.gcTimePercentage.set(inGcTimePercentage);
}
@Override
public synchronized GcData clone() {
try {
return (GcData) super.clone();
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
}
}
private static class TsAndData {
// Timestamp when this measurement was taken
private long ts;
// Total GC pause time within the interval between ts
// and the timestamp of the previous measurement.
private long gcPause;
void setValues(long inTs, long inGcPause) {
this.ts = inTs;
this.gcPause = inGcPause;
}
}
/**
* The user can provide an instance of a class implementing this interface when initializing a
* GCTimeMonitor to receive alerts when GC time percentage exceeds the specified threshold.
*/
public interface GcTimeAlertHandler {
void alert(GcData gcData);
}
private static class JvmGcMetricsHolder {
private static final JvmGcMonitorMetrics INSTANCE = new JvmGcMonitorMetrics();
private JvmGcMetricsHolder() {
// empty constructor
}
}
public static JvmGcMonitorMetrics getInstance() {
return JvmGcMetricsHolder.INSTANCE;
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册