From 815c21b3fb80863e9fabba2d3727cd8b23fdac4a Mon Sep 17 00:00:00 2001 From: Zhijia Cao Date: Tue, 5 Sep 2023 14:32:25 +0800 Subject: [PATCH] Fixed wal triggering disk threshold loop too many times (#11017) --- .../dataregion/wal/WALManager.java | 27 ++++++++++++------- .../dataregion/wal/node/WALNode.java | 8 ++---- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/WALManager.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/WALManager.java index 96dd53dd41..6ccdb2aa3d 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/WALManager.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/WALManager.java @@ -172,16 +172,23 @@ public class WALManager implements IService { } private void deleteOutdatedFiles() { - List<WALNode> walNodes = walNodesManager.getNodesSnapshot(); - walNodes.sort((node1, node2) -> Long.compare(node2.getDiskUsage(), node1.getDiskUsage())); - for (WALNode walNode : walNodes) { - walNode.deleteOutdatedFiles(); - } - if (shouldThrottle()) { - logger.warn( - "WAL disk usage {} is larger than the iot_consensus_throttle_threshold_in_byte {}, please check your write load, iot consensus and the pipe module. It's better to allocate more disk for WAL.", - getTotalDiskUsage(), - getThrottleThreshold()); + // Normally, only need to delete the expired file once. When the WAL disk file size exceeds the + // threshold, the system continues to delete expired files until the disk size is smaller than + // the threshold.
+ boolean firstLoop = true; + while (firstLoop || shouldThrottle()) { + List<WALNode> walNodes = walNodesManager.getNodesSnapshot(); + walNodes.sort((node1, node2) -> Long.compare(node2.getDiskUsage(), node1.getDiskUsage())); + for (WALNode walNode : walNodes) { + walNode.deleteOutdatedFiles(); + } + if (firstLoop && shouldThrottle()) { + logger.warn( + "WAL disk usage {} is larger than the iot_consensus_throttle_threshold_in_byte {}, please check your write load, iot consensus and the pipe module. It's better to allocate more disk for WAL.", + getTotalDiskUsage(), + getThrottleThreshold()); + } + firstLoop = false; } } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/node/WALNode.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/node/WALNode.java index 84517ae3b2..1bfa5dd7b0 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/node/WALNode.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/node/WALNode.java @@ -267,10 +267,7 @@ public class WALNode implements IWALNode { public void run() { // The intent of the loop execution here is to try to get as many memTable flush or snapshot // as possible when the valid information ratio is less than the configured value.
- // In addition, if the disk space used by wal exceeds the limit threshold, resulting in a - // write rejection, the task will continue to attempt to delete expired files until the - // threshold is no longer exceeded - while (recursionTime < MAX_RECURSION_TIME || WALManager.getInstance().shouldThrottle()) { + while (recursionTime < MAX_RECURSION_TIME) { // init delete outdated file task fields init(); @@ -286,8 +283,7 @@ public class WALNode implements IWALNode { // decide whether to snapshot or flush based on the effective info ration and throttle // threshold if (trySnapshotOrFlushMemTable() - && safelyDeletedSearchIndex != DEFAULT_SAFELY_DELETED_SEARCH_INDEX - && !WALManager.getInstance().shouldThrottle()) { + && safelyDeletedSearchIndex != DEFAULT_SAFELY_DELETED_SEARCH_INDEX) { return; } recursionTime++; -- GitLab