From b9ed1ed9ce5f5720d2f7bbe31ab863ba4d9c0749 Mon Sep 17 00:00:00 2001 From: William Song <48054931+SzyWilliam@users.noreply.github.com> Date: Wed, 6 Sep 2023 10:58:35 +0800 Subject: [PATCH] [RatisConsensus] retry cache expiration time should be longer than retriable-client wait duration (#11045) --- .../iotdb/consensus/ratis/utils/Utils.java | 23 +++++++++++++ .../ratis/{ => utils}/UtilsTest.java | 32 +++++++++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) rename iotdb-core/consensus/src/test/java/org/apache/iotdb/consensus/ratis/{ => utils}/UtilsTest.java (63%) diff --git a/iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/ratis/utils/Utils.java b/iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/ratis/utils/Utils.java index 7eb30f1a93..cd2cf7f598 100644 --- a/iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/ratis/utils/Utils.java +++ b/iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/ratis/utils/Utils.java @@ -39,6 +39,7 @@ import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.util.TimeDuration; import org.apache.thrift.TException; import org.apache.thrift.protocol.TCompactProtocol; import org.apache.thrift.transport.TByteBuffer; @@ -46,6 +47,7 @@ import org.apache.thrift.transport.TByteBuffer; import java.io.File; import java.nio.ByteBuffer; import java.util.List; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; public class Utils { @@ -207,6 +209,24 @@ public class Utils { return config.isReadOnly() && !config.isStopping(); } + /** return the max wait duration for retry */ + static TimeDuration getMaxRetrySleepTime(RatisConfig.Client config) { + final int maxAttempts = config.getClientMaxRetryAttempt(); + final long baseSleepMs = config.getClientRetryInitialSleepTimeMs(); + final long maxSleepMs = config.getClientRetryMaxSleepTimeMs(); + final long timeoutMs = config.getClientRequestTimeoutMillis(); + + long maxWaitMs = 0L; + long currentSleepMs = baseSleepMs; + for (int i = 0; i < maxAttempts; i++) { + maxWaitMs += timeoutMs; + maxWaitMs += currentSleepMs; + currentSleepMs = Math.min(2 * currentSleepMs, maxSleepMs); + } + + return TimeDuration.valueOf(maxWaitMs, TimeUnit.MILLISECONDS); + } + public static void initRatisConfig(RaftProperties properties, RatisConfig config) { GrpcConfigKeys.setMessageSizeMax(properties, config.getGrpc().getMessageSizeMax()); GrpcConfigKeys.setFlowControlWindow(properties, config.getGrpc().getFlowControlWindow()); @@ -294,5 +314,8 @@ public class Utils { RaftServerConfigKeys.setSleepDeviationThreshold( properties, config.getUtils().getSleepDeviationThresholdMs()); + + final TimeDuration clientMaxRetryGap = getMaxRetrySleepTime(config.getClient()); + RaftServerConfigKeys.RetryCache.setExpiryTime(properties, clientMaxRetryGap); } } diff --git a/iotdb-core/consensus/src/test/java/org/apache/iotdb/consensus/ratis/UtilsTest.java b/iotdb-core/consensus/src/test/java/org/apache/iotdb/consensus/ratis/utils/UtilsTest.java similarity index 63% rename from iotdb-core/consensus/src/test/java/org/apache/iotdb/consensus/ratis/UtilsTest.java rename to iotdb-core/consensus/src/test/java/org/apache/iotdb/consensus/ratis/utils/UtilsTest.java index 9fe3852e85..b1ba219500 100644 --- a/iotdb-core/consensus/src/test/java/org/apache/iotdb/consensus/ratis/UtilsTest.java +++ b/iotdb-core/consensus/src/test/java/org/apache/iotdb/consensus/ratis/utils/UtilsTest.java @@ -17,16 +17,19 @@ * under the License. */ -package org.apache.iotdb.consensus.ratis; +package org.apache.iotdb.consensus.ratis.utils; import org.apache.iotdb.commons.consensus.ConfigRegionId; import org.apache.iotdb.commons.consensus.ConsensusGroupId; -import org.apache.iotdb.consensus.ratis.utils.Utils; +import org.apache.iotdb.consensus.config.RatisConfig; import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.util.TimeDuration; import org.junit.Assert; import org.junit.Test; +import java.util.concurrent.TimeUnit; + public class UtilsTest { @Test public void testEncryption() { @@ -36,4 +39,29 @@ public class UtilsTest { Assert.assertEquals(raw.getId(), cgid.getId()); Assert.assertEquals(raw.getType(), cgid.getType()); } + + @Test + public void testMaxRetryCalculation() { + // 1 0.1 + // 1 0.2 + // 1 0.4 + // 1 0.8 + // 1 1.6 + // 1 3.2 + // 1 6.4 + // 1 10 + // 1 10 + // 1 10 + // sum = 5270ms + final RatisConfig.Client clientConfig = + RatisConfig.Client.newBuilder() + .setClientMaxRetryAttempt(10) + .setClientRetryInitialSleepTimeMs(100) + .setClientRetryMaxSleepTimeMs(10000) + .setClientRequestTimeoutMillis(1000) + .build(); + Assert.assertEquals( + TimeDuration.valueOf(52700, TimeUnit.MILLISECONDS), + Utils.getMaxRetrySleepTime(clientConfig)); + } } -- GitLab