From 3734a0ff85fe2f85001175e491632cba37b10dda Mon Sep 17 00:00:00 2001
From: Benguang Zhao
Date: Tue, 1 Aug 2023 18:23:02 +0800
Subject: [PATCH] enh: keep retrying on syncFsmExecute failure

---
 source/libs/sync/src/syncPipeline.c | 15 +++++++++------
 source/libs/wal/src/walMeta.c       |  5 +++--
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c
index 532a6955cf..0e1d107f04 100644
--- a/source/libs/sync/src/syncPipeline.c
+++ b/source/libs/sync/src/syncPipeline.c
@@ -626,12 +626,15 @@ int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t comm
             pEntry->term, TMSG_INFO(pEntry->originalRpcType));
     }
 
-    if (syncFsmExecute(pNode, pFsm, role, currentTerm, pEntry, 0, false) != 0) {
-      sError("vgId:%d, failed to execute sync log entry. index:%" PRId64 ", term:%" PRId64
-             ", role:%d, current term:%" PRId64,
-             vgId, pEntry->index, pEntry->term, role, currentTerm);
-      goto _out;
-    }
+    do {
+      if ((ret = syncFsmExecute(pNode, pFsm, role, currentTerm, pEntry, 0, false)) != 0) {
+        sError("vgId:%d, failed to execute sync log entry since %s. index:%" PRId64 ", term:%" PRId64
+               ", role:%d, current term:%" PRId64,
+               vgId, terrstr(), pEntry->index, pEntry->term, role, currentTerm);
+        taosMsleep(10);
+      }
+    } while (ret != 0 && terrno != TSDB_CODE_VND_INVALID_VGROUP_ID);
+
     pBuf->commitIndex = index;
 
     sTrace("vgId:%d, committed index:%" PRId64 ", term:%" PRId64 ", role:%d, current term:%" PRId64 "", pNode->vgId,
diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c
index e700ef3d0a..c6afcb19cc 100644
--- a/source/libs/wal/src/walMeta.c
+++ b/source/libs/wal/src/walMeta.c
@@ -339,8 +339,9 @@ bool walLogEntriesComplete(const SWal* pWal) {
   }
 
   if (!complete) {
-    wError("vgId:%d, WAL log entries incomplete in range [%" PRId64 ", %" PRId64 "], aligned with snaphotVer:%" PRId64,
-           pWal->cfg.vgId, pWal->vers.firstVer, pWal->vers.lastVer, pWal->vers.snapshotVer);
+    wError("vgId:%d, WAL log entries incomplete in range [%" PRId64 ", %" PRId64 "], missing index:%" PRId64
+           ", snaphotVer:%" PRId64,
+           pWal->cfg.vgId, pWal->vers.firstVer, pWal->vers.lastVer, index, pWal->vers.snapshotVer);
     terrno = TSDB_CODE_WAL_LOG_INCOMPLETE;
   }
 
-- 
GitLab