From ca4d7329ce971984b854506e538bbe9583302356 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Thu, 23 Jun 2022 14:07:18 +0800 Subject: [PATCH] refactor(sync): modify wal error log --- source/libs/sync/src/syncAppendEntries.c | 27 +++-------- source/libs/sync/src/syncAppendEntriesReply.c | 42 ++++++++--------- source/libs/sync/src/syncMain.c | 5 +- source/libs/sync/src/syncRaftLog.c | 46 ++++++++++++++----- 4 files changed, 64 insertions(+), 56 deletions(-) diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index bc5342cc7e..31176d942e 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -466,14 +466,11 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs int32_t code = 0; // print log - char logBuf[128] = {0}; - snprintf(logBuf, sizeof(logBuf), "recv SyncAppendEntries, vgId:%d, term:%lu", ths->vgId, - ths->pRaftStore->currentTerm); - syncAppendEntriesLog2(logBuf, pMsg); + syncAppendEntriesLog2("==syncNodeOnAppendEntriesSnapshotCb==", pMsg); // if already drop replica, do not process if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - sInfo("recv SyncAppendEntries maybe replica already dropped"); + syncNodeEventLog(ths, "recv sync-append-entries, maybe replica already dropped"); return ret; } @@ -497,7 +494,7 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs do { bool condition = pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE; if (condition) { - sTrace("recv SyncAppendEntries, candidate to follower"); + syncNodeEventLog(ths, "recv sync-append-entries, candidate to follower"); syncNodeBecomeFollower(ths, "from candidate by append entries"); // do not reply? @@ -538,11 +535,7 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs bool condition = condition1 || condition2 || condition3 || condition4; if (condition) { - sTrace( - "recv SyncAppendEntries, fake match, myLastIndex:%ld, syncLogBeginIndex:%ld, syncLogEndIndex:%ld, " - "condition1:%d, condition2:%d, condition3:%d, condition4:%d", - myLastIndex, ths->pLogStore->syncLogBeginIndex(ths->pLogStore), - ths->pLogStore->syncLogEndIndex(ths->pLogStore), condition1, condition2, condition3, condition4); + syncNodeEventLog(ths, "recv sync-append-entries, fake match"); // prepare response msg SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); @@ -576,8 +569,7 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && (pMsg->prevLogIndex <= ths->commitIndex); if (condition) { - sTrace("recv SyncAppendEntries, fake match2, msg-prevLogIndex:%ld, my-commitIndex:%ld", pMsg->prevLogIndex, - ths->commitIndex); + syncNodeEventLog(ths, "recv sync-append-entries, fake match2"); SyncIndex matchIndex = ths->commitIndex; bool hasAppendEntries = pMsg->dataLen > 0; @@ -650,11 +642,7 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs bool condition = condition1 || condition2; if (condition) { - sTrace( - "recv SyncAppendEntries, not match, syncLogBeginIndex:%ld, syncLogEndIndex:%ld, condition1:%d, " - "condition2:%d, logOK:%d", - ths->pLogStore->syncLogBeginIndex(ths->pLogStore), ths->pLogStore->syncLogEndIndex(ths->pLogStore), - condition1, condition2, logOK); + syncNodeEventLog(ths, "recv sync-append-entries, not match"); // prepare response msg SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); @@ -693,8 +681,7 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs // has entries in SyncAppendEntries msg bool hasAppendEntries = pMsg->dataLen > 0; - sTrace("recv SyncAppendEntries, match, myLastIndex:%ld, hasExtraEntries:%d, hasAppendEntries:%d", myLastIndex, - hasExtraEntries, hasAppendEntries); + syncNodeEventLog(ths, "recv sync-append-entries, match"); if (hasExtraEntries) { // make log same, rollback deleted entries diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index f13a3604da..6ca356b4f6 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -101,32 +101,27 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries int32_t ret = 0; // print log - char logBuf[128] = {0}; - snprintf(logBuf, sizeof(logBuf), "recv SyncAppendEntriesReply, vgId:%d, term:%lu", ths->vgId, - ths->pRaftStore->currentTerm); - syncAppendEntriesReplyLog2(logBuf, pMsg); + syncAppendEntriesReplyLog2("==syncNodeOnAppendEntriesReplySnapshotCb==", pMsg); // if already drop replica, do not process if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - sInfo("recv SyncAppendEntriesReply, maybe replica already dropped"); - return ret; + syncNodeEventLog(ths, "recv sync-append-entries-reply, maybe replica already dropped"); + return 0; } // drop stale response if (pMsg->term < ths->pRaftStore->currentTerm) { - sTrace("recv SyncAppendEntriesReply, drop stale response, receive_term:%lu current_term:%lu", pMsg->term, - ths->pRaftStore->currentTerm); - return ret; + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "recv sync-append-entries-reply, recv-term:%lu, drop stale response", pMsg->term); + syncNodeEventLog(ths, logBuf); + return 0; } - syncIndexMgrLog2("recv SyncAppendEntriesReply, before pNextIndex:", ths->pNextIndex); - syncIndexMgrLog2("recv SyncAppendEntriesReply, before pMatchIndex:", ths->pMatchIndex); if (gRaftDetailLog) { - SSnapshot snapshot; - ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot); - sTrace("recv SyncAppendEntriesReply, before snapshot.lastApplyIndex:%ld, snapshot.lastApplyTerm:%lu", - snapshot.lastApplyIndex, snapshot.lastApplyTerm); + syncNodeEventLog(ths, "recv sync-append-entries-reply, before"); } + syncIndexMgrLog2("recv sync-append-entries-reply, before pNextIndex:", ths->pNextIndex); + syncIndexMgrLog2("recv sync-append-entries-reply, before pMatchIndex:", ths->pMatchIndex); // no need this code, because if I receive reply.term, then I must have sent for that term. // if (pMsg->term > ths->pRaftStore->currentTerm) { @@ -134,12 +129,10 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries // } if (pMsg->term > ths->pRaftStore->currentTerm) { - char logBuf[128] = {0}; - snprintf(logBuf, sizeof(logBuf), "recv SyncAppendEntriesReply, error term, receive_term:%lu current_term:%lu", - pMsg->term, ths->pRaftStore->currentTerm); - syncNodeLog2(logBuf, ths); - sError("%s", logBuf); - return ret; + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "recv sync-append-entries-reply, error term, recv-term:%lu", pMsg->term); + syncNodeErrorLog(ths, logBuf); + return -1; } ASSERT(pMsg->term == ths->pRaftStore->currentTerm); @@ -228,8 +221,11 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries } } - syncIndexMgrLog2("recv SyncAppendEntriesReply, after pNextIndex:", ths->pNextIndex); - syncIndexMgrLog2("recv SyncAppendEntriesReply, after pMatchIndex:", ths->pMatchIndex); + if (gRaftDetailLog) { + syncNodeEventLog(ths, "recv sync-append-entries-reply, after"); + } + syncIndexMgrLog2("recv sync-append-entries-reply, after pNextIndex:", ths->pNextIndex); + syncIndexMgrLog2("recv sync-append-entries-reply, after pMatchIndex:", ths->pMatchIndex); return ret; } \ No newline at end of file diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 732f450f4c..8979f244f0 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -416,7 +416,7 @@ int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct } } sMeta->lastConfigIndex = lastIndex; - sTrace("vgId:%d, get snapshot meta by index:%" PRId64 " lastConfigIndex:%" PRId64, pSyncNode->vgId, snapshotIndex, + sTrace("vgId:%d, get snapshot meta by index:%" PRId64 " lcindex:%" PRId64, pSyncNode->vgId, snapshotIndex, sMeta->lastConfigIndex); taosReleaseRef(tsNodeRefId, pSyncNode->rid); @@ -433,8 +433,9 @@ SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapsho lastIndex = (pSyncNode->pRaftCfg->configIndexArr)[i]; } } + sTrace("vgId:%d, sync get snapshot last config index, index:%ld lcindex:%ld", pSyncNode->vgId, snapshotLastApplyIndex, + lastIndex); - sTrace("sync syncNodeGetSnapshotConfigIndex index:%ld lastConfigIndex:%ld", snapshotLastApplyIndex, lastIndex); return lastIndex; } diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 701b4a7b93..f000c9f117 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -46,13 +46,13 @@ static SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore); // refactor, log[0 .. n] ==> log[m .. n] static int32_t raftLogSetBeginIndex(struct SSyncLogStore* pLogStore, SyncIndex beginIndex) { - sTrace("raftLogSetBeginIndex beginIndex:%ld", beginIndex); - // if beginIndex == 0, donot need call this funciton ASSERT(beginIndex > 0); SSyncLogStoreData* pData = pLogStore->data; SWal* pWal = pData->pWal; + sTrace("vgId:%d, reset wal begin index:%ld", pData->pSyncNode->vgId, beginIndex); + pData->beginIndex = beginIndex; walRestoreFromSnapshot(pWal, beginIndex - 1); return 0; @@ -160,8 +160,12 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr const char* errStr = tstrerror(err); int32_t sysErr = errno; const char* sysErrStr = strerror(errno); - sError("vgId:%d wal write error, index:%ld, err:%d %X, msg:%s, syserr:%d, sysmsg:%s", pData->pSyncNode->vgId, - pEntry->index, err, err, errStr, sysErr, sysErrStr); + + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "wal write error, index:%ld, err:%d %X, msg:%s, syserr:%d, sysmsg:%s", + pEntry->index, err, err, errStr, sysErr, sysErrStr); + syncNodeErrorLog(pData->pSyncNode, logBuf); + ASSERT(0); } @@ -240,8 +244,17 @@ static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, const char* errStr = tstrerror(err); int32_t sysErr = errno; const char* sysErrStr = strerror(errno); - sError("vgId:%d wal read error, index:%ld, err:%d %X, msg:%s, syserr:%d, sysmsg:%s", pData->pSyncNode->vgId, index, - err, err, errStr, sysErr, sysErrStr); + + do { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "wal read error, index:%ld, err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index, err, + err, errStr, sysErr, sysErrStr); + if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { + syncNodeEventLog(pData->pSyncNode, logBuf); + } else { + syncNodeErrorLog(pData->pSyncNode, logBuf); + } + } while (0); int32_t saveErr = terrno; walCloseReadHandle(pWalHandle); @@ -368,8 +381,11 @@ int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { const char* errStr = tstrerror(err); int32_t sysErr = errno; const char* sysErrStr = strerror(errno); - sError("vgId:%d wal write error, index:%ld, err:%d %X, msg:%s, syserr:%d, sysmsg:%s", pData->pSyncNode->vgId, - pEntry->index, err, err, errStr, sysErr, sysErrStr); + + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "wal write error, index:%ld, err:%d %X, msg:%s, syserr:%d, sysmsg:%s", + pEntry->index, err, err, errStr, sysErr, sysErrStr); + syncNodeErrorLog(pData->pSyncNode, logBuf); ASSERT(0); } @@ -398,12 +414,20 @@ SSyncRaftEntry* logStoreGetEntry(SSyncLogStore* pLogStore, SyncIndex index) { const char* errStr = tstrerror(err); int32_t sysErr = errno; const char* sysErrStr = strerror(errno); - sError("vgId:%d wal read error, index:%ld, err:%d %X, msg:%s, syserr:%d, sysmsg:%s", pData->pSyncNode->vgId, - index, err, err, errStr, sysErr, sysErrStr); + + do { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "wal read error, index:%ld, err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index, + err, err, errStr, sysErr, sysErrStr); + if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { + syncNodeEventLog(pData->pSyncNode, logBuf); + } else { + syncNodeErrorLog(pData->pSyncNode, logBuf); + } + } while (0); ASSERT(0); } - // ASSERT(walReadWithHandle(pWalHandle, index) == 0); SSyncRaftEntry* pEntry = syncEntryBuild(pWalHandle->pHead->head.bodyLen); ASSERT(pEntry != NULL); -- GitLab