未验证 提交 a7cd465a 编写于 作者: S Shengliang Guan 提交者: GitHub

Merge pull request #18544 from taosdata/feature/3.0_mhli

refactor(sync): add trace log for elect count, become leader count, c…
......@@ -162,7 +162,10 @@ typedef struct SSyncFSM {
// SWal implements it
typedef struct SSyncLogStore {
SLRUCache* pCache;
void* data;
int32_t cacheHit;
int32_t cacheMiss;
void* data;
int32_t (*syncLogUpdateCommitIndex)(struct SSyncLogStore* pLogStore, SyncIndex index);
SyncIndex (*syncLogCommitIndex)(struct SSyncLogStore* pLogStore);
......
......@@ -192,6 +192,10 @@ typedef struct SSyncNode {
int64_t leaderTime;
int64_t lastReplicateTime;
int32_t electNum;
int32_t becomeLeaderNum;
int32_t configChangeNum;
bool isStart;
} SSyncNode;
......
......@@ -205,9 +205,11 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
pLocalEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, hLocal);
code = 0;
ths->pLogStore->cacheHit++;
sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", appendIndex, pLocalEntry->bytes, pLocalEntry);
} else {
ths->pLogStore->cacheMiss++;
sNTrace(ths, "miss cache index:%" PRId64, appendIndex);
code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry);
......
......@@ -117,9 +117,11 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
if (h) {
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
pSyncNode->pLogStore->cacheHit++;
sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", index, pEntry->bytes, pEntry);
} else {
pSyncNode->pLogStore->cacheMiss++;
sNTrace(pSyncNode, "miss cache index:%" PRId64, index);
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry);
......
......@@ -61,7 +61,8 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
}
int32_t syncNodeElect(SSyncNode* pSyncNode) {
sNTrace(pSyncNode, "begin election");
sNInfo(pSyncNode, "begin election");
pSyncNode->electNum++;
int32_t ret = 0;
if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) {
......@@ -86,7 +87,7 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) {
syncNodeCandidate2Leader(pSyncNode);
pSyncNode->pVotesGranted->toLeader = true;
return ret;
}
}
if (pSyncNode->replicaNum == 1) {
// only myself, to leader
......@@ -98,7 +99,6 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) {
syncNodeCandidate2Leader(pSyncNode);
pSyncNode->pVotesGranted->toLeader = true;
return ret;
}
ret = syncNodeRequestVotePeers(pSyncNode);
......
......@@ -410,9 +410,11 @@ bool syncIsReadyForRead(int64_t rid) {
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
code = 0;
pSyncNode->pLogStore->cacheHit++;
sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", lastIndex, pEntry->bytes, pEntry);
} else {
pSyncNode->pLogStore->cacheMiss++;
sNTrace(pSyncNode, "miss cache index:%" PRId64, lastIndex);
code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, lastIndex, &pEntry);
......@@ -1008,6 +1010,10 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID);
pSyncNode->isStart = true;
pSyncNode->electNum = 0;
pSyncNode->becomeLeaderNum = 0;
pSyncNode->configChangeNum = 0;
sNTrace(pSyncNode, "sync open, node:%p", pSyncNode);
return pSyncNode;
......@@ -1340,6 +1346,8 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
pSyncNode->pRaftCfg->cfg = *pNewConfig;
pSyncNode->pRaftCfg->lastConfigIndex = lastConfigChangeIndex;
pSyncNode->configChangeNum++;
bool IamInOld = syncNodeInConfig(pSyncNode, &oldConfig);
bool IamInNew = syncNodeInConfig(pSyncNode, pNewConfig);
......@@ -1363,7 +1371,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
char newCfgStr[1024] = {0};
syncCfg2SimpleStr(&oldConfig, oldCfgStr, sizeof(oldCfgStr));
syncCfg2SimpleStr(pNewConfig, oldCfgStr, sizeof(oldCfgStr));
sNTrace(pSyncNode, "begin do config change, from %s to %s", oldCfgStr, oldCfgStr);
sNInfo(pSyncNode, "begin do config change, from %s to %s", oldCfgStr, oldCfgStr);
if (IamInNew) {
pSyncNode->pRaftCfg->isStandBy = 0; // change isStandBy to normal
......@@ -1495,13 +1503,13 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
} else {
// persist cfg
raftCfgPersist(pSyncNode->pRaftCfg);
sNTrace(pSyncNode, "do not config change from %d to %d, index:%" PRId64 ", %s --> %s", oldConfig.replicaNum,
pNewConfig->replicaNum, lastConfigChangeIndex, oldCfgStr, newCfgStr);
sNInfo(pSyncNode, "do not config change from %d to %d, index:%" PRId64 ", %s --> %s", oldConfig.replicaNum,
pNewConfig->replicaNum, lastConfigChangeIndex, oldCfgStr, newCfgStr);
}
_END:
// log end config change
sNTrace(pSyncNode, "end do config change, from %s to %s", oldCfgStr, newCfgStr);
sNInfo(pSyncNode, "end do config change, from %s to %s", oldCfgStr, newCfgStr);
}
// raft state change --------------
......@@ -1598,6 +1606,8 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) {
void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
pSyncNode->leaderTime = taosGetTimestampMs();
pSyncNode->becomeLeaderNum++;
// reset restoreFinish
pSyncNode->restoreFinish = false;
......@@ -1666,7 +1676,7 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
// trace log
sNTrace(pSyncNode, "become leader %s", debugStr);
sNInfo(pSyncNode, "become leader %s", debugStr);
}
void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
......@@ -1842,9 +1852,11 @@ SyncTerm syncNodeGetPreTerm(SSyncNode* pSyncNode, SyncIndex index) {
pPreEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
code = 0;
pSyncNode->pLogStore->cacheHit++;
sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", preIndex, pPreEntry->bytes, pPreEntry);
} else {
pSyncNode->pLogStore->cacheMiss++;
sNTrace(pSyncNode, "miss cache index:%" PRId64, preIndex);
code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, preIndex, &pPreEntry);
......@@ -1971,7 +1983,7 @@ static void syncNodeEqHeartbeatTimer(void* param, void* tmrId) {
return;
}
sTrace("enqueue heartbeat timer");
sTrace("vgId:%d, enqueue heartbeat timer", pNode->vgId);
code = pNode->syncEqMsg(pNode->msgcb, &rpcMsg);
if (code != 0) {
sError("failed to enqueue heartbeat msg since %s", terrstr());
......@@ -2526,9 +2538,11 @@ int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endInde
if (h) {
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
ths->pLogStore->cacheHit++;
sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", i, pEntry->bytes, pEntry);
} else {
ths->pLogStore->cacheMiss++;
sNTrace(ths, "miss cache index:%" PRId64, i);
code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry);
......
......@@ -45,6 +45,9 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) {
return NULL;
}
pLogStore->cacheHit = 0;
pLogStore->cacheMiss = 0;
taosLRUCacheSetStrictCapacity(pLogStore->pCache, false);
pLogStore->data = taosMemoryMalloc(sizeof(SSyncLogStoreData));
......
......@@ -80,9 +80,11 @@ int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId, bool snapsh
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
code = 0;
pSyncNode->pLogStore->cacheHit++;
sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", nextIndex, pEntry->bytes, pEntry);
} else {
pSyncNode->pLogStore->cacheMiss++;
sNTrace(pSyncNode, "miss cache index:%" PRId64, nextIndex);
code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry);
......
......@@ -52,7 +52,7 @@ static void syncNodeCleanConfigIndex(SSyncNode* ths) {
}
static int32_t syncNodeTimerRoutine(SSyncNode* ths) {
sNTrace(ths, "timer routines");
sNInfo(ths, "timer routines");
// timer replicate
syncNodeReplicate(ths);
......
......@@ -242,6 +242,9 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo
logBeginIndex = pNode->pLogStore->syncLogBeginIndex(pNode->pLogStore);
}
int32_t cacheHit = pNode->pLogStore->cacheHit;
int32_t cacheMiss = pNode->pLogStore->cacheMiss;
char cfgStr[1024];
if (pNode->pRaftCfg != NULL) {
syncCfg2SimpleStr(&(pNode->pRaftCfg->cfg), cfgStr, sizeof(cfgStr));
......@@ -275,18 +278,18 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo
terrno = errCode;
if (pNode != NULL && pNode->pRaftCfg != NULL) {
taosPrintLog(flags, level, dflag,
"vgId:%d, sync %s "
"%s"
", tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64 ", snap:%" PRId64
", snap-tm:%" PRIu64 ", sby:%d, aq:%d, snaping:%" PRId64 ", r-num:%d, lcfg:%" PRId64
", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s, %s, %s, %s",
pNode->vgId, syncStr(pNode->state), eventLog, currentTerm, pNode->commitIndex, logBeginIndex,
logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm,
pNode->pRaftCfg->isStandBy, aqItems, pNode->snapshottingIndex, pNode->replicaNum,
pNode->pRaftCfg->lastConfigIndex, pNode->changing, pNode->restoreFinish, quorum,
pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, peerStr, cfgStr, hbTimeStr,
hbrTimeStr);
taosPrintLog(
flags, level, dflag,
"vgId:%d, sync %s "
"%s"
", tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64 ", snap:%" PRId64
", snap-tm:%" PRIu64 ", elt-num:%d, bl-num:%d, cc-num:%d, hit:%d, mis:%d, aq:%d, snaping:%" PRId64
", r-num:%d, lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s, %s, %s, %s",
pNode->vgId, syncStr(pNode->state), eventLog, currentTerm, pNode->commitIndex, logBeginIndex, logLastIndex,
pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, pNode->electNum, pNode->becomeLeaderNum,
pNode->configChangeNum, cacheHit, cacheMiss, aqItems, pNode->snapshottingIndex, pNode->replicaNum,
pNode->pRaftCfg->lastConfigIndex, pNode->changing, pNode->restoreFinish, quorum, pNode->electTimerLogicClock,
pNode->heartbeatTimerLogicClockUser, peerStr, cfgStr, hbTimeStr, hbrTimeStr);
}
}
......@@ -438,7 +441,8 @@ void syncLogRecvAppendEntriesReply(SSyncNode* pSyncNode, const SyncAppendEntries
host, port, pMsg->term, pMsg->privateTerm, pMsg->success, pMsg->lastSendIndex, pMsg->matchIndex, s);
}
void syncLogSendHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, bool printX, int64_t timerElapsed, int64_t execTime) {
void syncLogSendHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, bool printX, int64_t timerElapsed,
int64_t execTime) {
if (!(sDebugFlag & DEBUG_TRACE)) return;
char host[64];
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册