提交 b5547c7a 编写于 作者: M Minghao Li

sync refactor

上级 ef61d3ee
...@@ -94,6 +94,7 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { ...@@ -94,6 +94,7 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) {
} }
assert(pMsg->term <= ths->pRaftStore->currentTerm); assert(pMsg->term <= ths->pRaftStore->currentTerm);
// reset elect timer
if (pMsg->term == ths->pRaftStore->currentTerm) { if (pMsg->term == ths->pRaftStore->currentTerm) {
ths->leaderCache = pMsg->srcId; ths->leaderCache = pMsg->srcId;
syncNodeResetElectTimer(ths); syncNodeResetElectTimer(ths);
...@@ -135,38 +136,117 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { ...@@ -135,38 +136,117 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) {
if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE) { if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE) {
syncNodeBecomeFollower(ths); syncNodeBecomeFollower(ths);
// need ret? // ret or reply?
return ret; return ret;
} }
// accept request // accept request
if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_FOLLOWER && logOK) { if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_FOLLOWER && logOK) {
bool matchSuccess = false; bool preMatch = false;
if (pMsg->prevLogIndex == SYNC_INDEX_INVALID && if (pMsg->prevLogIndex == SYNC_INDEX_INVALID &&
ths->pLogStore->getLastIndex(ths->pLogStore) == SYNC_INDEX_INVALID) { ths->pLogStore->getLastIndex(ths->pLogStore) == SYNC_INDEX_INVALID) {
matchSuccess = true; preMatch = true;
} }
if (pMsg->prevLogIndex >= SYNC_INDEX_BEGIN && pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) { if (pMsg->prevLogIndex >= SYNC_INDEX_BEGIN && pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) {
SSyncRaftEntry* pPreEntry = logStoreGetEntry(ths->pLogStore, pMsg->prevLogIndex); SSyncRaftEntry* pPreEntry = logStoreGetEntry(ths->pLogStore, pMsg->prevLogIndex);
assert(pPreEntry != NULL); assert(pPreEntry != NULL);
if (pMsg->prevLogTerm == pPreEntry->term) { if (pMsg->prevLogTerm == pPreEntry->term) {
matchSuccess = true; preMatch = true;
} }
syncEntryDestory(pPreEntry); syncEntryDestory(pPreEntry);
} }
if (matchSuccess) { if (preMatch) {
// delete conflict entries // must has preIndex in local log
if (pMsg->prevLogIndex < ths->pLogStore->getLastIndex(ths->pLogStore)) { assert(pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore));
SyncIndex fromIndex = pMsg->prevLogIndex + 1;
ths->pLogStore->truncate(ths->pLogStore, fromIndex); bool hasExtraEntries = pMsg->prevLogIndex < ths->pLogStore->getLastIndex(ths->pLogStore);
bool hasAppendEntries = pMsg->dataLen > 0;
if (hasExtraEntries && hasAppendEntries) {
// conflict
bool conflict = false;
SyncIndex extraIndex = pMsg->prevLogIndex + 1;
SSyncRaftEntry* pExtraEntry = logStoreGetEntry(ths->pLogStore, extraIndex);
assert(pExtraEntry != NULL);
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
assert(pAppendEntry != NULL);
assert(extraIndex == pAppendEntry->index);
if (pExtraEntry->term == pAppendEntry->term) {
conflict = true;
} }
// append one entry if (conflict) {
if (pMsg->dataLen > 0) { // roll back
SSyncRaftEntry* pEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); SyncIndex delBegin = ths->pLogStore->getLastIndex(ths->pLogStore);
ths->pLogStore->appendEntry(ths->pLogStore, pEntry); SyncIndex delEnd = extraIndex;
syncEntryDestory(pEntry);
// notice! reverse roll back!
for (SyncIndex index = delEnd; index >= delBegin; --index) {
if (ths->pFsm->FpRollBackCb != NULL) {
SSyncRaftEntry* pRollBackEntry = logStoreGetEntry(ths->pLogStore, index);
assert(pRollBackEntry != NULL);
SRpcMsg rpcMsg;
syncEntry2OriginalRpc(pRollBackEntry, &rpcMsg);
ths->pFsm->FpRollBackCb(ths->pFsm, &rpcMsg, pRollBackEntry->index, pRollBackEntry->isWeak, 0);
rpcFreeCont(rpcMsg.pCont);
syncEntryDestory(pRollBackEntry);
}
}
// delete confict entries
ths->pLogStore->truncate(ths->pLogStore, extraIndex);
// append new entries
ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry);
// pre commit
SRpcMsg rpcMsg;
syncEntry2OriginalRpc(pAppendEntry, &rpcMsg);
if (ths->pFsm != NULL) {
if (ths->pFsm->FpPreCommitCb != NULL) {
ths->pFsm->FpPreCommitCb(ths->pFsm, &rpcMsg, pAppendEntry->index, pAppendEntry->isWeak, 0);
}
}
rpcFreeCont(rpcMsg.pCont);
}
// free memory
syncEntryDestory(pExtraEntry);
syncEntryDestory(pAppendEntry);
} else if (hasExtraEntries && !hasAppendEntries) {
// do nothing
} else if (!hasExtraEntries && hasAppendEntries) {
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
assert(pAppendEntry != NULL);
// append new entries
ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry);
// pre commit
SRpcMsg rpcMsg;
syncEntry2OriginalRpc(pAppendEntry, &rpcMsg);
if (ths->pFsm != NULL) {
if (ths->pFsm->FpPreCommitCb != NULL) {
ths->pFsm->FpPreCommitCb(ths->pFsm, &rpcMsg, pAppendEntry->index, pAppendEntry->isWeak, 0);
}
}
rpcFreeCont(rpcMsg.pCont);
// free memory
syncEntryDestory(pAppendEntry);
} else if (!hasExtraEntries && !hasAppendEntries) {
// do nothing
} else {
assert(0);
} }
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(); SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild();
...@@ -175,7 +255,7 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { ...@@ -175,7 +255,7 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) {
pReply->term = ths->pRaftStore->currentTerm; pReply->term = ths->pRaftStore->currentTerm;
pReply->success = true; pReply->success = true;
if (pMsg->dataLen > 0) { if (hasAppendEntries > 0) {
pReply->matchIndex = pMsg->prevLogIndex + 1; pReply->matchIndex = pMsg->prevLogIndex + 1;
} else { } else {
pReply->matchIndex = pMsg->prevLogIndex; pReply->matchIndex = pMsg->prevLogIndex;
...@@ -201,11 +281,38 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { ...@@ -201,11 +281,38 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) {
syncAppendEntriesReplyDestroy(pReply); syncAppendEntriesReplyDestroy(pReply);
} }
// maybe update commit index from leader
if (pMsg->commitIndex > ths->commitIndex) { if (pMsg->commitIndex > ths->commitIndex) {
// has commit entry in local
if (pMsg->commitIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) { if (pMsg->commitIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) {
// commit SyncIndex beginIndex = ths->commitIndex + 1;
SyncIndex endIndex = pMsg->commitIndex;
// update commit index
ths->commitIndex = pMsg->commitIndex; ths->commitIndex = pMsg->commitIndex;
// call back Wal
ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex); ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex);
// execute fsm
if (ths->pFsm != NULL) {
for (SyncIndex i = beginIndex; i <= endIndex; ++i) {
if (i != SYNC_INDEX_INVALID) {
SSyncRaftEntry* pEntry = ths->pLogStore->getEntry(ths->pLogStore, i);
assert(pEntry != NULL);
SRpcMsg rpcMsg;
syncEntry2OriginalRpc(pEntry, &rpcMsg);
if (ths->pFsm->FpCommitCb != NULL) {
ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, pEntry->index, pEntry->isWeak, 0);
}
rpcFreeCont(rpcMsg.pCont);
syncEntryDestory(pEntry);
}
}
}
} }
} }
} }
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "syncIndexMgr.h" #include "syncIndexMgr.h"
#include "syncInt.h" #include "syncInt.h"
#include "syncRaftLog.h" #include "syncRaftLog.h"
#include "syncRaftStore.h"
// \* Leader i advances its commitIndex. // \* Leader i advances its commitIndex.
// \* This is done as a separate step from handling AppendEntries responses, // \* This is done as a separate step from handling AppendEntries responses,
...@@ -50,16 +51,30 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { ...@@ -50,16 +51,30 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
for (SyncIndex index = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore); index > pSyncNode->commitIndex; for (SyncIndex index = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore); index > pSyncNode->commitIndex;
++index) { ++index) {
if (syncAgree(pSyncNode, index)) { if (syncAgree(pSyncNode, index)) {
// term
SSyncRaftEntry* pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, index);
assert(pEntry != NULL);
// cannot commit, even if quorum agree. need check term!
if (pEntry->term == pSyncNode->pRaftStore->currentTerm) {
// update commit index
newCommitIndex = index; newCommitIndex = index;
break; break;
} }
} }
}
if (newCommitIndex > pSyncNode->commitIndex) { if (newCommitIndex > pSyncNode->commitIndex) {
SyncIndex beginIndex = pSyncNode->commitIndex + 1; SyncIndex beginIndex = pSyncNode->commitIndex + 1;
SyncIndex endIndex = newCommitIndex; SyncIndex endIndex = newCommitIndex;
// update commit index
pSyncNode->commitIndex = newCommitIndex; pSyncNode->commitIndex = newCommitIndex;
// call back Wal
pSyncNode->pLogStore->updateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);
// execute fsm
if (pSyncNode->pFsm != NULL) { if (pSyncNode->pFsm != NULL) {
for (SyncIndex i = beginIndex; i <= endIndex; ++i) { for (SyncIndex i = beginIndex; i <= endIndex; ++i) {
if (i != SYNC_INDEX_INVALID) { if (i != SYNC_INDEX_INVALID) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册