未验证 提交 577f5655 编写于 作者: S Shengliang Guan 提交者: GitHub

Merge pull request #17564 from taosdata/feature/sync2-merge

refactor(sync): refactor sync
......@@ -99,6 +99,7 @@ int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad);
*/
int32_t mndProcessRpcMsg(SRpcMsg *pMsg);
int32_t mndProcessSyncMsg(SRpcMsg *pMsg);
int32_t mndProcessSyncCtrlMsg(SRpcMsg *pMsg);
int32_t mndPreProcessQueryMsg(SRpcMsg *pMsg);
void mndPostProcessQueryMsg(SRpcMsg *pMsg);
......
......@@ -35,7 +35,12 @@ extern bool gRaftDetailLog;
#define SYNC_MAX_PROGRESS_WAIT_MS 4000
#define SYNC_MAX_START_TIME_RANGE_MS (1000 * 20)
#define SYNC_MAX_RECV_TIME_RANGE_MS 1200
#define SYNC_DEL_WAL_MS (1000 * 60)
#define SYNC_ADD_QUORUM_COUNT 3
#define SYNC_MNODE_LOG_RETENTION 10000
#define SYNC_VNODE_LOG_RETENTION 500
#define SYNC_APPEND_ENTRIES_TIMEOUT_MS 10000
#define SYNC_MAX_BATCH_SIZE 1
#define SYNC_INDEX_BEGIN 0
......@@ -157,32 +162,15 @@ typedef struct SSyncLogStore {
SLRUCache* pCache;
void* data;
// append one log entry
int32_t (*appendEntry)(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry);
// get one log entry, user need to free pEntry->pCont
SSyncRaftEntry* (*getEntry)(struct SSyncLogStore* pLogStore, SyncIndex index);
// truncate log with index, entries after the given index (>=index) will be deleted
int32_t (*truncate)(struct SSyncLogStore* pLogStore, SyncIndex fromIndex);
// return index of last entry
SyncIndex (*getLastIndex)(struct SSyncLogStore* pLogStore);
// return term of last entry
SyncTerm (*getLastTerm)(struct SSyncLogStore* pLogStore);
// update log store commit index with "index"
int32_t (*updateCommitIndex)(struct SSyncLogStore* pLogStore, SyncIndex index);
// return commit index of log
SyncIndex (*getCommitIndex)(struct SSyncLogStore* pLogStore);
int32_t (*syncLogUpdateCommitIndex)(struct SSyncLogStore* pLogStore, SyncIndex index);
SyncIndex (*syncLogCommitIndex)(struct SSyncLogStore* pLogStore);
SyncIndex (*syncLogBeginIndex)(struct SSyncLogStore* pLogStore);
SyncIndex (*syncLogEndIndex)(struct SSyncLogStore* pLogStore);
bool (*syncLogIsEmpty)(struct SSyncLogStore* pLogStore);
int32_t (*syncLogEntryCount)(struct SSyncLogStore* pLogStore);
int32_t (*syncLogRestoreFromSnapshot)(struct SSyncLogStore* pLogStore, SyncIndex index);
bool (*syncLogIsEmpty)(struct SSyncLogStore* pLogStore);
bool (*syncLogExist)(struct SSyncLogStore* pLogStore, SyncIndex index);
SyncIndex (*syncLogWriteIndex)(struct SSyncLogStore* pLogStore);
......@@ -207,6 +195,7 @@ typedef struct SSyncInfo {
SMsgCb* msgcb;
int32_t (*FpSendMsg)(const SEpSet* pEpSet, SRpcMsg* pMsg);
int32_t (*FpEqMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg);
int32_t (*FpEqCtrlMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg);
} SSyncInfo;
int32_t syncInit();
......@@ -217,7 +206,6 @@ void syncStop(int64_t rid);
int32_t syncSetStandby(int64_t rid);
ESyncState syncGetMyRole(int64_t rid);
bool syncIsReady(int64_t rid);
bool syncIsReadyForRead(int64_t rid);
const char* syncGetMyRoleStr(int64_t rid);
bool syncRestoreFinish(int64_t rid);
SyncTerm syncGetMyTerm(int64_t rid);
......@@ -227,7 +215,7 @@ SyncGroupId syncGetVgId(int64_t rid);
void syncGetEpSet(int64_t rid, SEpSet* pEpSet);
void syncGetRetryEpSet(int64_t rid, SEpSet* pEpSet);
int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak);
int32_t syncProposeBatch(int64_t rid, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize);
// int32_t syncProposeBatch(int64_t rid, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize);
bool syncEnvIsStart();
const char* syncStr(ESyncState state);
bool syncIsRestoreFinish(int64_t rid);
......@@ -241,6 +229,9 @@ int32_t syncReconfigBuild(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg
int32_t syncLeaderTransfer(int64_t rid);
int32_t syncLeaderTransferTo(int64_t rid, SNodeInfo newLeader);
int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex);
int32_t syncEndSnapshot(int64_t rid);
#ifdef __cplusplus
}
#endif
......
......@@ -157,6 +157,8 @@ typedef enum ESyncTimeoutType {
SYNC_TIMEOUT_HEARTBEAT,
} ESyncTimeoutType;
const char* syncTimerTypeStr(enum ESyncTimeoutType timerType);
typedef struct SyncTimeout {
uint32_t bytes;
int32_t vgId;
......@@ -423,6 +425,7 @@ typedef struct SyncAppendEntriesReply {
SyncTerm privateTerm;
bool success;
SyncIndex matchIndex;
SyncIndex lastSendIndex;
int64_t startTime;
} SyncAppendEntriesReply;
......@@ -456,6 +459,8 @@ typedef struct SyncHeartbeat {
SyncTerm term;
SyncIndex commitIndex;
SyncTerm privateTerm;
SyncTerm minMatchIndex;
} SyncHeartbeat;
SyncHeartbeat* syncHeartbeatBuild(int32_t vgId);
......@@ -676,24 +681,17 @@ void syncReconfigFinishLog2(char* s, const SyncReconfigFinish* pMsg);
// on message ----------------------
int32_t syncNodeOnPingCb(SSyncNode* ths, SyncPing* pMsg);
int32_t syncNodeOnPingReplyCb(SSyncNode* ths, SyncPingReply* pMsg);
int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg);
int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncIndex* pRetIndex);
int32_t syncNodeOnClientRequestBatchCb(SSyncNode* ths, SyncClientRequestBatch* pMsg);
int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg);
int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg);
int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg);
int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg);
int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg);
int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg);
int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
int32_t syncNodeOnTimer(SSyncNode* ths, SyncTimeout* pMsg);
int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg);
int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
int32_t syncNodeOnSnapshotSendCb(SSyncNode* ths, SyncSnapshotSend* pMsg);
int32_t syncNodeOnSnapshotRspCb(SSyncNode* ths, SyncSnapshotRsp* pMsg);
int32_t syncNodeOnHeartbeat(SSyncNode* ths, SyncHeartbeat* pMsg);
int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, SyncHeartbeatReply* pMsg);
int32_t syncNodeOnClientRequest(SSyncNode* ths, SyncClientRequest* pMsg, SyncIndex* pRetIndex);
int32_t syncNodeOnRequestVote(SSyncNode* ths, SyncRequestVote* pMsg);
int32_t syncNodeOnRequestVoteReply(SSyncNode* ths, SyncRequestVoteReply* pMsg);
int32_t syncNodeOnAppendEntries(SSyncNode* ths, SyncAppendEntries* pMsg);
int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
int32_t syncNodeOnSnapshot(SSyncNode* ths, SyncSnapshotSend* pMsg);
int32_t syncNodeOnSnapshotReply(SSyncNode* ths, SyncSnapshotRsp* pMsg);
int32_t syncNodeOnHeartbeat(SSyncNode* ths, SyncHeartbeat* pMsg);
int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, SyncHeartbeatReply* pMsg);
......@@ -707,8 +705,8 @@ typedef int32_t (*FpOnRequestVoteReplyCb)(SSyncNode* ths, SyncRequestVoteReply*
typedef int32_t (*FpOnAppendEntriesCb)(SSyncNode* ths, SyncAppendEntries* pMsg);
typedef int32_t (*FpOnAppendEntriesReplyCb)(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
typedef int32_t (*FpOnTimeoutCb)(SSyncNode* pSyncNode, SyncTimeout* pMsg);
typedef int32_t (*FpOnSnapshotSendCb)(SSyncNode* ths, SyncSnapshotSend* pMsg);
typedef int32_t (*FpOnSnapshotRspCb)(SSyncNode* ths, SyncSnapshotRsp* pMsg);
typedef int32_t (*FpOnSnapshotCb)(SSyncNode* ths, SyncSnapshotSend* pMsg);
typedef int32_t (*FpOnSnapshotReplyCb)(SSyncNode* ths, SyncSnapshotRsp* pMsg);
// option ----------------------------------
bool syncNodeSnapshotEnable(SSyncNode* pSyncNode);
......
......@@ -34,6 +34,7 @@ typedef struct SMnodeMgmt {
SSingleWorker readWorker;
SSingleWorker writeWorker;
SSingleWorker syncWorker;
SSingleWorker syncCtrlWorker;
bool stopped;
int32_t refCount;
TdThreadRwlock lock;
......@@ -53,6 +54,7 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt);
void mmStopWorker(SMnodeMgmt *pMgmt);
int32_t mmPutMsgToWriteQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t mmPutMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t mmPutMsgToSyncCtrlQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t mmPutMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t mmPutMsgToQueryQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t mmPutMsgToFetchQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
......
......@@ -197,6 +197,9 @@ SArray *mmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_SEND, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_RSP, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_SYNC_HEARTBEAT, mmPutMsgToSyncCtrlQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_SYNC_HEARTBEAT_REPLY, mmPutMsgToSyncCtrlQueue, 1) == NULL) goto _OVER;
code = 0;
_OVER:
......
......@@ -67,6 +67,24 @@ static void mmProcessRpcMsg(SQueueInfo *pInfo, SRpcMsg *pMsg) {
taosFreeQitem(pMsg);
}
static void mmProcessSyncCtrlMsg(SQueueInfo *pInfo, SRpcMsg *pMsg) {
SMnodeMgmt *pMgmt = pInfo->ahandle;
pMsg->info.node = pMgmt->pMnode;
const STraceId *trace = &pMsg->info.traceId;
dGTrace("msg:%p, get from mnode-sync-ctrl queue", pMsg);
SMsgHead *pHead = pMsg->pCont;
pHead->contLen = ntohl(pHead->contLen);
pHead->vgId = ntohl(pHead->vgId);
int32_t code = mndProcessSyncCtrlMsg(pMsg);
dGTrace("msg:%p, is freed, code:0x%x", pMsg, code);
rpcFreeCont(pMsg->pCont);
taosFreeQitem(pMsg);
}
static void mmProcessSyncMsg(SQueueInfo *pInfo, SRpcMsg *pMsg) {
SMnodeMgmt *pMgmt = pInfo->ahandle;
pMsg->info.node = pMgmt->pMnode;
......@@ -108,6 +126,10 @@ int32_t mmPutMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return mmPutMsgToWorker(pMgmt, &pMgmt->syncWorker, pMsg);
}
int32_t mmPutMsgToSyncCtrlQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return mmPutMsgToWorker(pMgmt, &pMgmt->syncCtrlWorker, pMsg);
}
int32_t mmPutMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return mmPutMsgToWorker(pMgmt, &pMgmt->readWorker, pMsg);
}
......@@ -144,6 +166,9 @@ int32_t mmPutMsgToQueue(SMnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) {
case SYNC_QUEUE:
pWorker = &pMgmt->syncWorker;
break;
case SYNC_CTRL_QUEUE:
pWorker = &pMgmt->syncCtrlWorker;
break;
default:
terrno = TSDB_CODE_INVALID_PARA;
}
......@@ -223,6 +248,18 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt) {
return -1;
}
SSingleWorkerCfg scCfg = {
.min = 1,
.max = 1,
.name = "mnode-sync-ctrl",
.fp = (FItem)mmProcessSyncCtrlMsg,
.param = pMgmt,
};
if (tSingleWorkerInit(&pMgmt->syncCtrlWorker, &scCfg) != 0) {
dError("failed to start mnode mnode-sync-ctrl worker since %s", terrstr());
return -1;
}
dDebug("mnode workers are initialized");
return 0;
}
......@@ -235,5 +272,6 @@ void mmStopWorker(SMnodeMgmt *pMgmt) {
tSingleWorkerCleanup(&pMgmt->readWorker);
tSingleWorkerCleanup(&pMgmt->writeWorker);
tSingleWorkerCleanup(&pMgmt->syncWorker);
tSingleWorkerCleanup(&pMgmt->syncCtrlWorker);
dDebug("mnode workers are closed");
}
......@@ -188,7 +188,7 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
req.walRollPeriod, req.walSegmentSize, req.hashMethod, req.hashBegin, req.hashEnd, req.hashPrefix,
req.hashSuffix, req.replica, req.selfIndex, req.strict);
for (int32_t i = 0; i < req.replica; ++i) {
dInfo("vgId:%d, replica:%d fqdn:%s port:%u", req.vgId, req.replicas[i].id, req.replicas[i].fqdn,
dInfo("vgId:%d, replica:%d id:%d fqdn:%s port:%u", req.vgId, i, req.replicas[i].id, req.replicas[i].fqdn,
req.replicas[i].port);
}
vmGenerateVnodeCfg(&req, &vnodeCfg);
......
......@@ -241,6 +241,8 @@ static void *vmCloseVnodeInThread(void *param) {
static void vmCloseVnodes(SVnodeMgmt *pMgmt) {
dInfo("start to close all vnodes");
tSingleWorkerCleanup(&pMgmt->mgmtWorker);
dInfo("vnodes mgmt worker is stopped");
int32_t numOfVnodes = 0;
SVnodeObj **ppVnodes = vmGetVnodeListFromHash(pMgmt, &numOfVnodes);
......
......@@ -234,11 +234,9 @@ static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtyp
return code;
}
int32_t vmPutMsgToSyncQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_QUEUE); }
int32_t vmPutMsgToSyncCtrlQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_CTRL_QUEUE); }
int32_t vmPutMsgToSyncCtrlQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return vmPutMsgToQueue(pMgmt, pMsg, SYNC_CTRL_QUEUE);
}
int32_t vmPutMsgToSyncQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_QUEUE); }
int32_t vmPutMsgToWriteQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, WRITE_QUEUE); }
......@@ -405,7 +403,6 @@ int32_t vmStartWorker(SVnodeMgmt *pMgmt) {
}
void vmStopWorker(SVnodeMgmt *pMgmt) {
tSingleWorkerCleanup(&pMgmt->mgmtWorker);
tWWorkerCleanup(&pMgmt->writePool);
tWWorkerCleanup(&pMgmt->applyPool);
tWWorkerCleanup(&pMgmt->syncPool);
......
......@@ -250,6 +250,7 @@ static int32_t mndInitSdb(SMnode *pMnode) {
opt.path = pMnode->path;
opt.pMnode = pMnode;
opt.pWal = pMnode->pWal;
opt.sync = pMnode->syncMgmt.sync;
pMnode->pSdb = sdbInit(&opt);
if (pMnode->pSdb == NULL) {
......@@ -381,6 +382,7 @@ SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
mError("failed to open mnode since %s", terrstr());
return NULL;
}
memset(pMnode, 0, sizeof(SMnode));
char timestr[24] = "1970-01-01 00:00:00.00";
(void)taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, 0);
......@@ -474,6 +476,45 @@ void mndStop(SMnode *pMnode) {
mndCleanupTimer(pMnode);
}
int32_t mndProcessSyncCtrlMsg(SRpcMsg *pMsg) {
SMnode *pMnode = pMsg->info.node;
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
int32_t code = 0;
mInfo("vgId:%d, process sync ctrl msg", 1);
if (!syncEnvIsStart()) {
mError("failed to process sync msg:%p type:%s since syncEnv stop", pMsg, TMSG_INFO(pMsg->msgType));
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
return -1;
}
SSyncNode *pSyncNode = syncNodeAcquire(pMgmt->sync);
if (pSyncNode == NULL) {
mError("failed to process sync msg:%p type:%s since syncNode is null", pMsg, TMSG_INFO(pMsg->msgType));
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
return -1;
}
if (pMsg->msgType == TDMT_SYNC_HEARTBEAT) {
SyncHeartbeat *pSyncMsg = syncHeartbeatFromRpcMsg2(pMsg);
code = syncNodeOnHeartbeat(pSyncNode, pSyncMsg);
syncHeartbeatDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_HEARTBEAT_REPLY) {
SyncHeartbeatReply *pSyncMsg = syncHeartbeatReplyFromRpcMsg2(pMsg);
code = syncNodeOnHeartbeatReply(pSyncNode, pSyncMsg);
syncHeartbeatReplyDestroy(pSyncMsg);
}
syncNodeRelease(pSyncNode);
if (code != 0) {
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
}
return code;
}
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
SMnode *pMnode = pMsg->info.node;
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
......@@ -492,90 +533,65 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
return -1;
}
// ToDo: ugly! use function pointer
if (syncNodeStrategy(pSyncNode) == SYNC_STRATEGY_STANDARD_SNAPSHOT) {
if (pMsg->msgType == TDMT_SYNC_TIMEOUT) {
SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg);
code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg);
code = syncNodeOnTimer(pSyncNode, pSyncMsg);
syncTimeoutDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_PING) {
SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg);
code = syncNodeOnPingCb(pSyncNode, pSyncMsg);
syncPingDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) {
SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg);
code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg);
syncPingReplyDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg);
code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL);
code = syncNodeOnClientRequest(pSyncNode, pSyncMsg, NULL);
syncClientRequestDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) {
SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg);
code = syncNodeOnRequestVoteSnapshotCb(pSyncNode, pSyncMsg);
code = syncNodeOnRequestVote(pSyncNode, pSyncMsg);
syncRequestVoteDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) {
SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg);
code = syncNodeOnRequestVoteReplySnapshotCb(pSyncNode, pSyncMsg);
code = syncNodeOnRequestVoteReply(pSyncNode, pSyncMsg);
syncRequestVoteReplyDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) {
SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg);
code = syncNodeOnAppendEntriesSnapshotCb(pSyncNode, pSyncMsg);
code = syncNodeOnAppendEntries(pSyncNode, pSyncMsg);
syncAppendEntriesDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) {
SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg);
code = syncNodeOnAppendEntriesReplySnapshotCb(pSyncNode, pSyncMsg);
code = syncNodeOnAppendEntriesReply(pSyncNode, pSyncMsg);
syncAppendEntriesReplyDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) {
SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pMsg);
code = syncNodeOnSnapshotSendCb(pSyncNode, pSyncMsg);
code = syncNodeOnSnapshot(pSyncNode, pSyncMsg);
syncSnapshotSendDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) {
SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pMsg);
code = syncNodeOnSnapshotRspCb(pSyncNode, pSyncMsg);
code = syncNodeOnSnapshotReply(pSyncNode, pSyncMsg);
syncSnapshotRspDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_SET_MNODE_STANDBY) {
code = syncSetStandby(pMgmt->sync);
SRpcMsg rsp = {.code = code, .info = pMsg->info};
tmsgSendRsp(&rsp);
} else {
mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType));
code = -1;
}
} else {
if (pMsg->msgType == TDMT_SYNC_TIMEOUT) {
SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg);
code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg);
syncTimeoutDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_PING) {
SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg);
code = syncNodeOnPingCb(pSyncNode, pSyncMsg);
syncPingDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) {
SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg);
code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg);
syncPingReplyDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg);
code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL);
syncClientRequestDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) {
SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg);
code = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg);
syncRequestVoteDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) {
SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg);
code = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg);
syncRequestVoteReplyDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) {
SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg);
code = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg);
syncAppendEntriesDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) {
SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg);
code = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg);
syncAppendEntriesReplyDestroy(pSyncMsg);
} else {
mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType));
code = -1;
}
}
syncNodeRelease(pSyncNode);
......
......@@ -71,8 +71,8 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM
mInfo("trans:%d, is proposed and post sem", transId);
}
pMgmt->transId = 0;
taosWUnLockLatch(&pMgmt->lock);
tsem_post(&pMgmt->syncSem);
taosWUnLockLatch(&pMgmt->lock);
} else {
taosWUnLockLatch(&pMgmt->lock);
STrans *pTrans = mndAcquireTrans(pMnode, transId);
......@@ -113,27 +113,7 @@ void mndRestoreFinish(struct SSyncFSM *pFsm) {
}
}
void mndReConfig(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SReConfigCbMeta *cbMeta) {
SMnode *pMnode = pFsm->data;
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
pMgmt->errCode = cbMeta->code;
mInfo("trans:-1, sync reconfig is proposed, saved:%d code:0x%x, index:%" PRId64 " term:%" PRId64, pMgmt->transId,
cbMeta->code, cbMeta->index, cbMeta->term);
taosWLockLatch(&pMgmt->lock);
if (pMgmt->transId == -1) {
if (pMgmt->errCode != 0) {
mError("trans:-1, failed to propose sync reconfig since %s, post sem", tstrerror(pMgmt->errCode));
} else {
mInfo("trans:-1, sync reconfig is proposed, saved:%d code:0x%x, index:%" PRId64 " term:%" PRId64 " post sem",
pMgmt->transId, cbMeta->code, cbMeta->index, cbMeta->term);
}
pMgmt->transId = 0;
tsem_post(&pMgmt->syncSem);
}
taosWUnLockLatch(&pMgmt->lock);
}
void mndReConfig(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SReConfigCbMeta *cbMeta) {}
int32_t mndSnapshotStartRead(struct SSyncFSM *pFsm, void *pParam, void **ppReader) {
mInfo("start to read snapshot from sdb");
......@@ -179,11 +159,14 @@ void mndLeaderTransfer(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cb
static void mndBecomeFollower(struct SSyncFSM *pFsm) {
SMnode *pMnode = pFsm->data;
mInfo("vgId:1, become follower and post sem");
mInfo("vgId:1, become follower");
taosWLockLatch(&pMnode->syncMgmt.lock);
if (pMnode->syncMgmt.transId != 0) {
mInfo("vgId:1, become follower and post sem, trans:%d, failed to propose since not leader",
pMnode->syncMgmt.transId);
pMnode->syncMgmt.transId = 0;
pMnode->syncMgmt.errCode = TSDB_CODE_SYN_NOT_LEADER;
tsem_post(&pMnode->syncMgmt.syncSem);
}
taosWUnLockLatch(&pMnode->syncMgmt.lock);
......@@ -292,6 +275,7 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) {
int32_t code = syncPropose(pMgmt->sync, &req, isWeak);
if (code == 0) {
mInfo("trans:%d, is proposing and wait sem", pMgmt->transId);
tsem_wait(&pMgmt->syncSem);
} else if (code > 0) {
mInfo("trans:%d, confirm at once since replica is 1, continue execute", transId);
......@@ -301,13 +285,17 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) {
sdbWriteWithoutFree(pMnode->pSdb, pRaw);
sdbSetApplyInfo(pMnode->pSdb, req.info.conn.applyIndex, req.info.conn.applyTerm, SYNC_INDEX_INVALID);
code = 0;
} else if (code == -1 && terrno == TSDB_CODE_SYN_NOT_LEADER) {
} else {
taosWLockLatch(&pMgmt->lock);
mInfo("trans:%d, failed to proposed since %s", transId, terrstr());
pMgmt->transId = 0;
taosWUnLockLatch(&pMgmt->lock);
if (terrno == TSDB_CODE_SYN_NOT_LEADER) {
terrno = TSDB_CODE_APP_NOT_READY;
} else if (code == -1 && terrno == TSDB_CODE_SYN_INTERNAL_ERROR) {
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
} else {
terrno = TSDB_CODE_APP_ERROR;
}
}
rpcFreeCont(req.pCont);
if (code != 0) {
......@@ -315,6 +303,7 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) {
return code;
}
if (pMgmt->errCode != 0) terrno = pMgmt->errCode;
return pMgmt->errCode;
}
......@@ -328,6 +317,7 @@ void mndSyncStart(SMnode *pMnode) {
void mndSyncStop(SMnode *pMnode) {
taosWLockLatch(&pMnode->syncMgmt.lock);
if (pMnode->syncMgmt.transId != 0) {
mInfo("vgId:1, is stopped and post sem, trans:%d", pMnode->syncMgmt.transId);
pMnode->syncMgmt.transId = 0;
tsem_post(&pMnode->syncMgmt.syncSem);
}
......
......@@ -778,7 +778,7 @@ static int32_t mndTransSync(SMnode *pMnode, STrans *pTrans) {
mInfo("trans:%d, sync to other mnodes, stage:%s", pTrans->id, mndTransStr(pTrans->stage));
int32_t code = mndSyncPropose(pMnode, pRaw, pTrans->id);
if (code != 0) {
mError("trans:%d, failed to sync since %s", pTrans->id, terrstr());
mError("trans:%d, failed to sync, errno:%s code:%s", pTrans->id, terrstr(), tstrerror(code));
sdbFreeRaw(pRaw);
return -1;
}
......
......@@ -5,7 +5,8 @@ target_link_libraries(
PUBLIC sut
)
add_test(
NAME userTest
COMMAND userTest
)
#add_test(
# NAME userTest
# COMMAND userTest
#)
......@@ -5,5 +5,5 @@ target_include_directories(
PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/inc"
)
target_link_libraries(
sdb os common util wal
sdb os common util wal sync
)
......@@ -169,6 +169,7 @@ typedef struct SSdbRow {
typedef struct SSdb {
SMnode *pMnode;
SWal *pWal;
int64_t sync;
char *currDir;
char *tmpDir;
int64_t commitIndex;
......@@ -212,6 +213,7 @@ typedef struct SSdbOpt {
const char *path;
SMnode *pMnode;
SWal *pWal;
int64_t sync;
} SSdbOpt;
/**
......
......@@ -53,6 +53,7 @@ SSdb *sdbInit(SSdbOpt *pOption) {
}
pSdb->pWal = pOption->pWal;
pSdb->sync = pOption->sync;
pSdb->applyIndex = -1;
pSdb->applyTerm = -1;
pSdb->applyConfig = -1;
......
......@@ -15,6 +15,7 @@
#define _DEFAULT_SOURCE
#include "sdb.h"
#include "sync.h"
#include "tchecksum.h"
#include "wal.h"
......@@ -456,14 +457,25 @@ int32_t sdbWriteFile(SSdb *pSdb, int32_t delta) {
taosThreadMutexLock(&pSdb->filelock);
if (pSdb->pWal != NULL) {
code = walBeginSnapshot(pSdb->pWal, pSdb->applyIndex);
// code = walBeginSnapshot(pSdb->pWal, pSdb->applyIndex);
if (pSdb->sync == 0) {
code = 0;
} else {
code = syncBeginSnapshot(pSdb->sync, pSdb->applyIndex);
}
}
if (code == 0) {
code = sdbWriteFileImp(pSdb);
}
if (code == 0) {
if (pSdb->pWal != NULL) {
code = walEndSnapshot(pSdb->pWal);
// code = walEndSnapshot(pSdb->pWal);
if (pSdb->sync == 0) {
code = 0;
} else {
code = syncEndSnapshot(pSdb->sync);
}
}
}
if (code != 0) {
......
......@@ -228,7 +228,9 @@ int vnodeCommit(SVnode *pVnode) {
code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
}
walBeginSnapshot(pVnode->pWal, pVnode->state.applied);
// walBeginSnapshot(pVnode->pWal, pVnode->state.applied);
syncBeginSnapshot(pVnode->sync, pVnode->state.applied);
code = smaPreCommit(pVnode->pSma);
TSDB_CHECK_CODE(code, lino, _exit);
......@@ -282,7 +284,8 @@ int vnodeCommit(SVnode *pVnode) {
}
// apply the commit (TODO)
walEndSnapshot(pVnode->pWal);
// walEndSnapshot(pVnode->pWal);
syncEndSnapshot(pVnode->sync);
_exit:
if (code) {
......
......@@ -292,117 +292,68 @@ int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
vGTrace("vgId:%d, sync msg:%p will be processed, type:%s", pVnode->config.vgId, pMsg, TMSG_INFO(pMsg->msgType));
if (syncNodeStrategy(pSyncNode) == SYNC_STRATEGY_NO_SNAPSHOT) {
if (pMsg->msgType == TDMT_SYNC_TIMEOUT) {
SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg);
code = syncNodeOnTimer(pSyncNode, pSyncMsg);
syncTimeoutDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_PING) {
SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnPingCb(pSyncNode, pSyncMsg);
syncPingDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) {
SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg);
syncPingReplyDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL);
code = syncNodeOnClientRequest(pSyncNode, pSyncMsg, NULL);
syncClientRequestDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST_BATCH) {
SyncClientRequestBatch *pSyncMsg = syncClientRequestBatchFromRpcMsg(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnClientRequestBatchCb(pSyncNode, pSyncMsg);
syncClientRequestBatchDestroyDeep(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) {
SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg);
code = syncNodeOnRequestVote(pSyncNode, pSyncMsg);
syncRequestVoteDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) {
SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg);
code = syncNodeOnRequestVoteReply(pSyncNode, pSyncMsg);
syncRequestVoteReplyDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) {
SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg);
code = syncNodeOnAppendEntries(pSyncNode, pSyncMsg);
syncAppendEntriesDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) {
SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg);
syncAppendEntriesReplyDestroy(pSyncMsg);
} else {
vGError("vgId:%d, msg:%p failed to process since error msg type:%d", pVnode->config.vgId, pMsg, pMsg->msgType);
code = -1;
}
} else if (syncNodeStrategy(pSyncNode) == SYNC_STRATEGY_WAL_FIRST) {
// use wal first strategy
if (pMsg->msgType == TDMT_SYNC_TIMEOUT) {
SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg);
syncTimeoutDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_PING) {
SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnPingCb(pSyncNode, pSyncMsg);
syncPingDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) {
SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg);
syncPingReplyDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL);
syncClientRequestDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST_BATCH) {
SyncClientRequestBatch *pSyncMsg = syncClientRequestBatchFromRpcMsg(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnClientRequestBatchCb(pSyncNode, pSyncMsg);
syncClientRequestBatchDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) {
SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnRequestVoteSnapshotCb(pSyncNode, pSyncMsg);
syncRequestVoteDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) {
SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnRequestVoteReplySnapshotCb(pSyncNode, pSyncMsg);
syncRequestVoteReplyDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_BATCH) {
SyncAppendEntriesBatch *pSyncMsg = syncAppendEntriesBatchFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnAppendEntriesSnapshot2Cb(pSyncNode, pSyncMsg);
syncAppendEntriesBatchDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) {
SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg);
ASSERT(pSyncMsg != NULL);
code = syncNodeOnAppendEntriesReplySnapshot2Cb(pSyncNode, pSyncMsg);
code = syncNodeOnAppendEntriesReply(pSyncNode, pSyncMsg);
syncAppendEntriesReplyDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) {
SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pMsg);
code = syncNodeOnSnapshotSendCb(pSyncNode, pSyncMsg);
code = syncNodeOnSnapshot(pSyncNode, pSyncMsg);
syncSnapshotSendDestroy(pSyncMsg);
} else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) {
SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pMsg);
code = syncNodeOnSnapshotRspCb(pSyncNode, pSyncMsg);
code = syncNodeOnSnapshotReply(pSyncNode, pSyncMsg);
syncSnapshotRspDestroy(pSyncMsg);
} else {
vGError("vgId:%d, msg:%p failed to process since error msg type:%d", pVnode->config.vgId, pMsg, pMsg->msgType);
code = -1;
}
}
vTrace("vgId:%d, sync msg:%p is processed, type:%s code:0x%x", pVnode->config.vgId, pMsg, TMSG_INFO(pMsg->msgType),
code);
......@@ -413,6 +364,19 @@ int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
return code;
}
static int32_t vnodeSyncEqCtrlMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) {
if (msgcb == NULL) {
return -1;
}
int32_t code = tmsgPutToQueue(msgcb, SYNC_CTRL_QUEUE, pMsg);
if (code != 0) {
rpcFreeCont(pMsg->pCont);
pMsg->pCont = NULL;
}
return code;
}
static int32_t vnodeSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) {
if (msgcb == NULL) {
return -1;
......@@ -683,6 +647,7 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path) {
.msgcb = NULL,
.FpSendMsg = vnodeSyncSendMsg,
.FpEqMsg = vnodeSyncEqMsg,
.FpEqCtrlMsg = vnodeSyncEqCtrlMsg,
};
snprintf(syncInfo.path, sizeof(syncInfo.path), "%s%ssync", path, TD_DIRSEP);
......
......@@ -92,9 +92,8 @@ extern "C" {
// /\ UNCHANGED <<serverVars, commitIndex, messages>>
// /\ UNCHANGED <<candidateVars, leaderVars>>
//
int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg);
int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg);
int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg);
int32_t syncNodeOnAppendEntries(SSyncNode* ths, SyncAppendEntries* pMsg);
#ifdef __cplusplus
}
......
......@@ -40,9 +40,7 @@ extern "C" {
// /\ Discard(m)
// /\ UNCHANGED <<serverVars, candidateVars, logVars, elections>>
//
int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
#ifdef __cplusplus
}
......
......@@ -37,12 +37,10 @@ extern "C" {
// msource |-> i,
// mdest |-> j])
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
//
int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode);
int32_t syncNodeRequestVotePeersSnapshot(SSyncNode* pSyncNode);
int32_t syncNodeElect(SSyncNode* pSyncNode);
int32_t syncNodeRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg);
int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode);
int32_t syncNodeSendRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg);
#ifdef __cplusplus
}
......
......@@ -56,9 +56,8 @@ typedef struct SSyncIO {
int32_t (*FpOnSyncAppendEntries)(SSyncNode *pSyncNode, SyncAppendEntries *pMsg);
int32_t (*FpOnSyncAppendEntriesReply)(SSyncNode *pSyncNode, SyncAppendEntriesReply *pMsg);
int32_t (*FpOnSyncTimeout)(SSyncNode *pSyncNode, SyncTimeout *pMsg);
int32_t (*FpOnSyncSnapshotSend)(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg);
int32_t (*FpOnSyncSnapshotRsp)(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg);
int32_t (*FpOnSyncSnapshot)(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg);
int32_t (*FpOnSyncSnapshotReply)(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg);
int8_t isStart;
......
......@@ -45,8 +45,8 @@ void syncIndexMgrDestroy(SSyncIndexMgr *pSyncIndexMgr);
void syncIndexMgrClear(SSyncIndexMgr *pSyncIndexMgr);
void syncIndexMgrSetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, SyncIndex index);
SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId);
cJSON *syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr);
char *syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr);
cJSON * syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr);
char * syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr);
void syncIndexMgrSetStartTime(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, int64_t startTime);
int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId);
......
......@@ -57,9 +57,37 @@ typedef struct SRaftCfg SRaftCfg;
typedef struct SSyncRespMgr SSyncRespMgr;
typedef struct SSyncSnapshotSender SSyncSnapshotSender;
typedef struct SSyncSnapshotReceiver SSyncSnapshotReceiver;
typedef struct SSyncTimer SSyncTimer;
typedef struct SSyncHbTimerData SSyncHbTimerData;
extern bool gRaftDetailLog;
typedef struct SSyncHbTimerData {
SSyncNode* pSyncNode;
SSyncTimer* pTimer;
SRaftId destId;
uint64_t logicClock;
} SSyncHbTimerData;
typedef struct SSyncTimer {
void* pTimer;
TAOS_TMR_CALLBACK timerCb;
uint64_t logicClock;
uint64_t counter;
int32_t timerMS;
SRaftId destId;
void* pData;
} SSyncTimer;
int32_t syncHbTimerInit(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer, SRaftId destId);
int32_t syncHbTimerStart(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer);
int32_t syncHbTimerStop(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer);
typedef struct SPeerState {
SyncIndex lastSendIndex;
int64_t lastSendTime;
} SPeerState;
typedef struct SSyncNode {
// init by SSyncInfo
SyncGroupId vgId;
......@@ -73,6 +101,7 @@ typedef struct SSyncNode {
const SMsgCb* msgcb;
int32_t (*FpSendMsg)(const SEpSet* pEpSet, SRpcMsg* pMsg);
int32_t (*FpEqMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg);
int32_t (*FpEqCtrlMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg);
// init internal
SNodeInfo myNodeInfo;
......@@ -138,6 +167,9 @@ typedef struct SSyncNode {
TAOS_TMR_CALLBACK FpHeartbeatTimerCB; // Timer Fp
uint64_t heartbeatTimerCounter;
// peer heartbeat timer
SSyncTimer peerHeartbeatTimerArr[TSDB_MAX_REPLICA];
// callback
FpOnPingCb FpOnPing;
FpOnPingReplyCb FpOnPingReply;
......@@ -147,8 +179,8 @@ typedef struct SSyncNode {
FpOnRequestVoteReplyCb FpOnRequestVoteReply;
FpOnAppendEntriesCb FpOnAppendEntries;
FpOnAppendEntriesReplyCb FpOnAppendEntriesReply;
FpOnSnapshotSendCb FpOnSnapshotSend;
FpOnSnapshotRspCb FpOnSnapshotRsp;
FpOnSnapshotCb FpOnSnapshot;
FpOnSnapshotReplyCb FpOnSnapshotReply;
// tools
SSyncRespMgr* pSyncRespMgr;
......@@ -159,9 +191,15 @@ typedef struct SSyncNode {
SSyncSnapshotSender* senders[TSDB_MAX_REPLICA];
SSyncSnapshotReceiver* pNewNodeReceiver;
SPeerState peerStates[TSDB_MAX_REPLICA];
// is config changing
bool changing;
int64_t snapshottingIndex;
int64_t snapshottingTime;
int64_t minMatchIndex;
int64_t startTime;
int64_t leaderTime;
int64_t lastReplicateTime;
......@@ -174,7 +212,6 @@ void syncNodeStart(SSyncNode* pSyncNode);
void syncNodeStartStandBy(SSyncNode* pSyncNode);
void syncNodeClose(SSyncNode* pSyncNode);
int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak);
int32_t syncNodeProposeBatch(SSyncNode* pSyncNode, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize);
// option
bool syncNodeSnapshotEnable(SSyncNode* pSyncNode);
......@@ -197,12 +234,8 @@ int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms);
int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode);
int32_t syncNodeStartHeartbeatTimer(SSyncNode* pSyncNode);
int32_t syncNodeStartHeartbeatTimerNow(SSyncNode* pSyncNode);
int32_t syncNodeStartHeartbeatTimerMS(SSyncNode* pSyncNode, int32_t ms);
int32_t syncNodeStopHeartbeatTimer(SSyncNode* pSyncNode);
int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode);
int32_t syncNodeRestartHeartbeatTimerNow(SSyncNode* pSyncNode);
int32_t syncNodeRestartNowHeartbeatTimerMS(SSyncNode* pSyncNode, int32_t ms);
// utils --------------
int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pSyncNode, SRpcMsg* pMsg);
......@@ -214,6 +247,8 @@ void syncNodeErrorLog(const SSyncNode* pSyncNode, char* str);
char* syncNode2SimpleStr(const SSyncNode* pSyncNode);
bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config);
void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex);
SyncIndex syncMinMatchIndex(SSyncNode* pSyncNode);
char* syncNodePeerState2Str(const SSyncNode* pSyncNode);
SSyncNode* syncNodeAcquire(int64_t rid);
void syncNodeRelease(SSyncNode* pNode);
......@@ -221,6 +256,7 @@ void syncNodeRelease(SSyncNode* pNode);
// raft state change --------------
void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term);
void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term);
void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm);
void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr);
void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr);
......@@ -240,21 +276,23 @@ void syncNodeMaybeUpdateCommitBySnapshot(SSyncNode* pSyncNode);
SyncIndex syncNodeGetLastIndex(const SSyncNode* pSyncNode);
SyncTerm syncNodeGetLastTerm(SSyncNode* pSyncNode);
int32_t syncNodeGetLastIndexTerm(SSyncNode* pSyncNode, SyncIndex* pLastIndex, SyncTerm* pLastTerm);
SyncIndex syncNodeSyncStartIndex(SSyncNode* pSyncNode);
SyncIndex syncNodeGetPreIndex(SSyncNode* pSyncNode, SyncIndex index);
SyncTerm syncNodeGetPreTerm(SSyncNode* pSyncNode, SyncIndex index);
int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex* pPreIndex, SyncTerm* pPreTerm);
bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg);
int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag);
int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag);
int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex);
int32_t syncNodePreCommit(SSyncNode* ths, SSyncRaftEntry* pEntry, int32_t code);
int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg);
bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId);
SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId);
SSyncTimer* syncNodeGetHbTimer(SSyncNode* ths, SRaftId* pDestId);
SPeerState* syncNodeGetPeerState(SSyncNode* ths, const SRaftId* pDestId);
bool syncNodeNeedSendAppendEntries(SSyncNode* ths, const SRaftId* pDestId, const SyncAppendEntries* pMsg);
int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta);
int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta);
......@@ -271,7 +309,12 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p
int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode);
bool syncNodeIsMnode(SSyncNode* pSyncNode);
int32_t syncNodePeerStateInit(SSyncNode* pSyncNode);
// trace log
void syncLogRecvTimer(SSyncNode* pSyncNode, const SyncTimeout* pMsg, const char* s);
void syncLogSendRequestVote(SSyncNode* pSyncNode, const SyncRequestVote* pMsg, const char* s);
void syncLogRecvRequestVote(SSyncNode* pSyncNode, const SyncRequestVote* pMsg, const char* s);
......
......@@ -45,8 +45,8 @@ int32_t raftCfgIndexClose(SRaftCfgIndex *pRaftCfgIndex);
int32_t raftCfgIndexPersist(SRaftCfgIndex *pRaftCfgIndex);
int32_t raftCfgIndexAddConfigIndex(SRaftCfgIndex *pRaftCfgIndex, SyncIndex configIndex);
cJSON *raftCfgIndex2Json(SRaftCfgIndex *pRaftCfgIndex);
char *raftCfgIndex2Str(SRaftCfgIndex *pRaftCfgIndex);
cJSON * raftCfgIndex2Json(SRaftCfgIndex *pRaftCfgIndex);
char * raftCfgIndex2Str(SRaftCfgIndex *pRaftCfgIndex);
int32_t raftCfgIndexFromJson(const cJSON *pRoot, SRaftCfgIndex *pRaftCfgIndex);
int32_t raftCfgIndexFromStr(const char *s, SRaftCfgIndex *pRaftCfgIndex);
......@@ -73,14 +73,14 @@ int32_t raftCfgClose(SRaftCfg *pRaftCfg);
int32_t raftCfgPersist(SRaftCfg *pRaftCfg);
int32_t raftCfgAddConfigIndex(SRaftCfg *pRaftCfg, SyncIndex configIndex);
cJSON *syncCfg2Json(SSyncCfg *pSyncCfg);
char *syncCfg2Str(SSyncCfg *pSyncCfg);
char *syncCfg2SimpleStr(SSyncCfg *pSyncCfg);
cJSON * syncCfg2Json(SSyncCfg *pSyncCfg);
char * syncCfg2Str(SSyncCfg *pSyncCfg);
char * syncCfg2SimpleStr(SSyncCfg *pSyncCfg);
int32_t syncCfgFromJson(const cJSON *pRoot, SSyncCfg *pSyncCfg);
int32_t syncCfgFromStr(const char *s, SSyncCfg *pSyncCfg);
cJSON *raftCfg2Json(SRaftCfg *pRaftCfg);
char *raftCfg2Str(SRaftCfg *pRaftCfg);
cJSON * raftCfg2Json(SRaftCfg *pRaftCfg);
char * raftCfg2Str(SRaftCfg *pRaftCfg);
int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg);
int32_t raftCfgFromStr(const char *s, SRaftCfg *pRaftCfg);
......
......@@ -50,16 +50,15 @@ extern "C" {
// msource |-> i,
// mdest |-> j])
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
//
int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode);
int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode);
int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode);
int32_t syncNodeAppendEntriesOnePeer(SSyncNode* pSyncNode, SRaftId* pDestId, SyncIndex nextIndex);
int32_t syncNodeHeartbeatPeers(SSyncNode* pSyncNode);
int32_t syncNodeSendHeartbeat(SSyncNode* pSyncNode, const SRaftId* pDestId, const SyncHeartbeat* pMsg);
int32_t syncNodeReplicate(SSyncNode* pSyncNode, bool isTimer);
int32_t syncNodeAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg);
int32_t syncNodeAppendEntriesBatch(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntriesBatch* pMsg);
int32_t syncNodeReplicate(SSyncNode* pSyncNode);
int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId);
int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* pDestId, const SyncAppendEntries* pMsg);
int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* pDestId, const SyncAppendEntries* pMsg);
#ifdef __cplusplus
}
......
......@@ -49,8 +49,7 @@ extern "C" {
// m)
// /\ UNCHANGED <<state, currentTerm, candidateVars, leaderVars, logVars>>
//
int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg);
int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg);
int32_t syncNodeOnRequestVote(SSyncNode* ths, SyncRequestVote* pMsg);
#ifdef __cplusplus
}
......
......@@ -44,8 +44,7 @@ extern "C" {
// /\ Discard(m)
// /\ UNCHANGED <<serverVars, votedFor, leaderVars, logVars>>
//
int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg);
int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg);
int32_t syncNodeOnRequestVoteReply(SSyncNode* ths, SyncRequestVoteReply* pMsg);
#ifdef __cplusplus
}
......
......@@ -32,9 +32,9 @@ typedef struct SRespStub {
} SRespStub;
typedef struct SSyncRespMgr {
SHashObj *pRespHash;
SHashObj * pRespHash;
int64_t ttl;
void *data;
void * data;
TdThreadMutex mutex;
uint64_t seqNum;
} SSyncRespMgr;
......
......@@ -51,6 +51,7 @@ typedef struct SSyncSnapshotSender {
int32_t replicaIndex;
SyncTerm term;
SyncTerm privateTerm;
int64_t startTime;
bool finish;
} SSyncSnapshotSender;
......@@ -67,6 +68,8 @@ cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender);
char *snapshotSender2Str(SSyncSnapshotSender *pSender);
char *snapshotSender2SimpleStr(SSyncSnapshotSender *pSender, char *event);
int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId);
//---------------------------------------------------
typedef struct SSyncSnapshotReceiver {
bool start;
......@@ -94,8 +97,8 @@ char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event)
//---------------------------------------------------
// on message
int32_t syncNodeOnSnapshotSendCb(SSyncNode *ths, SyncSnapshotSend *pMsg);
int32_t syncNodeOnSnapshotRspCb(SSyncNode *ths, SyncSnapshotRsp *pMsg);
int32_t syncNodeOnSnapshot(SSyncNode *ths, SyncSnapshotSend *pMsg);
int32_t syncNodeOnSnapshotReply(SSyncNode *ths, SyncSnapshotRsp *pMsg);
#ifdef __cplusplus
}
......
......@@ -39,7 +39,7 @@ extern "C" {
// /\ voterLog' = [voterLog EXCEPT ![i] = [j \in {} |-> <<>>]]
// /\ UNCHANGED <<messages, leaderVars, logVars>>
//
int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg);
int32_t syncNodeOnTimer(SSyncNode* ths, SyncTimeout* pMsg);
#ifdef __cplusplus
}
......
......@@ -89,240 +89,6 @@
// /\ UNCHANGED <<candidateVars, leaderVars>>
//
int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) {
int32_t ret = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
syncLogRecvAppendEntries(ths, pMsg, "maybe replica already dropped");
return -1;
}
// maybe update term
if (pMsg->term > ths->pRaftStore->currentTerm) {
syncNodeUpdateTerm(ths, pMsg->term);
}
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
// reset elect timer
if (pMsg->term == ths->pRaftStore->currentTerm) {
ths->leaderCache = pMsg->srcId;
syncNodeResetElectTimer(ths);
}
ASSERT(pMsg->dataLen >= 0);
// return to follower state
if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE) {
syncLogRecvAppendEntries(ths, pMsg, "candidate to follower");
syncNodeBecomeFollower(ths, "from candidate by append entries");
return -1; // ret or reply?
}
SyncTerm localPreLogTerm = 0;
if (pMsg->prevLogIndex >= SYNC_INDEX_BEGIN && pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) {
SSyncRaftEntry* pEntry = ths->pLogStore->getEntry(ths->pLogStore, pMsg->prevLogIndex);
if (pEntry == NULL) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "getEntry error, index:%" PRId64 ", since %s", pMsg->prevLogIndex, terrstr());
syncNodeErrorLog(ths, logBuf);
return -1;
}
localPreLogTerm = pEntry->term;
syncEntryDestory(pEntry);
}
bool logOK =
(pMsg->prevLogIndex == SYNC_INDEX_INVALID) ||
((pMsg->prevLogIndex >= SYNC_INDEX_BEGIN) &&
(pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) && (pMsg->prevLogTerm == localPreLogTerm));
// reject request
if ((pMsg->term < ths->pRaftStore->currentTerm) ||
((pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK)) {
syncLogRecvAppendEntries(ths, pMsg, "reject");
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->pRaftStore->currentTerm;
pReply->success = false;
pReply->matchIndex = SYNC_INDEX_INVALID;
pReply->startTime = ths->startTime;
// msg event log
syncLogSendAppendEntriesReply(ths, pReply, "");
SRpcMsg rpcMsg;
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
syncAppendEntriesReplyDestroy(pReply);
return ret;
}
// accept request
if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_FOLLOWER && logOK) {
// preIndex = -1, or has preIndex entry in local log
ASSERT(pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore));
// has extra entries (> preIndex) in local log
bool hasExtraEntries = pMsg->prevLogIndex < ths->pLogStore->getLastIndex(ths->pLogStore);
// has entries in SyncAppendEntries msg
bool hasAppendEntries = pMsg->dataLen > 0;
syncLogRecvAppendEntries(ths, pMsg, "accept");
if (hasExtraEntries && hasAppendEntries) {
// not conflict by default
bool conflict = false;
SyncIndex extraIndex = pMsg->prevLogIndex + 1;
SSyncRaftEntry* pExtraEntry = ths->pLogStore->getEntry(ths->pLogStore, extraIndex);
if (pExtraEntry == NULL) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "getEntry error2, index:%" PRId64 ", since %s", extraIndex, terrstr());
syncNodeErrorLog(ths, logBuf);
return -1;
}
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
if (pAppendEntry == NULL) {
syncNodeErrorLog(ths, "syncEntryDeserialize pAppendEntry error");
return -1;
}
// log not match, conflict
ASSERT(extraIndex == pAppendEntry->index);
if (pExtraEntry->term != pAppendEntry->term) {
conflict = true;
}
if (conflict) {
// roll back
SyncIndex delBegin = ths->pLogStore->getLastIndex(ths->pLogStore);
SyncIndex delEnd = extraIndex;
sTrace("syncNodeOnAppendEntriesCb --> conflict:%d, delBegin:%" PRId64 ", delEnd:%" PRId64, conflict, delBegin,
delEnd);
// notice! reverse roll back!
for (SyncIndex index = delEnd; index >= delBegin; --index) {
if (ths->pFsm->FpRollBackCb != NULL) {
SSyncRaftEntry* pRollBackEntry = ths->pLogStore->getEntry(ths->pLogStore, index);
if (pRollBackEntry == NULL) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "getEntry error3, index:%" PRId64 ", since %s", index, terrstr());
syncNodeErrorLog(ths, logBuf);
return -1;
}
// if (pRollBackEntry->msgType != TDMT_SYNC_NOOP) {
if (syncUtilUserRollback(pRollBackEntry->msgType)) {
SRpcMsg rpcMsg;
syncEntry2OriginalRpc(pRollBackEntry, &rpcMsg);
SFsmCbMeta cbMeta = {0};
cbMeta.index = pRollBackEntry->index;
cbMeta.lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, cbMeta.index);
cbMeta.isWeak = pRollBackEntry->isWeak;
cbMeta.code = 0;
cbMeta.state = ths->state;
cbMeta.seqNum = pRollBackEntry->seqNum;
ths->pFsm->FpRollBackCb(ths->pFsm, &rpcMsg, cbMeta);
rpcFreeCont(rpcMsg.pCont);
}
syncEntryDestory(pRollBackEntry);
}
}
// delete confict entries
ths->pLogStore->truncate(ths->pLogStore, extraIndex);
// append new entries
ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry);
// pre commit
syncNodePreCommit(ths, pAppendEntry, 0);
}
// free memory
syncEntryDestory(pExtraEntry);
syncEntryDestory(pAppendEntry);
} else if (hasExtraEntries && !hasAppendEntries) {
// do nothing
} else if (!hasExtraEntries && hasAppendEntries) {
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
if (pAppendEntry == NULL) {
syncNodeErrorLog(ths, "syncEntryDeserialize pAppendEntry2 error");
return -1;
}
// append new entries
ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry);
// pre commit
syncNodePreCommit(ths, pAppendEntry, 0);
// free memory
syncEntryDestory(pAppendEntry);
} else if (!hasExtraEntries && !hasAppendEntries) {
// do nothing
} else {
syncNodeLog3("", ths);
ASSERT(0);
}
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->pRaftStore->currentTerm;
pReply->success = true;
if (hasAppendEntries) {
pReply->matchIndex = pMsg->prevLogIndex + 1;
} else {
pReply->matchIndex = pMsg->prevLogIndex;
}
pReply->startTime = ths->startTime;
// msg event log
syncLogSendAppendEntriesReply(ths, pReply, "");
SRpcMsg rpcMsg;
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
syncAppendEntriesReplyDestroy(pReply);
// maybe update commit index from leader
if (pMsg->commitIndex > ths->commitIndex) {
// has commit entry in local
if (pMsg->commitIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) {
SyncIndex beginIndex = ths->commitIndex + 1;
SyncIndex endIndex = pMsg->commitIndex;
// update commit index
ths->commitIndex = pMsg->commitIndex;
// call back Wal
ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex);
int32_t code = syncNodeCommit(ths, beginIndex, endIndex, ths->state);
ASSERT(code == 0);
}
}
}
return ret;
}
static int32_t syncNodeMakeLogSame(SSyncNode* ths, SyncAppendEntries* pMsg) {
int32_t code;
......@@ -505,99 +271,48 @@ static bool syncNodeOnAppendEntriesLogOK(SSyncNode* pSyncNode, SyncAppendEntries
return false;
}
int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg) {
int32_t ret = 0;
int32_t code = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
syncLogRecvAppendEntriesBatch(ths, pMsg, "maybe replica already dropped");
return -1;
}
// maybe update term
if (pMsg->term > ths->pRaftStore->currentTerm) {
syncNodeUpdateTerm(ths, pMsg->term);
}
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
// reset elect timer
if (pMsg->term == ths->pRaftStore->currentTerm) {
ths->leaderCache = pMsg->srcId;
syncNodeResetElectTimer(ths);
}
ASSERT(pMsg->dataLen >= 0);
// candidate to follower
//
// operation:
// to follower
do {
bool condition = pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE;
if (condition) {
syncLogRecvAppendEntriesBatch(ths, pMsg, "candidate to follower");
syncNodeBecomeFollower(ths, "from candidate by append entries");
return 0; // do not reply?
}
} while (0);
// fake match
//
// condition1:
// preIndex <= my commit index
//
// operation:
// if hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex, append entry
// match my-commit-index or my-commit-index + batchSize
do {
bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) &&
(pMsg->prevLogIndex <= ths->commitIndex);
if (condition) {
syncLogRecvAppendEntriesBatch(ths, pMsg, "fake match");
SyncIndex matchIndex = ths->commitIndex;
bool hasAppendEntries = pMsg->dataLen > 0;
SOffsetAndContLen* metaTableArr = syncAppendEntriesBatchMetaTableArray(pMsg);
int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) {
// maybe update commit index, leader notice me
if (newCommitIndex > ths->commitIndex) {
// has commit entry in local
if (newCommitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) {
// advance commit index to sanpshot first
SSnapshot snapshot;
ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot);
if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) {
SyncIndex commitBegin = ths->commitIndex;
SyncIndex commitEnd = snapshot.lastApplyIndex;
ths->commitIndex = snapshot.lastApplyIndex;
if (hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex) {
int32_t pass = 0;
SyncIndex logLastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
bool hasExtraEntries = logLastIndex > pMsg->prevLogIndex;
// make log same
if (hasExtraEntries) {
// make log same, rollback deleted entries
pass = syncNodeDoMakeLogSame(ths, pMsg->prevLogIndex + 1);
ASSERT(pass >= 0);
char eventLog[128];
snprintf(eventLog, sizeof(eventLog), "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin,
commitEnd);
syncNodeEventLog(ths, eventLog);
}
// append entry batch
if (pass == 0) {
// assert! no batch
ASSERT(pMsg->dataCount <= 1);
SyncIndex beginIndex = ths->commitIndex + 1;
SyncIndex endIndex = newCommitIndex;
for (int32_t i = 0; i < pMsg->dataCount; ++i) {
SSyncRaftEntry* pAppendEntry = (SSyncRaftEntry*)(pMsg->data + metaTableArr[i].offset);
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
if (code != 0) {
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
return -1;
}
// update commit index
ths->commitIndex = newCommitIndex;
code = syncNodePreCommit(ths, pAppendEntry, 0);
// call back Wal
int32_t code = ths->pLogStore->syncLogUpdateCommitIndex(ths->pLogStore, ths->commitIndex);
ASSERT(code == 0);
// syncEntryDestory(pAppendEntry);
code = syncNodeDoCommit(ths, beginIndex, endIndex, ths->state);
ASSERT(code == 0);
}
}
// fsync once
SSyncLogStoreData* pData = ths->pLogStore->data;
SWal* pWal = pData->pWal;
walFsync(pWal, false);
return 0;
}
// update match index
matchIndex = pMsg->prevLogIndex + pMsg->dataCount;
int32_t syncNodeOnAppendEntries(SSyncNode* ths, SyncAppendEntries* pMsg) {
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) {
syncLogRecvAppendEntries(ths, pMsg, "not in my config");
goto _IGNORE;
}
// prepare response msg
......@@ -605,400 +320,165 @@ int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatc
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->pRaftStore->currentTerm;
pReply->success = false;
// pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
pReply->matchIndex = SYNC_INDEX_INVALID;
pReply->lastSendIndex = pMsg->prevLogIndex + 1;
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
pReply->success = true;
pReply->matchIndex = matchIndex;
pReply->startTime = ths->startTime;
// msg event log
syncLogSendAppendEntriesReply(ths, pReply, "");
// send response
SRpcMsg rpcMsg;
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
syncAppendEntriesReplyDestroy(pReply);
return 0;
if (pMsg->term < ths->pRaftStore->currentTerm) {
syncLogRecvAppendEntries(ths, pMsg, "reject, small term");
goto _SEND_RESPONSE;
}
} while (0);
// calculate logOK here, before will coredump, due to fake match
bool logOK = syncNodeOnAppendEntriesBatchLogOK(ths, pMsg);
// not match
//
// condition1:
// term < myTerm
//
// condition2:
// !logOK
//
// operation:
// not match
// no operation on log
do {
bool condition1 = pMsg->term < ths->pRaftStore->currentTerm;
bool condition2 =
(pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK;
bool condition = condition1 || condition2;
if (condition) {
syncLogRecvAppendEntriesBatch(ths, pMsg, "not match");
if (pMsg->term > ths->pRaftStore->currentTerm) {
pReply->term = pMsg->term;
}
// maybe update commit index by snapshot
syncNodeMaybeUpdateCommitBySnapshot(ths);
syncNodeStepDown(ths, pMsg->term);
syncNodeResetElectTimer(ths);
// prepare response msg
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->pRaftStore->currentTerm;
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
pReply->success = false;
pReply->matchIndex = ths->commitIndex;
pReply->startTime = ths->startTime;
SyncIndex startIndex = ths->pLogStore->syncLogBeginIndex(ths->pLogStore);
SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
// msg event log
syncLogSendAppendEntriesReply(ths, pReply, "");
if (pMsg->prevLogIndex > lastIndex) {
syncLogRecvAppendEntries(ths, pMsg, "reject, index not match");
goto _SEND_RESPONSE;
}
// send response
SRpcMsg rpcMsg;
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
syncAppendEntriesReplyDestroy(pReply);
if (pMsg->prevLogIndex >= startIndex) {
SyncTerm myPreLogTerm = syncNodeGetPreTerm(ths, pMsg->prevLogIndex + 1);
ASSERT(myPreLogTerm != SYNC_TERM_INVALID);
return 0;
if (myPreLogTerm != pMsg->prevLogTerm) {
syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term not match");
goto _SEND_RESPONSE;
}
}
} while (0);
// really match
//
// condition:
// logOK
//
// operation:
// match
// make log same
do {
bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && logOK;
if (condition) {
// has extra entries (> preIndex) in local log
SyncIndex myLastIndex = syncNodeGetLastIndex(ths);
bool hasExtraEntries = myLastIndex > pMsg->prevLogIndex;
// has entries in SyncAppendEntries msg
// accept
pReply->success = true;
bool hasAppendEntries = pMsg->dataLen > 0;
SOffsetAndContLen* metaTableArr = syncAppendEntriesBatchMetaTableArray(pMsg);
if (hasAppendEntries) {
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
ASSERT(pAppendEntry != NULL);
syncLogRecvAppendEntriesBatch(ths, pMsg, "really match");
SyncIndex appendIndex = pMsg->prevLogIndex + 1;
SSyncRaftEntry* pLocalEntry = NULL;
int32_t code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry);
if (code == 0) {
if (pLocalEntry->term == pAppendEntry->term) {
// do nothing
int32_t pass = 0;
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "log match, do nothing, index:%" PRId64, appendIndex);
syncNodeEventLog(ths, logBuf);
if (hasExtraEntries) {
// make log same, rollback deleted entries
pass = syncNodeDoMakeLogSame(ths, pMsg->prevLogIndex + 1);
ASSERT(pass >= 0);
} else {
// truncate
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, truncate error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
goto _IGNORE;
}
if (hasAppendEntries) {
// append entry batch
if (pass == 0) {
// assert! no batch
ASSERT(pMsg->dataCount <= 1);
// append entry batch
for (int32_t i = 0; i < pMsg->dataCount; ++i) {
SSyncRaftEntry* pAppendEntry = (SSyncRaftEntry*)(pMsg->data + metaTableArr[i].offset);
// append
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
if (code != 0) {
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
return -1;
}
code = syncNodePreCommit(ths, pAppendEntry, 0);
ASSERT(code == 0);
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, append error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
// syncEntryDestory(pAppendEntry);
goto _IGNORE;
}
}
// fsync once
SSyncLogStoreData* pData = ths->pLogStore->data;
SWal* pWal = pData->pWal;
walFsync(pWal, false);
}
// prepare response msg
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->pRaftStore->currentTerm;
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
pReply->success = true;
pReply->matchIndex = hasAppendEntries ? pMsg->prevLogIndex + pMsg->dataCount : pMsg->prevLogIndex;
pReply->startTime = ths->startTime;
// msg event log
syncLogSendAppendEntriesReply(ths, pReply, "");
// send response
SRpcMsg rpcMsg;
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
syncAppendEntriesReplyDestroy(pReply);
// maybe update commit index, leader notice me
if (pMsg->commitIndex > ths->commitIndex) {
SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
SyncIndex beginIndex = 0;
SyncIndex endIndex = -1;
// has commit entry in local
if (pMsg->commitIndex <= lastIndex) {
beginIndex = ths->commitIndex + 1;
endIndex = pMsg->commitIndex;
// update commit index
ths->commitIndex = pMsg->commitIndex;
// call back Wal
code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex);
ASSERT(code == 0);
} else if (pMsg->commitIndex > lastIndex && ths->commitIndex < lastIndex) {
beginIndex = ths->commitIndex + 1;
endIndex = lastIndex;
// update commit index, speed up
ths->commitIndex = lastIndex;
// call back Wal
code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex);
ASSERT(code == 0);
}
} else {
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
// log not exist
code = syncNodeCommit(ths, beginIndex, endIndex, ths->state);
ASSERT(code == 0);
}
// truncate
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, truncate error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
return 0;
goto _IGNORE;
}
} while (0);
return 0;
}
int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg) {
int32_t ret = 0;
int32_t code = 0;
// append
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, append error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
syncLogRecvAppendEntries(ths, pMsg, "maybe replica already dropped");
return -1;
goto _IGNORE;
}
// maybe update term
if (pMsg->term > ths->pRaftStore->currentTerm) {
syncNodeUpdateTerm(ths, pMsg->term);
}
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
} else {
// error
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, get local entry error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
// reset elect timer
if (pMsg->term == ths->pRaftStore->currentTerm) {
ths->leaderCache = pMsg->srcId;
syncNodeResetElectTimer(ths);
goto _IGNORE;
}
ASSERT(pMsg->dataLen >= 0);
// candidate to follower
//
// operation:
// to follower
do {
bool condition = pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE;
if (condition) {
syncLogRecvAppendEntries(ths, pMsg, "candidate to follower");
syncNodeBecomeFollower(ths, "from candidate by append entries");
return 0; // do not reply?
}
} while (0);
// fake match
//
// condition1:
// preIndex <= my commit index
//
// operation:
// if hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex, append entry
// match my-commit-index or my-commit-index + 1
// no operation on log
do {
bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) &&
(pMsg->prevLogIndex <= ths->commitIndex);
if (condition) {
syncLogRecvAppendEntries(ths, pMsg, "fake match");
#if 0
if (code != 0 && terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
ASSERT(code == 0);
SyncIndex matchIndex = ths->commitIndex;
bool hasAppendEntries = pMsg->dataLen > 0;
if (hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex) {
// append entry
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
ASSERT(pAppendEntry != NULL);
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
ASSERT(code == 0);
{
// has extra entries (> preIndex) in local log
SyncIndex logLastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
bool hasExtraEntries = logLastIndex > pMsg->prevLogIndex;
} else {
ASSERT(code == 0);
if (hasExtraEntries) {
// make log same, rollback deleted entries
code = syncNodeMakeLogSame(ths, pMsg);
if (pLocalEntry->term == pAppendEntry->term) {
// do nothing
} else {
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
ASSERT(code == 0);
}
}
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
if (code != 0) {
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
return -1;
}
// pre commit
code = syncNodePreCommit(ths, pAppendEntry, 0);
ASSERT(code == 0);
}
}
#endif
// update match index
matchIndex = pMsg->prevLogIndex + 1;
pReply->matchIndex = pAppendEntry->index;
syncEntryDestory(pLocalEntry);
syncEntryDestory(pAppendEntry);
}
// prepare response msg
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->pRaftStore->currentTerm;
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
pReply->success = true;
pReply->matchIndex = matchIndex;
pReply->startTime = ths->startTime;
// msg event log
syncLogSendAppendEntriesReply(ths, pReply, "");
// send response
SRpcMsg rpcMsg;
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
syncAppendEntriesReplyDestroy(pReply);
} else {
// no append entries, do nothing
// maybe has extra entries, no harm
return ret;
// update match index
pReply->matchIndex = pMsg->prevLogIndex;
}
} while (0);
// calculate logOK here, before will coredump, due to fake match
bool logOK = syncNodeOnAppendEntriesLogOK(ths, pMsg);
// not match
//
// condition1:
// term < myTerm
//
// condition2:
// !logOK
//
// operation:
// not match
// no operation on log
do {
bool condition1 = pMsg->term < ths->pRaftStore->currentTerm;
bool condition2 =
(pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK;
bool condition = condition1 || condition2;
if (condition) {
syncLogRecvAppendEntries(ths, pMsg, "not match");
// prepare response msg
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->pRaftStore->currentTerm;
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
pReply->success = false;
pReply->matchIndex = SYNC_INDEX_INVALID;
pReply->startTime = ths->startTime;
// maybe update commit index, leader notice me
syncNodeFollowerCommit(ths, pMsg->commitIndex);
// msg event log
syncLogSendAppendEntriesReply(ths, pReply, "");
syncLogRecvAppendEntries(ths, pMsg, "accept");
goto _SEND_RESPONSE;
// send response
SRpcMsg rpcMsg;
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
_IGNORE:
syncAppendEntriesReplyDestroy(pReply);
return 0;
return ret;
}
} while (0);
// really match
//
// condition:
// logOK
//
// operation:
// match
// make log same
do {
bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && logOK;
if (condition) {
// has extra entries (> preIndex) in local log
SyncIndex myLastIndex = syncNodeGetLastIndex(ths);
bool hasExtraEntries = myLastIndex > pMsg->prevLogIndex;
// has entries in SyncAppendEntries msg
bool hasAppendEntries = pMsg->dataLen > 0;
syncLogRecvAppendEntries(ths, pMsg, "really match");
if (hasExtraEntries) {
// make log same, rollback deleted entries
code = syncNodeMakeLogSame(ths, pMsg);
ASSERT(code == 0);
}
if (hasAppendEntries) {
// append entry
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
ASSERT(pAppendEntry != NULL);
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
if (code != 0) {
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
return -1;
}
// pre commit
code = syncNodePreCommit(ths, pAppendEntry, 0);
ASSERT(code == 0);
syncEntryDestory(pAppendEntry);
}
// prepare response msg
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->pRaftStore->currentTerm;
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
pReply->success = true;
pReply->matchIndex = hasAppendEntries ? pMsg->prevLogIndex + 1 : pMsg->prevLogIndex;
pReply->startTime = ths->startTime;
_SEND_RESPONSE:
// msg event log
syncLogSendAppendEntriesReply(ths, pReply, "");
......@@ -1008,41 +488,5 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
syncAppendEntriesReplyDestroy(pReply);
// maybe update commit index, leader notice me
if (pMsg->commitIndex > ths->commitIndex) {
// has commit entry in local
if (pMsg->commitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) {
// advance commit index to sanpshot first
SSnapshot snapshot;
ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot);
if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) {
SyncIndex commitBegin = ths->commitIndex;
SyncIndex commitEnd = snapshot.lastApplyIndex;
ths->commitIndex = snapshot.lastApplyIndex;
char eventLog[128];
snprintf(eventLog, sizeof(eventLog), "commit by snapshot from index:%" PRId64 " to index:%" PRId64,
commitBegin, commitEnd);
syncNodeEventLog(ths, eventLog);
}
SyncIndex beginIndex = ths->commitIndex + 1;
SyncIndex endIndex = pMsg->commitIndex;
// update commit index
ths->commitIndex = pMsg->commitIndex;
// call back Wal
code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex);
ASSERT(code == 0);
code = syncNodeCommit(ths, beginIndex, endIndex, ths->state);
ASSERT(code == 0);
}
}
return ret;
}
} while (0);
return ret;
return 0;
}
\ No newline at end of file
......@@ -20,6 +20,7 @@
#include "syncRaftCfg.h"
#include "syncRaftLog.h"
#include "syncRaftStore.h"
#include "syncReplication.h"
#include "syncSnapshot.h"
#include "syncUtil.h"
#include "syncVoteMgr.h"
......@@ -37,74 +38,6 @@
// /\ Discard(m)
// /\ UNCHANGED <<serverVars, candidateVars, logVars, elections>>
//
int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
int32_t ret = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
syncLogRecvAppendEntriesReply(ths, pMsg, "maybe replica already dropped");
return -1;
}
// drop stale response
if (pMsg->term < ths->pRaftStore->currentTerm) {
syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response");
return 0;
}
// no need this code, because if I receive reply.term, then I must have sent for that term.
// if (pMsg->term > ths->pRaftStore->currentTerm) {
// syncNodeUpdateTerm(ths, pMsg->term);
// }
if (pMsg->term > ths->pRaftStore->currentTerm) {
syncLogRecvAppendEntriesReply(ths, pMsg, "error term");
return -1;
}
ASSERT(pMsg->term == ths->pRaftStore->currentTerm);
// update time
syncIndexMgrSetStartTime(ths->pNextIndex, &(pMsg->srcId), pMsg->startTime);
syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), taosGetTimestampMs());
SyncIndex beforeNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
SyncIndex beforeMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
if (pMsg->success) {
// nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1]
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1);
// matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex]
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex);
// maybe commit
syncMaybeAdvanceCommitIndex(ths);
} else {
SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
// notice! int64, uint64
if (nextIndex > SYNC_INDEX_BEGIN) {
--nextIndex;
} else {
nextIndex = SYNC_INDEX_BEGIN;
}
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex);
}
SyncIndex afterNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
SyncIndex afterMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
do {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf),
"before next:%" PRId64 ", match:%" PRId64 ", after next:%" PRId64 ", match:%" PRId64, beforeNextIndex,
beforeMatchIndex, afterNextIndex, afterMatchIndex);
syncLogRecvAppendEntriesReply(ths, pMsg, logBuf);
} while (0);
return 0;
}
// only start once
static void syncNodeStartSnapshotOnce(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, SyncTerm lastApplyTerm,
......@@ -151,13 +84,13 @@ static void syncNodeStartSnapshotOnce(SSyncNode* ths, SyncIndex beginIndex, Sync
}
}
int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
int32_t ret = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
syncLogRecvAppendEntriesReply(ths, pMsg, "maybe replica already dropped");
return -1;
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) {
syncLogRecvAppendEntriesReply(ths, pMsg, "not in my config");
return 0;
}
// drop stale response
......@@ -166,251 +99,40 @@ int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntrie
return 0;
}
// error term
if (ths->state == TAOS_SYNC_STATE_LEADER) {
if (pMsg->term > ths->pRaftStore->currentTerm) {
syncLogRecvAppendEntriesReply(ths, pMsg, "error term");
syncNodeStepDown(ths, pMsg->term);
return -1;
}
ASSERT(pMsg->term == ths->pRaftStore->currentTerm);
// update time
syncIndexMgrSetStartTime(ths->pNextIndex, &(pMsg->srcId), pMsg->startTime);
syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), taosGetTimestampMs());
SyncIndex beforeNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
SyncIndex beforeMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
if (pMsg->success) {
SyncIndex newNextIndex = pMsg->matchIndex + 1;
SyncIndex newMatchIndex = pMsg->matchIndex;
bool needStartSnapshot = false;
if (newMatchIndex >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, newMatchIndex)) {
needStartSnapshot = true;
}
if (!needStartSnapshot) {
// update next-index, match-index
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), newNextIndex);
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), newMatchIndex);
// maybe commit
if (ths->state == TAOS_SYNC_STATE_LEADER) {
syncMaybeAdvanceCommitIndex(ths);
}
} else {
// start snapshot <match+1, old snapshot.end>
SSnapshot oldSnapshot;
ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &oldSnapshot);
if (oldSnapshot.lastApplyIndex > newMatchIndex) {
syncNodeStartSnapshotOnce(ths, newMatchIndex + 1, oldSnapshot.lastApplyIndex, oldSnapshot.lastApplyTerm,
pMsg); // term maybe not ok?
}
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), oldSnapshot.lastApplyIndex + 1);
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), newMatchIndex);
}
// event log, update next-index
do {
char host[64];
int16_t port;
syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port);
char logBuf[256];
snprintf(logBuf, sizeof(logBuf), "reset next-index:%" PRId64 ", match-index:%" PRId64 " for %s:%d", newNextIndex,
newMatchIndex, host, port);
syncNodeEventLog(ths, logBuf);
} while (0);
} else {
SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
if (nextIndex > SYNC_INDEX_BEGIN) {
--nextIndex;
// speed up
if (nextIndex > pMsg->matchIndex + 1) {
nextIndex = pMsg->matchIndex + 1;
}
bool needStartSnapshot = false;
if (nextIndex >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex)) {
needStartSnapshot = true;
}
if (nextIndex - 1 >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex - 1)) {
needStartSnapshot = true;
}
if (!needStartSnapshot) {
// do nothing
} else {
SSnapshot oldSnapshot;
ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &oldSnapshot);
SyncTerm newSnapshotTerm = oldSnapshot.lastApplyTerm;
SyncIndex endIndex;
if (ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex + 1)) {
endIndex = nextIndex;
} else {
endIndex = oldSnapshot.lastApplyIndex;
}
syncNodeStartSnapshotOnce(ths, pMsg->matchIndex + 1, endIndex, newSnapshotTerm, pMsg);
// get sender
SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(pMsg->srcId));
ASSERT(pSender != NULL);
SyncIndex sentryIndex = pSender->snapshot.lastApplyIndex + 1;
// update nextIndex to sentryIndex
if (nextIndex <= sentryIndex) {
nextIndex = sentryIndex;
}
}
} else {
nextIndex = SYNC_INDEX_BEGIN;
}
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex);
SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
if (pMsg->matchIndex > oldMatchIndex) {
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex);
}
// event log, update next-index
do {
char host[64];
int16_t port;
syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port);
SyncIndex newNextIndex = nextIndex;
SyncIndex newMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
char logBuf[256];
snprintf(logBuf, sizeof(logBuf), "reset2 next-index:%" PRId64 ", match-index:%" PRId64 " for %s:%d", newNextIndex,
newMatchIndex, host, port);
syncNodeEventLog(ths, logBuf);
} while (0);
}
SyncIndex afterNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
SyncIndex afterMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
do {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf),
"before next:%" PRId64 ", match:%" PRId64 ", after next:%" PRId64 ", match:%" PRId64, beforeNextIndex,
beforeMatchIndex, afterNextIndex, afterMatchIndex);
syncLogRecvAppendEntriesReply(ths, pMsg, logBuf);
} while (0);
return 0;
}
int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
int32_t ret = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
syncLogRecvAppendEntriesReply(ths, pMsg, "maybe replica already dropped");
return -1;
}
// drop stale response
if (pMsg->term < ths->pRaftStore->currentTerm) {
syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response");
return 0;
}
// no need this code, because if I receive reply.term, then I must have sent for that term.
// if (pMsg->term > ths->pRaftStore->currentTerm) {
// syncNodeUpdateTerm(ths, pMsg->term);
// }
if (pMsg->term > ths->pRaftStore->currentTerm) {
syncLogRecvAppendEntriesReply(ths, pMsg, "error term");
return -1;
}
ASSERT(pMsg->term == ths->pRaftStore->currentTerm);
// update time
syncIndexMgrSetStartTime(ths->pNextIndex, &(pMsg->srcId), pMsg->startTime);
syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), taosGetTimestampMs());
SyncIndex beforeNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
SyncIndex beforeMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
if (pMsg->success) {
// nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1]
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1);
// matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex]
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex);
// maybe commit
if (ths->state == TAOS_SYNC_STATE_LEADER) {
syncMaybeAdvanceCommitIndex(ths);
}
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1);
} else {
SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
// notice! int64, uint64
if (nextIndex > SYNC_INDEX_BEGIN) {
--nextIndex;
// get sender
SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(pMsg->srcId));
ASSERT(pSender != NULL);
SSnapshot snapshot = {.data = NULL,
.lastApplyIndex = SYNC_INDEX_INVALID,
.lastApplyTerm = 0,
.lastConfigIndex = SYNC_INDEX_INVALID};
void* pReader = NULL;
ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot, NULL, &pReader);
if (snapshot.lastApplyIndex >= SYNC_INDEX_BEGIN && nextIndex <= snapshot.lastApplyIndex + 1 &&
!snapshotSenderIsStart(pSender) && pMsg->privateTerm < pSender->privateTerm) {
// has snapshot
ASSERT(pReader != NULL);
SSnapshotParam readerParam = {.start = 0, .end = snapshot.lastApplyIndex};
snapshotSenderStart(pSender, readerParam, snapshot, pReader);
} else {
// no snapshot
if (pReader != NULL) {
ths->pFsm->FpSnapshotStopRead(ths->pFsm, pReader);
}
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex);
}
SyncIndex sentryIndex = pSender->snapshot.lastApplyIndex + 1;
// send next append entries
SPeerState* pState = syncNodeGetPeerState(ths, &(pMsg->srcId));
ASSERT(pState != NULL);
// update nextIndex to sentryIndex
if (nextIndex <= sentryIndex) {
nextIndex = sentryIndex;
}
} else {
nextIndex = SYNC_INDEX_BEGIN;
if (pMsg->lastSendIndex == pState->lastSendIndex) {
syncNodeReplicateOne(ths, &(pMsg->srcId));
}
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex);
}
SyncIndex afterNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
SyncIndex afterMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
do {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf),
"before next:%" PRId64 ", match:%" PRId64 ", after next:%" PRId64 ", match:%" PRId64, beforeNextIndex,
beforeMatchIndex, afterNextIndex, afterMatchIndex);
syncLogRecvAppendEntriesReply(ths, pMsg, logBuf);
} while (0);
syncLogRecvAppendEntriesReply(ths, pMsg, "process");
return 0;
}
\ No newline at end of file
......@@ -45,8 +45,10 @@
// /\ UNCHANGED <<messages, serverVars, candidateVars, leaderVars, log>>
//
void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
syncIndexMgrLog2("==syncNodeMaybeAdvanceCommitIndex== pNextIndex", pSyncNode->pNextIndex);
syncIndexMgrLog2("==syncNodeMaybeAdvanceCommitIndex== pMatchIndex", pSyncNode->pMatchIndex);
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
syncNodeErrorLog(pSyncNode, "not leader, can not advance commit index");
return;
}
// advance commit index to sanpshot first
SSnapshot snapshot;
......@@ -75,9 +77,11 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
if (h) {
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
} else {
pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, index);
if (pEntry == NULL) {
sError("failed to get entry since %s. index:%" PRId64, tstrerror(terrno), index);
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "advance commit index error, read wal index:%" PRId64, index);
syncNodeErrorLog(pSyncNode, logBuf);
return;
}
}
......@@ -125,13 +129,17 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
pSyncNode->commitIndex = newCommitIndex;
// call back Wal
pSyncNode->pLogStore->updateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);
pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);
// execute fsm
if (pSyncNode->pFsm != NULL) {
int32_t code = syncNodeCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state);
int32_t code = syncNodeDoCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state);
if (code != 0) {
wError("failed to commit sync node since %s", tstrerror(terrno));
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "advance commit index error, do commit begin:%" PRId64 ", end:%" PRId64,
beginIndex, endIndex);
syncNodeErrorLog(pSyncNode, logBuf);
return;
}
}
}
......@@ -162,6 +170,8 @@ static inline int64_t syncNodeAbs64(int64_t a, int64_t b) {
}
int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) {
return pSyncNode->quorum;
int32_t quorum = 1; // self
int64_t timeNow = taosGetTimestampMs();
......@@ -220,6 +230,7 @@ int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) {
return quorum;
}
/*
bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
int agreeCount = 0;
for (int i = 0; i < pSyncNode->replicaNum; ++i) {
......@@ -232,8 +243,8 @@ bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
}
return false;
}
*/
/*
bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
int agreeCount = 0;
for (int i = 0; i < pSyncNode->replicaNum; ++i) {
......@@ -246,4 +257,3 @@ bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
}
return false;
}
*/
......@@ -30,45 +30,6 @@
// msource |-> i,
// mdest |-> j])
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
//
int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode) {
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
int32_t ret = 0;
for (int i = 0; i < pSyncNode->peersNum; ++i) {
SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId);
pMsg->srcId = pSyncNode->myRaftId;
pMsg->destId = pSyncNode->peersId[i];
pMsg->term = pSyncNode->pRaftStore->currentTerm;
pMsg->lastLogIndex = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore);
pMsg->lastLogTerm = pSyncNode->pLogStore->getLastTerm(pSyncNode->pLogStore);
ret = syncNodeRequestVote(pSyncNode, &pSyncNode->peersId[i], pMsg);
ASSERT(ret == 0);
syncRequestVoteDestroy(pMsg);
}
return ret;
}
int32_t syncNodeRequestVotePeersSnapshot(SSyncNode* pSyncNode) {
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
int32_t ret = 0;
for (int i = 0; i < pSyncNode->peersNum; ++i) {
SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId);
pMsg->srcId = pSyncNode->myRaftId;
pMsg->destId = pSyncNode->peersId[i];
pMsg->term = pSyncNode->pRaftStore->currentTerm;
ret = syncNodeGetLastIndexTerm(pSyncNode, &(pMsg->lastLogIndex), &(pMsg->lastLogTerm));
ASSERT(ret == 0);
ret = syncNodeRequestVote(pSyncNode, &pSyncNode->peersId[i], pMsg);
ASSERT(ret == 0);
syncRequestVoteDestroy(pMsg);
}
return ret;
}
int32_t syncNodeElect(SSyncNode* pSyncNode) {
syncNodeEventLog(pSyncNode, "begin election");
......@@ -111,27 +72,38 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) {
}
switch (pSyncNode->pRaftCfg->snapshotStrategy) {
case SYNC_STRATEGY_NO_SNAPSHOT:
ret = syncNodeRequestVotePeers(pSyncNode);
break;
ASSERT(ret == 0);
case SYNC_STRATEGY_STANDARD_SNAPSHOT:
case SYNC_STRATEGY_WAL_FIRST:
ret = syncNodeRequestVotePeersSnapshot(pSyncNode);
break;
syncNodeResetElectTimer(pSyncNode);
default:
ret = syncNodeRequestVotePeers(pSyncNode);
break;
return ret;
}
int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode) {
if (pSyncNode->state != TAOS_SYNC_STATE_CANDIDATE) {
syncNodeEventLog(pSyncNode, "not candidate, stop elect");
return 0;
}
int32_t ret = 0;
for (int i = 0; i < pSyncNode->peersNum; ++i) {
SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId);
pMsg->srcId = pSyncNode->myRaftId;
pMsg->destId = pSyncNode->peersId[i];
pMsg->term = pSyncNode->pRaftStore->currentTerm;
ret = syncNodeGetLastIndexTerm(pSyncNode, &(pMsg->lastLogIndex), &(pMsg->lastLogTerm));
ASSERT(ret == 0);
syncNodeResetElectTimer(pSyncNode);
ret = syncNodeSendRequestVote(pSyncNode, &pSyncNode->peersId[i], pMsg);
ASSERT(ret == 0);
syncRequestVoteDestroy(pMsg);
}
return ret;
}
int32_t syncNodeRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg) {
int32_t syncNodeSendRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg) {
int32_t ret = 0;
syncLogSendRequestVote(pSyncNode, pMsg, "");
......
......@@ -326,18 +326,18 @@ static void *syncIOConsumerFunc(void *param) {
}
} else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) {
if (io->FpOnSyncSnapshotSend != NULL) {
if (io->FpOnSyncSnapshot != NULL) {
SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pRpcMsg);
ASSERT(pSyncMsg != NULL);
io->FpOnSyncSnapshotSend(io->pSyncNode, pSyncMsg);
io->FpOnSyncSnapshot(io->pSyncNode, pSyncMsg);
syncSnapshotSendDestroy(pSyncMsg);
}
} else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) {
if (io->FpOnSyncSnapshotRsp != NULL) {
if (io->FpOnSyncSnapshotReply != NULL) {
SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pRpcMsg);
ASSERT(pSyncMsg != NULL);
io->FpOnSyncSnapshotRsp(io->pSyncNode, pSyncMsg);
io->FpOnSyncSnapshotReply(io->pSyncNode, pSyncMsg);
syncSnapshotRspDestroy(pSyncMsg);
}
......
......@@ -83,6 +83,10 @@ void syncIndexMgrSetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId,
}
SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId) {
if (pSyncIndexMgr == NULL) {
return SYNC_INDEX_INVALID;
}
for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) {
if (syncUtilSameId(&((*(pSyncIndexMgr->replicas))[i]), pRaftId)) {
SyncIndex idx = (pSyncIndexMgr->index)[i];
......
......@@ -46,6 +46,7 @@ static void syncNodeEqElectTimer(void* param, void* tmrId);
static void syncNodeEqHeartbeatTimer(void* param, void* tmrId);
static int32_t syncNodeEqNoop(SSyncNode* ths);
static int32_t syncNodeAppendNoop(SSyncNode* ths);
static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId);
// process message ----
int32_t syncNodeOnPingCb(SSyncNode* ths, SyncPing* pMsg);
......@@ -67,7 +68,7 @@ int32_t syncInit() {
syncCleanUp();
ret = -1;
} else {
sDebug("sync rsetId:%" PRId32 " is open", tsNodeRefId);
sDebug("sync rsetId:%d is open", tsNodeRefId);
ret = syncEnvStart();
}
}
......@@ -80,7 +81,7 @@ void syncCleanUp() {
ASSERT(ret == 0);
if (tsNodeRefId != -1) {
sDebug("sync rsetId:%" PRId32 " is closed", tsNodeRefId);
sDebug("sync rsetId:%d is closed", tsNodeRefId);
taosCloseRef(tsNodeRefId);
tsNodeRefId = -1;
}
......@@ -89,7 +90,7 @@ void syncCleanUp() {
int64_t syncOpen(SSyncInfo* pSyncInfo) {
SSyncNode* pSyncNode = syncNodeOpen(pSyncInfo);
if (pSyncNode == NULL) {
sError("vgId:%d, failed to open sync node since %s", pSyncInfo->vgId, terrstr());
sError("failed to open sync node. vgId:%d", pSyncInfo->vgId);
return -1;
}
......@@ -100,7 +101,7 @@ int64_t syncOpen(SSyncInfo* pSyncInfo) {
return -1;
}
sDebug("vgId:%d, sync rid:%" PRId64 " is added to rsetId:%" PRId32, pSyncInfo->vgId, pSyncNode->rid, tsNodeRefId);
sDebug("vgId:%d, sync rid:%" PRId64 " is added to rsetId:%d", pSyncInfo->vgId, pSyncNode->rid, tsNodeRefId);
return pSyncNode->rid;
}
......@@ -146,7 +147,7 @@ void syncStop(int64_t rid) {
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
taosRemoveRef(tsNodeRefId, rid);
sDebug("vgId:%d, sync rid:%" PRId64 " is removed from rsetId:%" PRId64, vgId, rid, (int64_t)tsNodeRefId);
sDebug("vgId:%d, sync rid:%" PRId64 " is removed from rsetId:%d", vgId, rid, tsNodeRefId);
}
int32_t syncSetStandby(int64_t rid) {
......@@ -238,7 +239,6 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) {
}
ASSERT(rid == pSyncNode->rid);
#if 0
if (!syncNodeCheckNewConfig(pSyncNode, pNewCfg)) {
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
terrno = TSDB_CODE_SYN_NEW_CONFIG_ERROR;
......@@ -246,6 +246,7 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) {
return -1;
}
#if 0
char* newconfig = syncCfg2Str((SSyncCfg*)pNewCfg);
int32_t ret = 0;
......@@ -263,6 +264,17 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) {
#else
syncNodeUpdateNewConfigIndex(pSyncNode, pNewCfg);
syncNodeDoConfigChange(pSyncNode, pNewCfg, SYNC_INDEX_INVALID);
if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
syncNodeStopHeartbeatTimer(pSyncNode);
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
syncHbTimerInit(pSyncNode, &(pSyncNode->peerHeartbeatTimerArr[i]), (pSyncNode->replicasId)[i]);
}
syncNodeStartHeartbeatTimer(pSyncNode);
syncNodeReplicate(pSyncNode);
}
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
return 0;
#endif
......@@ -294,6 +306,225 @@ int32_t syncLeaderTransferTo(int64_t rid, SNodeInfo newLeader) {
return ret;
}
SyncIndex syncMinMatchIndex(SSyncNode* pSyncNode) {
SyncIndex minMatchIndex = SYNC_INDEX_INVALID;
if (pSyncNode->peersNum > 0) {
minMatchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId[0]));
}
for (int32_t i = 1; i < pSyncNode->peersNum; ++i) {
SyncIndex matchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId[i]));
if (matchIndex < minMatchIndex) {
minMatchIndex = matchIndex;
}
}
return minMatchIndex;
}
char* syncNodePeerState2Str(const SSyncNode* pSyncNode) {
int32_t len = 128;
int32_t useLen = 0;
int32_t leftLen = len - useLen;
char* pStr = taosMemoryMalloc(len);
memset(pStr, 0, len);
char* p = pStr;
int32_t use = snprintf(p, leftLen, "{");
useLen += use;
leftLen -= use;
for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) {
SPeerState* pState = syncNodeGetPeerState((SSyncNode*)pSyncNode, &(pSyncNode->replicasId[i]));
ASSERT(pState != NULL);
p = pStr + useLen;
use = snprintf(p, leftLen, "%d:%" PRId64 " ,%" PRId64, i, pState->lastSendIndex, pState->lastSendTime);
useLen += use;
leftLen -= use;
}
p = pStr + useLen;
use = snprintf(p, leftLen, "}");
useLen += use;
leftLen -= use;
// sTrace("vgId:%d, ------------------ syncNodePeerState2Str:%s", pSyncNode->vgId, pStr);
return pStr;
}
int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid);
if (pSyncNode == NULL) {
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
return -1;
}
ASSERT(rid == pSyncNode->rid);
int32_t code = 0;
if (syncNodeIsMnode(pSyncNode)) {
// mnode
int64_t logRetention = SYNC_MNODE_LOG_RETENTION;
SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
int64_t logNum = endIndex - beginIndex;
bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore);
if (isEmpty || (!isEmpty && logNum < logRetention)) {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf),
"new-snapshot-index:%" PRId64 ", log-num:%" PRId64 ", empty:%d, do not delete wal", lastApplyIndex,
logNum, isEmpty);
syncNodeEventLog(pSyncNode, logBuf);
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
return 0;
}
goto _DEL_WAL;
} else {
// vnode
if (pSyncNode->replicaNum > 1) {
// multi replicas
if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
pSyncNode->minMatchIndex = syncMinMatchIndex(pSyncNode);
for (int32_t i = 0; i < pSyncNode->peersNum; ++i) {
int64_t matchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId[i]));
if (lastApplyIndex > matchIndex) {
do {
char host[64];
uint16_t port;
syncUtilU642Addr(pSyncNode->peersId[i].addr, host, sizeof(host), &port);
char logBuf[256];
snprintf(logBuf, sizeof(logBuf),
"new-snapshot-index:%" PRId64 " is greater than match-index:%" PRId64
" of %s:%d, do not delete wal",
lastApplyIndex, matchIndex, host, port);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
return 0;
}
}
} else if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) {
if (lastApplyIndex > pSyncNode->minMatchIndex) {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf),
"new-snapshot-index:%" PRId64 " is greater than min-match-index:%" PRId64 ", do not delete wal",
lastApplyIndex, pSyncNode->minMatchIndex);
syncNodeEventLog(pSyncNode, logBuf);
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
return 0;
}
} else if (pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE) {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf), "new-snapshot-index:%" PRId64 " candidate, do not delete wal", lastApplyIndex);
syncNodeEventLog(pSyncNode, logBuf);
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
return 0;
} else {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf), "new-snapshot-index:%" PRId64 " unknown state, do not delete wal",
lastApplyIndex);
syncNodeEventLog(pSyncNode, logBuf);
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
return 0;
}
goto _DEL_WAL;
} else {
// one replica
goto _DEL_WAL;
}
}
_DEL_WAL:
do {
SyncIndex snapshottingIndex = atomic_load_64(&pSyncNode->snapshottingIndex);
if (snapshottingIndex == SYNC_INDEX_INVALID) {
atomic_store_64(&pSyncNode->snapshottingIndex, lastApplyIndex);
pSyncNode->snapshottingTime = taosGetTimestampMs();
SSyncLogStoreData* pData = pSyncNode->pLogStore->data;
code = walBeginSnapshot(pData->pWal, lastApplyIndex);
if (code == 0) {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf), "wal snapshot begin, index:%" PRId64 ", last apply index:%" PRId64,
pSyncNode->snapshottingIndex, lastApplyIndex);
syncNodeEventLog(pSyncNode, logBuf);
} else {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf),
"wal snapshot begin error since:%s, index:%" PRId64 ", last apply index:%" PRId64, terrstr(terrno),
pSyncNode->snapshottingIndex, lastApplyIndex);
syncNodeErrorLog(pSyncNode, logBuf);
atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID);
}
} else {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf),
"snapshotting for %" PRId64 ", do not delete wal for new-snapshot-index:%" PRId64, snapshottingIndex,
lastApplyIndex);
syncNodeEventLog(pSyncNode, logBuf);
}
} while (0);
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
return code;
}
int32_t syncEndSnapshot(int64_t rid) {
SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid);
if (pSyncNode == NULL) {
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
return -1;
}
ASSERT(rid == pSyncNode->rid);
int32_t code = 0;
if (atomic_load_64(&pSyncNode->snapshottingIndex) != SYNC_INDEX_INVALID) {
SSyncLogStoreData* pData = pSyncNode->pLogStore->data;
code = walEndSnapshot(pData->pWal);
if (code != 0) {
sError("vgId:%d, wal snapshot end error since:%s", pSyncNode->vgId, terrstr(terrno));
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
return -1;
} else {
do {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf), "wal snapshot end, index:%" PRId64,
atomic_load_64(&pSyncNode->snapshottingIndex));
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID);
}
}
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
return code;
}
int32_t syncNodeLeaderTransfer(SSyncNode* pSyncNode) {
if (pSyncNode->peersNum == 0) {
sDebug("only one replica, cannot leader transfer");
......@@ -316,7 +547,7 @@ int32_t syncNodeLeaderTransferTo(SSyncNode* pSyncNode, SNodeInfo newLeader) {
}
do {
char logBuf[256];
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "begin leader transfer to %s:%u", newLeader.nodeFqdn, newLeader.nodePort);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
......@@ -716,24 +947,6 @@ int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak) {
return ret;
}
int32_t syncProposeBatch(int64_t rid, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize) {
if (arrSize < 0) {
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
return -1;
}
SSyncNode* pSyncNode = taosAcquireRef(tsNodeRefId, rid);
if (pSyncNode == NULL) {
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
return -1;
}
ASSERT(rid == pSyncNode->rid);
int32_t ret = syncNodeProposeBatch(pSyncNode, pMsgPArr, pIsWeakArr, arrSize);
taosReleaseRef(tsNodeRefId, pSyncNode->rid);
return ret;
}
static bool syncNodeBatchOK(SRpcMsg** pMsgPArr, int32_t arrSize) {
for (int32_t i = 0; i < arrSize; ++i) {
if (pMsgPArr[i]->msgType == TDMT_SYNC_CONFIG_CHANGE) {
......@@ -748,91 +961,6 @@ static bool syncNodeBatchOK(SRpcMsg** pMsgPArr, int32_t arrSize) {
return true;
}
int32_t syncNodeProposeBatch(SSyncNode* pSyncNode, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize) {
if (!syncNodeBatchOK(pMsgPArr, arrSize)) {
syncNodeErrorLog(pSyncNode, "sync propose batch error");
terrno = TSDB_CODE_SYN_BATCH_ERROR;
return -1;
}
if (arrSize > SYNC_MAX_BATCH_SIZE) {
syncNodeErrorLog(pSyncNode, "sync propose batch error");
terrno = TSDB_CODE_SYN_BATCH_ERROR;
return -1;
}
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
syncNodeErrorLog(pSyncNode, "sync propose not leader");
terrno = TSDB_CODE_SYN_NOT_LEADER;
return -1;
}
if (pSyncNode->changing) {
syncNodeErrorLog(pSyncNode, "sync propose not ready");
terrno = TSDB_CODE_SYN_PROPOSE_NOT_READY;
return -1;
}
SRaftMeta raftArr[SYNC_MAX_BATCH_SIZE];
for (int i = 0; i < arrSize; ++i) {
do {
char eventLog[128];
snprintf(eventLog, sizeof(eventLog), "propose message, type:%s batch:%d", TMSG_INFO(pMsgPArr[i]->msgType),
arrSize);
syncNodeEventLog(pSyncNode, eventLog);
} while (0);
SRespStub stub;
stub.createTime = taosGetTimestampMs();
stub.rpcMsg = *(pMsgPArr[i]);
uint64_t seqNum = syncRespMgrAdd(pSyncNode->pSyncRespMgr, &stub);
raftArr[i].isWeak = pIsWeakArr[i];
raftArr[i].seqNum = seqNum;
}
SyncClientRequestBatch* pSyncMsg = syncClientRequestBatchBuild(pMsgPArr, raftArr, arrSize, pSyncNode->vgId);
ASSERT(pSyncMsg != NULL);
SRpcMsg rpcMsg;
syncClientRequestBatch2RpcMsg(pSyncMsg, &rpcMsg);
taosMemoryFree(pSyncMsg); // only free msg body, do not free rpc msg content
if (pSyncNode->replicaNum == 1 && pSyncNode->vgId != 1) {
int32_t code = syncNodeOnClientRequestBatchCb(pSyncNode, pSyncMsg);
if (code == 0) {
// update rpc msg applyIndex
SRpcMsg* msgArr = syncClientRequestBatchRpcMsgArr(pSyncMsg);
ASSERT(arrSize == pSyncMsg->dataCount);
for (int i = 0; i < arrSize; ++i) {
pMsgPArr[i]->info.conn.applyIndex = msgArr[i].info.conn.applyIndex;
syncRespMgrDel(pSyncNode->pSyncRespMgr, raftArr[i].seqNum);
}
rpcFreeCont(rpcMsg.pCont);
terrno = 0;
return 1;
} else {
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
return -1;
}
} else {
if (pSyncNode->FpEqMsg != NULL && (*pSyncNode->FpEqMsg)(pSyncNode->msgcb, &rpcMsg) == 0) {
// enqueue msg ok
return 0;
} else {
sError("vgId:%d, enqueue msg error, FpEqMsg is NULL", pSyncNode->vgId);
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
return -1;
}
}
return 0;
}
int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak) {
int32_t ret = 0;
......@@ -867,7 +995,7 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak) {
if (!pSyncNode->restoreFinish && pSyncNode->vgId != 1) {
ret = -1;
terrno = TSDB_CODE_SYN_PROPOSE_NOT_READY;
sError("vgId:%d, failed to sync propose since not ready, type:%s, last:%" PRId64 ", cmt:%" PRId64 "",
sError("vgId:%d, failed to sync propose since not ready, type:%s, last:%" PRId64 ", cmt:%" PRId64,
pSyncNode->vgId, TMSG_INFO(pMsg->msgType), syncNodeGetLastIndex(pSyncNode), pSyncNode->commitIndex);
goto _END;
}
......@@ -884,7 +1012,7 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak) {
// optimized one replica
if (syncNodeIsOptimizedOneReplica(pSyncNode, pMsg)) {
SyncIndex retIndex;
int32_t code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, &retIndex);
int32_t code = syncNodeOnClientRequest(pSyncNode, pSyncMsg, &retIndex);
if (code == 0) {
pMsg->info.conn.applyIndex = retIndex;
pMsg->info.conn.applyTerm = pSyncNode->pRaftStore->currentTerm;
......@@ -925,8 +1053,46 @@ _END:
return ret;
}
int32_t syncHbTimerInit(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer, SRaftId destId) {
pSyncTimer->pTimer = NULL;
pSyncTimer->counter = 0;
pSyncTimer->timerMS = pSyncNode->hbBaseLine;
pSyncTimer->timerCb = syncNodeEqPeerHeartbeatTimer;
pSyncTimer->destId = destId;
atomic_store_64(&pSyncTimer->logicClock, 0);
return 0;
}
int32_t syncHbTimerStart(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer) {
int32_t ret = 0;
if (syncEnvIsStart()) {
SSyncHbTimerData* pData = taosMemoryMalloc(sizeof(SSyncHbTimerData));
pData->pSyncNode = pSyncNode;
pData->pTimer = pSyncTimer;
pData->destId = pSyncTimer->destId;
pData->logicClock = pSyncTimer->logicClock;
pSyncTimer->pData = pData;
taosTmrReset(pSyncTimer->timerCb, pSyncTimer->timerMS, pData, gSyncEnv->pTimerManager, &pSyncTimer->pTimer);
} else {
sError("vgId:%d, start ctrl hb timer error, sync env is stop", pSyncNode->vgId);
}
return ret;
}
int32_t syncHbTimerStop(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer) {
int32_t ret = 0;
atomic_add_fetch_64(&pSyncTimer->logicClock, 1);
taosTmrStop(pSyncTimer->pTimer);
pSyncTimer->pTimer = NULL;
// taosMemoryFree(pSyncTimer->pData);
return ret;
}
// open/close --------------
SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
SSyncNode* syncNodeOpen(SSyncInfo* pOldSyncInfo) {
SSyncInfo* pSyncInfo = (SSyncInfo*)pOldSyncInfo;
SSyncNode* pSyncNode = (SSyncNode*)taosMemoryCalloc(1, sizeof(SSyncNode));
if (pSyncNode == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
......@@ -955,6 +1121,9 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
sError("failed to create raft cfg file. configPath: %s", pSyncNode->configPath);
goto _error;
}
if (pSyncInfo->syncCfg.replicaNum == 0) {
pSyncInfo->syncCfg = pSyncNode->pRaftCfg->cfg;
}
} else {
// update syncCfg by raft_config.json
pSyncNode->pRaftCfg = raftCfgOpen(pSyncNode->configPath);
......@@ -962,9 +1131,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
sError("failed to open raft cfg file. path:%s", pSyncNode->configPath);
goto _error;
}
if (pSyncInfo->syncCfg.replicaNum == 0) {
pSyncInfo->syncCfg = pSyncNode->pRaftCfg->cfg;
}
raftCfgClose(pSyncNode->pRaftCfg);
pSyncNode->pRaftCfg = NULL;
......@@ -981,6 +1148,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode->msgcb = pSyncInfo->msgcb;
pSyncNode->FpSendMsg = pSyncInfo->FpSendMsg;
pSyncNode->FpEqMsg = pSyncInfo->FpEqMsg;
pSyncNode->FpEqCtrlMsg = pSyncInfo->FpEqCtrlMsg;
// init raft config
pSyncNode->pRaftCfg = raftCfgOpen(pSyncNode->configPath);
......@@ -1136,29 +1304,22 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode->FpHeartbeatTimerCB = syncNodeEqHeartbeatTimer;
pSyncNode->heartbeatTimerCounter = 0;
// init peer heartbeat timer
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
syncHbTimerInit(pSyncNode, &(pSyncNode->peerHeartbeatTimerArr[i]), (pSyncNode->replicasId)[i]);
}
// init callback
pSyncNode->FpOnPing = syncNodeOnPingCb;
pSyncNode->FpOnPingReply = syncNodeOnPingReplyCb;
pSyncNode->FpOnClientRequest = syncNodeOnClientRequestCb;
pSyncNode->FpOnTimeout = syncNodeOnTimeoutCb;
pSyncNode->FpOnSnapshotSend = syncNodeOnSnapshotSendCb;
pSyncNode->FpOnSnapshotRsp = syncNodeOnSnapshotRspCb;
if (pSyncNode->pRaftCfg->snapshotStrategy) {
sInfo("vgId:%d, sync node use snapshot", pSyncNode->vgId);
pSyncNode->FpOnRequestVote = syncNodeOnRequestVoteSnapshotCb;
pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReplySnapshotCb;
pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntriesSnapshotCb;
pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReplySnapshotCb;
} else {
sInfo("vgId:%d, sync node do not use snapshot", pSyncNode->vgId);
pSyncNode->FpOnRequestVote = syncNodeOnRequestVoteCb;
pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReplyCb;
pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntriesCb;
pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReplyCb;
}
pSyncNode->FpOnClientRequest = syncNodeOnClientRequest;
pSyncNode->FpOnTimeout = syncNodeOnTimer;
pSyncNode->FpOnSnapshot = syncNodeOnSnapshot;
pSyncNode->FpOnSnapshotReply = syncNodeOnSnapshotReply;
pSyncNode->FpOnRequestVote = syncNodeOnRequestVote;
pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReply;
pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntries;
pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReply;
// tools
pSyncNode->pSyncRespMgr = syncRespMgrCreate(pSyncNode, SYNC_RESP_TTL_MS);
......@@ -1183,6 +1344,12 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
// is config changing
pSyncNode->changing = false;
// peer state
syncNodePeerStateInit(pSyncNode);
// min match index
pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
// start in syncNodeStart
// start raft
// syncNodeBecomeFollower(pSyncNode);
......@@ -1192,6 +1359,9 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode->leaderTime = timeNow;
pSyncNode->lastReplicateTime = timeNow;
// snapshotting
atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID);
syncNodeEventLog(pSyncNode, "sync open");
return pSyncNode;
......@@ -1226,15 +1396,14 @@ void syncNodeStart(SSyncNode* pSyncNode) {
// Raft 3.6.2 Committing entries from previous terms
syncNodeAppendNoop(pSyncNode);
syncMaybeAdvanceCommitIndex(pSyncNode);
} else {
syncNodeBecomeFollower(pSyncNode, "first start");
}
if (pSyncNode->vgId == 1) {
int32_t ret = 0;
ret = syncNodeStartPingTimer(pSyncNode);
ASSERT(ret == 0);
}
}
void syncNodeStartStandBy(SSyncNode* pSyncNode) {
......@@ -1247,11 +1416,9 @@ void syncNodeStartStandBy(SSyncNode* pSyncNode) {
int32_t ret = syncNodeRestartElectTimer(pSyncNode, electMS);
ASSERT(ret == 0);
if (pSyncNode->vgId == 1) {
int32_t ret = 0;
ret = 0;
ret = syncNodeStartPingTimer(pSyncNode);
ASSERT(ret == 0);
}
}
void syncNodeClose(SSyncNode* pSyncNode) {
......@@ -1383,11 +1550,13 @@ int32_t syncNodeStartElectTimer(SSyncNode* pSyncNode, int32_t ms) {
&pSyncNode->pElectTimer);
atomic_store_64(&pSyncNode->electTimerLogicClock, pSyncNode->electTimerLogicClockUser);
/*
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "elect timer reset, ms:%d", ms);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
*/
} else {
sError("vgId:%d, start elect timer error, sync env is stop", pSyncNode->vgId);
......@@ -1401,7 +1570,7 @@ int32_t syncNodeStopElectTimer(SSyncNode* pSyncNode) {
taosTmrStop(pSyncNode->pElectTimer);
pSyncNode->pElectTimer = NULL;
sTrace("vgId:%d, sync %s stop elect timer", pSyncNode->vgId, syncUtilState2String(pSyncNode->state));
// sTrace("vgId:%d, sync %s stop elect timer", pSyncNode->vgId, syncUtilState2String(pSyncNode->state));
return ret;
}
......@@ -1453,30 +1622,34 @@ static int32_t syncNodeDoStartHeartbeatTimer(SSyncNode* pSyncNode) {
}
int32_t syncNodeStartHeartbeatTimer(SSyncNode* pSyncNode) {
int32_t ret = 0;
#if 0
pSyncNode->heartbeatTimerMS = pSyncNode->hbBaseLine;
int32_t ret = syncNodeDoStartHeartbeatTimer(pSyncNode);
return ret;
}
ret = syncNodeDoStartHeartbeatTimer(pSyncNode);
#endif
int32_t syncNodeStartHeartbeatTimerMS(SSyncNode* pSyncNode, int32_t ms) {
pSyncNode->heartbeatTimerMS = ms;
int32_t ret = syncNodeDoStartHeartbeatTimer(pSyncNode);
return ret;
}
for (int i = 0; i < pSyncNode->peersNum; ++i) {
SSyncTimer* pSyncTimer = syncNodeGetHbTimer(pSyncNode, &(pSyncNode->peersId[i]));
syncHbTimerStart(pSyncNode, pSyncTimer);
}
int32_t syncNodeStartHeartbeatTimerNow(SSyncNode* pSyncNode) {
pSyncNode->heartbeatTimerMS = 1;
int32_t ret = syncNodeDoStartHeartbeatTimer(pSyncNode);
return ret;
}
int32_t syncNodeStopHeartbeatTimer(SSyncNode* pSyncNode) {
int32_t ret = 0;
#if 0
atomic_add_fetch_64(&pSyncNode->heartbeatTimerLogicClockUser, 1);
taosTmrStop(pSyncNode->pHeartbeatTimer);
pSyncNode->pHeartbeatTimer = NULL;
#endif
sTrace("vgId:%d, sync %s stop heartbeat timer", pSyncNode->vgId, syncUtilState2String(pSyncNode->state));
for (int i = 0; i < pSyncNode->peersNum; ++i) {
SSyncTimer* pSyncTimer = syncNodeGetHbTimer(pSyncNode, &(pSyncNode->peersId[i]));
syncHbTimerStop(pSyncNode, pSyncTimer);
}
return ret;
}
......@@ -1487,18 +1660,6 @@ int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode) {
return 0;
}
int32_t syncNodeRestartHeartbeatTimerNow(SSyncNode* pSyncNode) {
syncNodeStopHeartbeatTimer(pSyncNode);
syncNodeStartHeartbeatTimerNow(pSyncNode);
return 0;
}
int32_t syncNodeRestartNowHeartbeatTimerMS(SSyncNode* pSyncNode, int32_t ms) {
syncNodeStopHeartbeatTimer(pSyncNode);
syncNodeStartHeartbeatTimerMS(pSyncNode, ms);
return 0;
}
// utils --------------
int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pSyncNode, SRpcMsg* pMsg) {
SEpSet epSet;
......@@ -1702,8 +1863,6 @@ char* syncNode2Str(const SSyncNode* pSyncNode) {
}
inline void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) {
int32_t userStrLen = strlen(str);
SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0};
if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpGetSnapshotInfo != NULL) {
pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
......@@ -1722,22 +1881,25 @@ inline void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) {
printStr = pCfgStr;
}
char* peerStateStr = syncNodePeerState2Str(pSyncNode);
int32_t userStrLen = strlen(str) + strlen(peerStateStr);
if (userStrLen < 256) {
char logBuf[256 + 256];
if (pSyncNode != NULL && pSyncNode->pRaftCfg != NULL && pSyncNode->pRaftStore != NULL) {
snprintf(logBuf, sizeof(logBuf),
"vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", snap:%" PRId64
", snap-tm:%" PRIu64
"vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64
", snap:%" PRId64 ", snap-tm:%" PRIu64
", sby:%d, "
"stgy:%d, bch:%d, "
"r-num:%d, "
"lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s",
"lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s, %s",
pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm,
pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm,
pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, pSyncNode->pRaftCfg->batchSize,
pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing,
pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode), pSyncNode->electTimerLogicClockUser,
pSyncNode->heartbeatTimerLogicClockUser, printStr);
pSyncNode->commitIndex, logBeginIndex, logLastIndex, pSyncNode->minMatchIndex, snapshot.lastApplyIndex,
snapshot.lastApplyTerm, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy,
pSyncNode->pRaftCfg->batchSize, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex,
pSyncNode->changing, pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode),
pSyncNode->electTimerLogicClockUser, pSyncNode->heartbeatTimerLogicClockUser, peerStateStr, printStr);
} else {
snprintf(logBuf, sizeof(logBuf), "%s", str);
}
......@@ -1750,18 +1912,18 @@ inline void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) {
char* s = (char*)taosMemoryMalloc(len);
if (pSyncNode != NULL && pSyncNode->pRaftCfg != NULL && pSyncNode->pRaftStore != NULL) {
snprintf(s, len,
"vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", snap:%" PRId64
", snap-tm:%" PRIu64
"vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64
", snap:%" PRId64 ", snap-tm:%" PRIu64
", sby:%d, "
"stgy:%d, bch:%d, "
"r-num:%d, "
"lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s",
"lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s, %s",
pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm,
pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm,
pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, pSyncNode->pRaftCfg->batchSize,
pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing,
pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode), pSyncNode->electTimerLogicClockUser,
pSyncNode->heartbeatTimerLogicClockUser, printStr);
pSyncNode->commitIndex, logBeginIndex, logLastIndex, pSyncNode->minMatchIndex, snapshot.lastApplyIndex,
snapshot.lastApplyTerm, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy,
pSyncNode->pRaftCfg->batchSize, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex,
pSyncNode->changing, pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode),
pSyncNode->electTimerLogicClockUser, pSyncNode->heartbeatTimerLogicClockUser, peerStateStr, printStr);
} else {
snprintf(s, len, "%s", str);
}
......@@ -1771,6 +1933,7 @@ inline void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) {
taosMemoryFree(s);
}
taosMemoryFree(peerStateStr);
taosMemoryFree(pCfgStr);
}
......@@ -1799,17 +1962,18 @@ inline void syncNodeErrorLog(const SSyncNode* pSyncNode, char* str) {
char logBuf[256 + 256];
if (pSyncNode != NULL && pSyncNode->pRaftCfg != NULL && pSyncNode->pRaftStore != NULL) {
snprintf(logBuf, sizeof(logBuf),
"vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", snap:%" PRId64
", snap-tm:%" PRIu64
"vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64
", snap:%" PRId64 ", snap-tm:%" PRIu64
", sby:%d, "
"stgy:%d, bch:%d, "
"r-num:%d, "
"lcfg:%" PRId64 ", chging:%d, rsto:%d, %s",
"lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s",
pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm,
pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm,
pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, pSyncNode->pRaftCfg->batchSize,
pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing,
pSyncNode->restoreFinish, printStr);
pSyncNode->commitIndex, logBeginIndex, logLastIndex, pSyncNode->minMatchIndex, snapshot.lastApplyIndex,
snapshot.lastApplyTerm, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy,
pSyncNode->pRaftCfg->batchSize, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex,
pSyncNode->changing, pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode),
pSyncNode->electTimerLogicClockUser, pSyncNode->heartbeatTimerLogicClockUser, printStr);
} else {
snprintf(logBuf, sizeof(logBuf), "%s", str);
}
......@@ -1820,17 +1984,18 @@ inline void syncNodeErrorLog(const SSyncNode* pSyncNode, char* str) {
char* s = (char*)taosMemoryMalloc(len);
if (pSyncNode != NULL && pSyncNode->pRaftCfg != NULL && pSyncNode->pRaftStore != NULL) {
snprintf(s, len,
"vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", snap:%" PRId64
", snap-tm:%" PRIu64
"vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64
", snap:%" PRId64 ", snap-tm:%" PRIu64
", sby:%d, "
"stgy:%d, bch:%d, "
"r-num:%d, "
"lcfg:%" PRId64 ", chging:%d, rsto:%d, %s",
"lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s",
pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm,
pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm,
pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, pSyncNode->pRaftCfg->batchSize,
pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing,
pSyncNode->restoreFinish, printStr);
pSyncNode->commitIndex, logBeginIndex, logLastIndex, pSyncNode->minMatchIndex, snapshot.lastApplyIndex,
snapshot.lastApplyTerm, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy,
pSyncNode->pRaftCfg->batchSize, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex,
pSyncNode->changing, pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode),
pSyncNode->electTimerLogicClockUser, pSyncNode->heartbeatTimerLogicClockUser, printStr);
} else {
snprintf(s, len, "%s", str);
}
......@@ -1906,11 +2071,12 @@ static bool syncIsConfigChanged(const SSyncCfg* pOldCfg, const SSyncCfg* pNewCfg
void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex lastConfigChangeIndex) {
SSyncCfg oldConfig = pSyncNode->pRaftCfg->cfg;
#if 1
if (!syncIsConfigChanged(&oldConfig, pNewConfig)) {
sInfo("vgId:1, sync not reconfig since not changed");
return;
}
#endif
pSyncNode->pRaftCfg->cfg = *pNewConfig;
pSyncNode->pRaftCfg->lastConfigIndex = lastConfigChangeIndex;
......@@ -1993,13 +2159,14 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
syncUtilnodeInfo2raftId(&pSyncNode->pRaftCfg->cfg.nodeInfo[i], pSyncNode->vgId, &pSyncNode->replicasId[i]);
}
// update quorum first
pSyncNode->quorum = syncUtilQuorum(pSyncNode->pRaftCfg->cfg.replicaNum);
syncIndexMgrUpdate(pSyncNode->pNextIndex, pSyncNode);
syncIndexMgrUpdate(pSyncNode->pMatchIndex, pSyncNode);
voteGrantedUpdate(pSyncNode->pVotesGranted, pSyncNode);
votesRespondUpdate(pSyncNode->pVotesRespond, pSyncNode);
pSyncNode->quorum = syncUtilQuorum(pSyncNode->pRaftCfg->cfg.replicaNum);
// reset snapshot senders
// clear new
......@@ -2148,6 +2315,30 @@ void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term) {
}
}
void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm) {
ASSERT(pSyncNode->pRaftStore->currentTerm <= newTerm);
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "step down, new-term:%" PRIu64 ", current-term:%" PRIu64, newTerm,
pSyncNode->pRaftStore->currentTerm);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
if (pSyncNode->pRaftStore->currentTerm < newTerm) {
raftStoreSetTerm(pSyncNode->pRaftStore, newTerm);
char tmpBuf[64];
snprintf(tmpBuf, sizeof(tmpBuf), "step down, update term to %" PRIu64, newTerm);
syncNodeBecomeFollower(pSyncNode, tmpBuf);
raftStoreClearVote(pSyncNode->pRaftStore);
} else {
if (pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER) {
syncNodeBecomeFollower(pSyncNode, "step down");
}
}
}
void syncNodeLeaderChangeRsp(SSyncNode* pSyncNode) { syncRespCleanRsp(pSyncNode->pSyncRespMgr); }
void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) {
......@@ -2171,6 +2362,9 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) {
pSyncNode->pFsm->FpBecomeFollowerCb(pSyncNode->pFsm);
}
// min match index
pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
// trace log
do {
int32_t debugStrLen = strlen(debugStr);
......@@ -2237,6 +2431,9 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
pSyncNode->pMatchIndex->index[i] = SYNC_INDEX_INVALID;
}
// init peer mgr
syncNodePeerStateInit(pSyncNode);
// update sender private term
SSyncSnapshotSender* pMySender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->myRaftId));
if (pMySender != NULL) {
......@@ -2259,11 +2456,17 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
// start heartbeat timer
syncNodeStartHeartbeatTimer(pSyncNode);
// send heartbeat right now
syncNodeHeartbeatPeers(pSyncNode);
// call back
if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpBecomeLeaderCb != NULL) {
pSyncNode->pFsm->FpBecomeLeaderCb(pSyncNode->pFsm);
}
// min match index
pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
// trace log
do {
int32_t debugStrLen = strlen(debugStr);
......@@ -2282,7 +2485,7 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
// ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted));
ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted));
syncNodeBecomeLeader(pSyncNode, "candidate to leader");
syncNodeLog2("==state change syncNodeCandidate2Leader==", pSyncNode);
......@@ -2290,6 +2493,21 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
// Raft 3.6.2 Committing entries from previous terms
syncNodeAppendNoop(pSyncNode);
syncMaybeAdvanceCommitIndex(pSyncNode);
if (pSyncNode->replicaNum > 1) {
syncNodeReplicate(pSyncNode);
}
}
bool syncNodeIsMnode(SSyncNode* pSyncNode) { return (pSyncNode->vgId == 1); }
int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) {
for (int i = 0; i < TSDB_MAX_REPLICA; ++i) {
pSyncNode->peerStates[i].lastSendIndex = SYNC_INDEX_INVALID;
pSyncNode->peerStates[i].lastSendTime = 0;
}
return 0;
}
void syncNodeFollower2Candidate(SSyncNode* pSyncNode) {
......@@ -2475,35 +2693,35 @@ int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex
// for debug --------------
void syncNodePrint(SSyncNode* pObj) {
char* serialized = syncNode2Str(pObj);
printf("syncNodePrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncNodePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncNodePrint2(char* s, SSyncNode* pObj) {
char* serialized = syncNode2Str(pObj);
printf("syncNodePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncNodePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncNodeLog(SSyncNode* pObj) {
char* serialized = syncNode2Str(pObj);
sTraceLong("syncNodeLog | len:%lu | %s", strlen(serialized), serialized);
sTraceLong("syncNodeLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncNodeLog2(char* s, SSyncNode* pObj) {
if (gRaftDetailLog) {
char* serialized = syncNode2Str(pObj);
sTraceLong("syncNodeLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTraceLong("syncNodeLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
void syncNodeLog3(char* s, SSyncNode* pObj) {
char* serialized = syncNode2Str(pObj);
sTraceLong("syncNodeLog3 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTraceLong("syncNodeLog3 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
......@@ -2619,6 +2837,67 @@ static void syncNodeEqHeartbeatTimer(void* param, void* tmrId) {
}
}
static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) {
SSyncHbTimerData* pData = (SSyncHbTimerData*)param;
SSyncNode* pSyncNode = pData->pSyncNode;
SSyncTimer* pSyncTimer = pData->pTimer;
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
return;
}
syncNodeEventLog(pSyncNode, "eq peer hb timer");
int64_t timerLogicClock = atomic_load_64(&pSyncTimer->logicClock);
int64_t msgLogicClock = atomic_load_64(&pData->logicClock);
if (pSyncNode->replicaNum > 1) {
if (timerLogicClock == msgLogicClock) {
SyncHeartbeat* pSyncMsg = syncHeartbeatBuild(pSyncNode->vgId);
pSyncMsg->srcId = pSyncNode->myRaftId;
pSyncMsg->destId = pData->destId;
pSyncMsg->term = pSyncNode->pRaftStore->currentTerm;
pSyncMsg->commitIndex = pSyncNode->commitIndex;
pSyncMsg->minMatchIndex = syncMinMatchIndex(pSyncNode);
pSyncMsg->privateTerm = 0;
SRpcMsg rpcMsg;
syncHeartbeat2RpcMsg(pSyncMsg, &rpcMsg);
// eq msg
#if 0
if (pSyncNode->FpEqCtrlMsg != NULL) {
int32_t code = pSyncNode->FpEqCtrlMsg(pSyncNode->msgcb, &rpcMsg);
if (code != 0) {
sError("vgId:%d, sync ctrl enqueue timer msg error, code:%d", pSyncNode->vgId, code);
rpcFreeCont(rpcMsg.pCont);
syncHeartbeatDestroy(pSyncMsg);
return;
}
} else {
sError("vgId:%d, enqueue ctrl msg cb ptr (i.e. FpEqMsg) not set.", pSyncNode->vgId);
}
#endif
// send msg
syncNodeSendHeartbeat(pSyncNode, &(pSyncMsg->destId), pSyncMsg);
syncHeartbeatDestroy(pSyncMsg);
if (syncEnvIsStart()) {
taosTmrReset(syncNodeEqPeerHeartbeatTimer, pSyncTimer->timerMS, pData, gSyncEnv->pTimerManager,
&pSyncTimer->pTimer);
} else {
sError("sync env is stop, syncNodeEqHeartbeatTimer");
}
} else {
sTrace("==syncNodeEqPeerHeartbeatTimer== timerLogicClock:%" PRIu64 ", msgLogicClock:%" PRIu64 "", timerLogicClock,
msgLogicClock);
}
}
}
static int32_t syncNodeEqNoop(SSyncNode* ths) {
int32_t ret = 0;
ASSERT(ths->state == TAOS_SYNC_STATE_LEADER);
......@@ -2676,10 +2955,9 @@ static int32_t syncNodeAppendNoop(SSyncNode* ths) {
if (ths->state == TAOS_SYNC_STATE_LEADER) {
int32_t code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry);
if (code != 0) {
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
syncNodeErrorLog(ths, "append noop error");
return -1;
}
syncNodeReplicate(ths, false);
}
if (h) {
......@@ -2738,13 +3016,15 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, SyncHeartbeat* pMsg) {
SRpcMsg rpcMsg;
syncHeartbeatReply2RpcMsg(pMsgReply, &rpcMsg);
#if 1
if (pMsg->term >= ths->pRaftStore->currentTerm && ths->state != TAOS_SYNC_STATE_FOLLOWER) {
syncNodeBecomeFollower(ths, "become follower by hb");
syncNodeStepDown(ths, pMsg->term);
}
#endif
if (pMsg->term == ths->pRaftStore->currentTerm) {
// sInfo("vgId:%d, heartbeat reset timer", ths->vgId);
if (pMsg->term == ths->pRaftStore->currentTerm && ths->state != TAOS_SYNC_STATE_LEADER) {
syncNodeResetElectTimer(ths);
ths->minMatchIndex = pMsg->minMatchIndex;
#if 0
if (ths->state == TAOS_SYNC_STATE_FOLLOWER) {
......@@ -2785,10 +3065,12 @@ int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, SyncHeartbeatReply* pMsg) {
// /\ UNCHANGED <<messages, serverVars, candidateVars,
// leaderVars, commitIndex>>
//
int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncIndex* pRetIndex) {
int32_t syncNodeOnClientRequest(SSyncNode* ths, SyncClientRequest* pMsg, SyncIndex* pRetIndex) {
syncNodeEventLog(ths, "on client request");
int32_t ret = 0;
int32_t code = 0;
syncClientRequestLog2("==syncNodeOnClientRequestCb==", pMsg);
SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore);
SyncTerm term = ths->pRaftStore->currentTerm;
......@@ -2803,16 +3085,13 @@ int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncI
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry);
if (code != 0) {
// del resp mgr, call FpCommitCb
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
ASSERT(0);
return -1;
}
// if mulit replica, start replicate right now
if (ths->replicaNum > 1) {
syncNodeReplicate(ths, false);
// pre commit
syncNodePreCommit(ths, pEntry, 0);
syncNodeReplicate(ths);
}
// if only myself, maybe commit right now
......@@ -2838,61 +3117,6 @@ int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncI
return ret;
}
int32_t syncNodeOnClientRequestBatchCb(SSyncNode* ths, SyncClientRequestBatch* pMsg) {
int32_t code = 0;
if (ths->state != TAOS_SYNC_STATE_LEADER) {
// call FpCommitCb, delete resp mgr
return -1;
}
SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore);
SyncTerm term = ths->pRaftStore->currentTerm;
int32_t raftMetaArrayLen = sizeof(SRaftMeta) * pMsg->dataCount;
int32_t rpcArrayLen = sizeof(SRpcMsg) * pMsg->dataCount;
SRaftMeta* raftMetaArr = (SRaftMeta*)(pMsg->data);
SRpcMsg* msgArr = (SRpcMsg*)((char*)(pMsg->data) + raftMetaArrayLen);
for (int32_t i = 0; i < pMsg->dataCount; ++i) {
SSyncRaftEntry* pEntry = syncEntryBuild(msgArr[i].contLen);
ASSERT(pEntry != NULL);
pEntry->originalRpcType = msgArr[i].msgType;
pEntry->seqNum = raftMetaArr[i].seqNum;
pEntry->isWeak = raftMetaArr[i].isWeak;
pEntry->term = term;
pEntry->index = index;
memcpy(pEntry->data, msgArr[i].pCont, msgArr[i].contLen);
ASSERT(msgArr[i].contLen == pEntry->dataLen);
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry);
if (code != 0) {
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
// del resp mgr, call FpCommitCb
return -1;
}
// update rpc msg conn apply.index
msgArr[i].info.conn.applyIndex = pEntry->index;
}
// fsync once
SSyncLogStoreData* pData = ths->pLogStore->data;
SWal* pWal = pData->pWal;
walFsync(pWal, false);
if (ths->replicaNum > 1) {
// if multi replica, start replicate right now
syncNodeReplicate(ths, false);
} else if (ths->replicaNum == 1) {
// one replica
syncMaybeAdvanceCommitIndex(ths);
}
return 0;
}
const char* syncStr(ESyncState state) {
switch (state) {
case TAOS_SYNC_STATE_FOLLOWER:
......@@ -2942,7 +3166,7 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "do leader transfer, index:%" PRId64 "", pEntry->index);
snprintf(logBuf, sizeof(logBuf), "do leader transfer, index:%" PRId64, pEntry->index);
syncNodeEventLog(ths, logBuf);
} while (0);
......@@ -3081,11 +3305,10 @@ static int32_t syncNodeProposeConfigChangeFinish(SSyncNode* ths, SyncReconfigFin
}
bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) {
return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType));
// return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1);
return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1);
}
int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) {
int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) {
if (beginIndex > endIndex) {
return 0;
}
......@@ -3121,10 +3344,7 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex,
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
} else {
code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry);
if (code != 0) {
sError("vgId:%d, failed to get log entry since %s. index:%" PRId64 "", ths->vgId, tstrerror(terrno), i);
return -1;
}
ASSERT(code == 0);
ASSERT(pEntry != NULL);
}
......@@ -3134,8 +3354,7 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex,
// user commit
if ((ths->pFsm->FpCommitCb != NULL) && syncUtilUserCommit(pEntry->originalRpcType)) {
bool internalExecute = true;
if ((ths->replicaNum == 1) && ths->restoreFinish) {
// if ((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1) {
if ((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1) {
internalExecute = false;
}
......@@ -3240,6 +3459,42 @@ SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId)
return pSender;
}
SSyncTimer* syncNodeGetHbTimer(SSyncNode* ths, SRaftId* pDestId) {
SSyncTimer* pTimer = NULL;
for (int i = 0; i < ths->replicaNum; ++i) {
if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) {
pTimer = &((ths->peerHeartbeatTimerArr)[i]);
}
}
return pTimer;
}
SPeerState* syncNodeGetPeerState(SSyncNode* ths, const SRaftId* pDestId) {
SPeerState* pState = NULL;
for (int i = 0; i < ths->replicaNum; ++i) {
if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) {
pState = &((ths->peerStates)[i]);
}
}
return pState;
}
bool syncNodeNeedSendAppendEntries(SSyncNode* ths, const SRaftId* pDestId, const SyncAppendEntries* pMsg) {
SPeerState* pState = syncNodeGetPeerState(ths, pDestId);
if (pState == NULL) {
return false;
}
SyncIndex sendIndex = pMsg->prevLogIndex + 1;
int64_t tsNow = taosGetTimestampMs();
if (pState->lastSendIndex == sendIndex && tsNow - pState->lastSendTime < SYNC_APPEND_ENTRIES_TIMEOUT_MS) {
return false;
}
return true;
}
bool syncNodeCanChange(SSyncNode* pSyncNode) {
if (pSyncNode->changing) {
sError("sync cannot change");
......@@ -3265,6 +3520,25 @@ bool syncNodeCanChange(SSyncNode* pSyncNode) {
return true;
}
const char* syncTimerTypeStr(enum ESyncTimeoutType timerType) {
if (timerType == SYNC_TIMEOUT_PING) {
return "ping";
} else if (timerType == SYNC_TIMEOUT_ELECTION) {
return "elect";
} else if (timerType == SYNC_TIMEOUT_HEARTBEAT) {
return "heartbeat";
} else {
return "unknown";
}
}
void syncLogRecvTimer(SSyncNode* pSyncNode, const SyncTimeout* pMsg, const char* s) {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf), "recv sync-timer {type:%s, lc:%" PRIu64 ", ms:%d, data:%p}, %s",
syncTimerTypeStr(pMsg->timeoutType), pMsg->logicClock, pMsg->timerMS, pMsg->data, s);
syncNodeEventLog(pSyncNode, logBuf);
}
void syncLogSendRequestVote(SSyncNode* pSyncNode, const SyncRequestVote* pMsg, const char* s) {
char host[64];
uint16_t port;
......@@ -3393,8 +3667,9 @@ void syncLogSendHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, const
syncUtilU642Addr(pMsg->destId.addr, host, sizeof(host), &port);
char logBuf[256];
snprintf(logBuf, sizeof(logBuf),
"send sync-heartbeat from %s:%d {term:%" PRIu64 ", cmt:%" PRIu64 ", pterm:%" PRIu64 "}, %s", host, port,
pMsg->term, pMsg->commitIndex, pMsg->privateTerm, s);
"send sync-heartbeat to %s:%d {term:%" PRIu64 ", cmt:%" PRId64 ", min-match:%" PRId64 ", pterm:%" PRIu64
"}, %s",
host, port, pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pMsg->privateTerm, s);
syncNodeEventLog(pSyncNode, logBuf);
}
......@@ -3404,8 +3679,9 @@ void syncLogRecvHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, const
syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port);
char logBuf[256];
snprintf(logBuf, sizeof(logBuf),
"recv sync-heartbeat from %s:%d {term:%" PRIu64 ", cmt:%" PRIu64 ", pterm:%" PRIu64 "}, %s", host, port,
pMsg->term, pMsg->commitIndex, pMsg->privateTerm, s);
"recv sync-heartbeat from %s:%d {term:%" PRIu64 ", cmt:%" PRId64 ", min-match:%" PRId64 ", pterm:%" PRIu64
"}, %s",
host, port, pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pMsg->privateTerm, s);
syncNodeEventLog(pSyncNode, logBuf);
}
......
......@@ -133,28 +133,28 @@ char* syncRpcMsg2Str(SRpcMsg* pRpcMsg) {
// for debug ----------------------
void syncRpcMsgPrint(SRpcMsg* pMsg) {
char* serialized = syncRpcMsg2Str(pMsg);
printf("syncRpcMsgPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncRpcMsgPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncRpcMsgPrint2(char* s, SRpcMsg* pMsg) {
char* serialized = syncRpcMsg2Str(pMsg);
printf("syncRpcMsgPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncRpcMsgPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncRpcMsgLog(SRpcMsg* pMsg) {
char* serialized = syncRpcMsg2Str(pMsg);
sTrace("syncRpcMsgLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncRpcMsgLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncRpcMsgLog2(char* s, SRpcMsg* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncRpcMsg2Str(pMsg);
sTrace("syncRpcMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncRpcMsgLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -271,21 +271,21 @@ void syncTimeoutPrint(const SyncTimeout* pMsg) {
void syncTimeoutPrint2(char* s, const SyncTimeout* pMsg) {
char* serialized = syncTimeout2Str(pMsg);
printf("syncTimeoutPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncTimeoutPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncTimeoutLog(const SyncTimeout* pMsg) {
char* serialized = syncTimeout2Str(pMsg);
sTrace("syncTimeoutLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncTimeoutLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncTimeoutLog2(char* s, const SyncTimeout* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncTimeout2Str(pMsg);
sTrace("syncTimeoutLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncTimeoutLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -526,28 +526,28 @@ char* syncPing2Str(const SyncPing* pMsg) {
// for debug ----------------------
void syncPingPrint(const SyncPing* pMsg) {
char* serialized = syncPing2Str(pMsg);
printf("syncPingPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncPingPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncPingPrint2(char* s, const SyncPing* pMsg) {
char* serialized = syncPing2Str(pMsg);
printf("syncPingPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncPingPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncPingLog(const SyncPing* pMsg) {
char* serialized = syncPing2Str(pMsg);
sTrace("syncPingLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncPingLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncPingLog2(char* s, const SyncPing* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncPing2Str(pMsg);
sTrace("syncPingLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncPingLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -931,28 +931,28 @@ char* syncClientRequest2Str(const SyncClientRequest* pMsg) {
// for debug ----------------------
void syncClientRequestPrint(const SyncClientRequest* pMsg) {
char* serialized = syncClientRequest2Str(pMsg);
printf("syncClientRequestPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncClientRequestPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncClientRequestPrint2(char* s, const SyncClientRequest* pMsg) {
char* serialized = syncClientRequest2Str(pMsg);
printf("syncClientRequestPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncClientRequestPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncClientRequestLog(const SyncClientRequest* pMsg) {
char* serialized = syncClientRequest2Str(pMsg);
sTrace("syncClientRequestLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncClientRequestLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncClientRequestLog2(char* s, const SyncClientRequest* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncClientRequest2Str(pMsg);
sTrace("syncClientRequestLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncClientRequestLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -1101,28 +1101,28 @@ char* syncClientRequestBatch2Str(const SyncClientRequestBatch* pMsg) {
// for debug ----------------------
void syncClientRequestBatchPrint(const SyncClientRequestBatch* pMsg) {
char* serialized = syncClientRequestBatch2Str(pMsg);
printf("syncClientRequestBatchPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncClientRequestBatchPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncClientRequestBatchPrint2(char* s, const SyncClientRequestBatch* pMsg) {
char* serialized = syncClientRequestBatch2Str(pMsg);
printf("syncClientRequestBatchPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncClientRequestBatchPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncClientRequestBatchLog(const SyncClientRequestBatch* pMsg) {
char* serialized = syncClientRequestBatch2Str(pMsg);
sTrace("syncClientRequestBatchLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncClientRequestBatchLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncClientRequestBatchLog2(char* s, const SyncClientRequestBatch* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncClientRequestBatch2Str(pMsg);
sTraceLong("syncClientRequestBatchLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTraceLong("syncClientRequestBatchLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -1252,28 +1252,28 @@ char* syncRequestVote2Str(const SyncRequestVote* pMsg) {
// for debug ----------------------
void syncRequestVotePrint(const SyncRequestVote* pMsg) {
char* serialized = syncRequestVote2Str(pMsg);
printf("syncRequestVotePrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncRequestVotePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncRequestVotePrint2(char* s, const SyncRequestVote* pMsg) {
char* serialized = syncRequestVote2Str(pMsg);
printf("syncRequestVotePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncRequestVotePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncRequestVoteLog(const SyncRequestVote* pMsg) {
char* serialized = syncRequestVote2Str(pMsg);
sTrace("syncRequestVoteLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncRequestVoteLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncRequestVoteLog2(char* s, const SyncRequestVote* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncRequestVote2Str(pMsg);
sTrace("syncRequestVoteLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncRequestVoteLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -1400,28 +1400,28 @@ char* syncRequestVoteReply2Str(const SyncRequestVoteReply* pMsg) {
// for debug ----------------------
void syncRequestVoteReplyPrint(const SyncRequestVoteReply* pMsg) {
char* serialized = syncRequestVoteReply2Str(pMsg);
printf("syncRequestVoteReplyPrint | len:%ld | %s \n", strlen(serialized), serialized);
printf("syncRequestVoteReplyPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncRequestVoteReplyPrint2(char* s, const SyncRequestVoteReply* pMsg) {
char* serialized = syncRequestVoteReply2Str(pMsg);
printf("syncRequestVoteReplyPrint2 | len:%ld | %s | %s \n", strlen(serialized), s, serialized);
printf("syncRequestVoteReplyPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncRequestVoteReplyLog(const SyncRequestVoteReply* pMsg) {
char* serialized = syncRequestVoteReply2Str(pMsg);
sTrace("syncRequestVoteReplyLog | len:%ld | %s", strlen(serialized), serialized);
sTrace("syncRequestVoteReplyLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncRequestVoteReplyLog2(char* s, const SyncRequestVoteReply* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncRequestVoteReply2Str(pMsg);
sTrace("syncRequestVoteReplyLog2 | len:%ld | %s | %s", strlen(serialized), s, serialized);
sTrace("syncRequestVoteReplyLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -1571,28 +1571,28 @@ char* syncAppendEntries2Str(const SyncAppendEntries* pMsg) {
// for debug ----------------------
void syncAppendEntriesPrint(const SyncAppendEntries* pMsg) {
char* serialized = syncAppendEntries2Str(pMsg);
printf("syncAppendEntriesPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncAppendEntriesPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncAppendEntriesPrint2(char* s, const SyncAppendEntries* pMsg) {
char* serialized = syncAppendEntries2Str(pMsg);
printf("syncAppendEntriesPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncAppendEntriesPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncAppendEntriesLog(const SyncAppendEntries* pMsg) {
char* serialized = syncAppendEntries2Str(pMsg);
sTrace("syncAppendEntriesLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncAppendEntriesLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncAppendEntriesLog2(char* s, const SyncAppendEntries* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncAppendEntries2Str(pMsg);
sTrace("syncAppendEntriesLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncAppendEntriesLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -1810,28 +1810,28 @@ char* syncAppendEntriesBatch2Str(const SyncAppendEntriesBatch* pMsg) {
// for debug ----------------------
void syncAppendEntriesBatchPrint(const SyncAppendEntriesBatch* pMsg) {
char* serialized = syncAppendEntriesBatch2Str(pMsg);
printf("syncAppendEntriesBatchPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncAppendEntriesBatchPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncAppendEntriesBatchPrint2(char* s, const SyncAppendEntriesBatch* pMsg) {
char* serialized = syncAppendEntriesBatch2Str(pMsg);
printf("syncAppendEntriesBatchPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncAppendEntriesBatchPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncAppendEntriesBatchLog(const SyncAppendEntriesBatch* pMsg) {
char* serialized = syncAppendEntriesBatch2Str(pMsg);
sTrace("syncAppendEntriesBatchLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncAppendEntriesBatchLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncAppendEntriesBatchLog2(char* s, const SyncAppendEntriesBatch* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncAppendEntriesBatch2Str(pMsg);
sTraceLong("syncAppendEntriesBatchLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTraceLong("syncAppendEntriesBatchLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -1966,28 +1966,28 @@ char* syncAppendEntriesReply2Str(const SyncAppendEntriesReply* pMsg) {
// for debug ----------------------
void syncAppendEntriesReplyPrint(const SyncAppendEntriesReply* pMsg) {
char* serialized = syncAppendEntriesReply2Str(pMsg);
printf("syncAppendEntriesReplyPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncAppendEntriesReplyPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncAppendEntriesReplyPrint2(char* s, const SyncAppendEntriesReply* pMsg) {
char* serialized = syncAppendEntriesReply2Str(pMsg);
printf("syncAppendEntriesReplyPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncAppendEntriesReplyPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncAppendEntriesReplyLog(const SyncAppendEntriesReply* pMsg) {
char* serialized = syncAppendEntriesReply2Str(pMsg);
sTrace("syncAppendEntriesReplyLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncAppendEntriesReplyLog | len:%d| %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncAppendEntriesReplyLog2(char* s, const SyncAppendEntriesReply* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncAppendEntriesReply2Str(pMsg);
sTrace("syncAppendEntriesReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncAppendEntriesReplyLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -2119,28 +2119,28 @@ char* syncHeartbeat2Str(const SyncHeartbeat* pMsg) {
void syncHeartbeatPrint(const SyncHeartbeat* pMsg) {
char* serialized = syncHeartbeat2Str(pMsg);
printf("syncHeartbeatPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncHeartbeatPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncHeartbeatPrint2(char* s, const SyncHeartbeat* pMsg) {
char* serialized = syncHeartbeat2Str(pMsg);
printf("syncHeartbeatPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncHeartbeatPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncHeartbeatLog(const SyncHeartbeat* pMsg) {
char* serialized = syncHeartbeat2Str(pMsg);
sTrace("syncHeartbeatLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncHeartbeatLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncHeartbeatLog2(char* s, const SyncHeartbeat* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncHeartbeat2Str(pMsg);
sTrace("syncHeartbeatLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncHeartbeatLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -2273,28 +2273,28 @@ char* syncHeartbeatReply2Str(const SyncHeartbeatReply* pMsg) {
void syncHeartbeatReplyPrint(const SyncHeartbeatReply* pMsg) {
char* serialized = syncHeartbeatReply2Str(pMsg);
printf("syncHeartbeatReplyPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncHeartbeatReplyPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncHeartbeatReplyPrint2(char* s, const SyncHeartbeatReply* pMsg) {
char* serialized = syncHeartbeatReply2Str(pMsg);
printf("syncHeartbeatReplyPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncHeartbeatReplyPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncHeartbeatReplyLog(const SyncHeartbeatReply* pMsg) {
char* serialized = syncHeartbeatReply2Str(pMsg);
sTrace("syncHeartbeatReplyLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncHeartbeatReplyLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncHeartbeatReplyLog2(char* s, const SyncHeartbeatReply* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncHeartbeatReply2Str(pMsg);
sTrace("syncHeartbeatReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncHeartbeatReplyLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -2426,28 +2426,28 @@ char* syncApplyMsg2Str(const SyncApplyMsg* pMsg) {
// for debug ----------------------
void syncApplyMsgPrint(const SyncApplyMsg* pMsg) {
char* serialized = syncApplyMsg2Str(pMsg);
printf("syncApplyMsgPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncApplyMsgPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncApplyMsgPrint2(char* s, const SyncApplyMsg* pMsg) {
char* serialized = syncApplyMsg2Str(pMsg);
printf("syncApplyMsgPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncApplyMsgPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncApplyMsgLog(const SyncApplyMsg* pMsg) {
char* serialized = syncApplyMsg2Str(pMsg);
sTrace("ssyncApplyMsgLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("ssyncApplyMsgLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncApplyMsgLog2(char* s, const SyncApplyMsg* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncApplyMsg2Str(pMsg);
sTrace("syncApplyMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncApplyMsgLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -2603,28 +2603,28 @@ char* syncSnapshotSend2Str(const SyncSnapshotSend* pMsg) {
// for debug ----------------------
void syncSnapshotSendPrint(const SyncSnapshotSend* pMsg) {
char* serialized = syncSnapshotSend2Str(pMsg);
printf("syncSnapshotSendPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncSnapshotSendPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncSnapshotSendPrint2(char* s, const SyncSnapshotSend* pMsg) {
char* serialized = syncSnapshotSend2Str(pMsg);
printf("syncSnapshotSendPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncSnapshotSendPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncSnapshotSendLog(const SyncSnapshotSend* pMsg) {
char* serialized = syncSnapshotSend2Str(pMsg);
sTrace("syncSnapshotSendLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncSnapshotSendLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncSnapshotSendLog2(char* s, const SyncSnapshotSend* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncSnapshotSend2Str(pMsg);
sTrace("syncSnapshotSendLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncSnapshotSendLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -2763,28 +2763,28 @@ char* syncSnapshotRsp2Str(const SyncSnapshotRsp* pMsg) {
// for debug ----------------------
void syncSnapshotRspPrint(const SyncSnapshotRsp* pMsg) {
char* serialized = syncSnapshotRsp2Str(pMsg);
printf("syncSnapshotRspPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncSnapshotRspPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncSnapshotRspPrint2(char* s, const SyncSnapshotRsp* pMsg) {
char* serialized = syncSnapshotRsp2Str(pMsg);
printf("syncSnapshotRspPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncSnapshotRspPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncSnapshotRspLog(const SyncSnapshotRsp* pMsg) {
char* serialized = syncSnapshotRsp2Str(pMsg);
sTrace("syncSnapshotRspLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncSnapshotRspLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncSnapshotRspLog2(char* s, const SyncSnapshotRsp* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncSnapshotRsp2Str(pMsg);
sTrace("syncSnapshotRspLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncSnapshotRspLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -2925,28 +2925,28 @@ char* syncLeaderTransfer2Str(const SyncLeaderTransfer* pMsg) {
// for debug ----------------------
void syncLeaderTransferPrint(const SyncLeaderTransfer* pMsg) {
char* serialized = syncLeaderTransfer2Str(pMsg);
printf("syncLeaderTransferPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncLeaderTransferPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncLeaderTransferPrint2(char* s, const SyncLeaderTransfer* pMsg) {
char* serialized = syncLeaderTransfer2Str(pMsg);
printf("syncLeaderTransferPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncLeaderTransferPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncLeaderTransferLog(const SyncLeaderTransfer* pMsg) {
char* serialized = syncLeaderTransfer2Str(pMsg);
sTrace("syncLeaderTransferLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncLeaderTransferLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncLeaderTransferLog2(char* s, const SyncLeaderTransfer* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncLeaderTransfer2Str(pMsg);
sTrace("syncLeaderTransferLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncLeaderTransferLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -3054,28 +3054,28 @@ char* syncReconfigFinish2Str(const SyncReconfigFinish* pMsg) {
// for debug ----------------------
void syncReconfigFinishPrint(const SyncReconfigFinish* pMsg) {
char* serialized = syncReconfigFinish2Str(pMsg);
printf("syncReconfigFinishPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncReconfigFinishPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncReconfigFinishPrint2(char* s, const SyncReconfigFinish* pMsg) {
char* serialized = syncReconfigFinish2Str(pMsg);
printf("syncReconfigFinishPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncReconfigFinishPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncReconfigFinishLog(const SyncReconfigFinish* pMsg) {
char* serialized = syncReconfigFinish2Str(pMsg);
sTrace("syncReconfigFinishLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncReconfigFinishLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncReconfigFinishLog2(char* s, const SyncReconfigFinish* pMsg) {
if (gRaftDetailLog) {
char* serialized = syncReconfigFinish2Str(pMsg);
sTrace("syncReconfigFinishLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncReconfigFinishLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -447,85 +447,85 @@ int32_t raftCfgFromStr(const char *s, SRaftCfg *pRaftCfg) {
// for debug ----------------------
void syncCfgPrint(SSyncCfg *pCfg) {
char *serialized = syncCfg2Str(pCfg);
printf("syncCfgPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("syncCfgPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncCfgPrint2(char *s, SSyncCfg *pCfg) {
char *serialized = syncCfg2Str(pCfg);
printf("syncCfgPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("syncCfgPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void syncCfgLog(SSyncCfg *pCfg) {
char *serialized = syncCfg2Str(pCfg);
sTrace("syncCfgLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("syncCfgLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void syncCfgLog2(char *s, SSyncCfg *pCfg) {
char *serialized = syncCfg2Str(pCfg);
sTrace("syncCfgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncCfgLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
void syncCfgLog3(char *s, SSyncCfg *pCfg) {
char *serialized = syncCfg2SimpleStr(pCfg);
sTrace("syncCfgLog3 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("syncCfgLog3 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
void raftCfgPrint(SRaftCfg *pCfg) {
char *serialized = raftCfg2Str(pCfg);
printf("raftCfgPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("raftCfgPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void raftCfgPrint2(char *s, SRaftCfg *pCfg) {
char *serialized = raftCfg2Str(pCfg);
printf("raftCfgPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("raftCfgPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void raftCfgLog(SRaftCfg *pCfg) {
char *serialized = raftCfg2Str(pCfg);
sTrace("raftCfgLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("raftCfgLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void raftCfgLog2(char *s, SRaftCfg *pCfg) {
char *serialized = raftCfg2Str(pCfg);
sTrace("raftCfgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("raftCfgLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
// ---------
void raftCfgIndexPrint(SRaftCfgIndex *pCfg) {
char *serialized = raftCfgIndex2Str(pCfg);
printf("raftCfgIndexPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("raftCfgIndexPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void raftCfgIndexPrint2(char *s, SRaftCfgIndex *pCfg) {
char *serialized = raftCfgIndex2Str(pCfg);
printf("raftCfgIndexPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("raftCfgIndexPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void raftCfgIndexLog(SRaftCfgIndex *pCfg) {
char *serialized = raftCfgIndex2Str(pCfg);
sTrace("raftCfgIndexLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("raftCfgIndexLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void raftCfgIndexLog2(char *s, SRaftCfgIndex *pCfg) {
char *serialized = raftCfgIndex2Str(pCfg);
sTrace("raftCfgIndexLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("raftCfgIndexLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
......@@ -418,28 +418,28 @@ char* raftCache2Str(SRaftEntryHashCache* pCache) {
void raftCachePrint(SRaftEntryHashCache* pCache) {
char* serialized = raftCache2Str(pCache);
printf("raftCachePrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("raftCachePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void raftCachePrint2(char* s, SRaftEntryHashCache* pCache) {
char* serialized = raftCache2Str(pCache);
printf("raftCachePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("raftCachePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void raftCacheLog(SRaftEntryHashCache* pCache) {
char* serialized = raftCache2Str(pCache);
sTrace("raftCacheLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("raftCacheLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void raftCacheLog2(char* s, SRaftEntryHashCache* pCache) {
if (gRaftDetailLog) {
char* serialized = raftCache2Str(pCache);
sTraceLong("raftCacheLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTraceLong("raftCacheLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -677,28 +677,28 @@ char* raftEntryCache2Str(SRaftEntryCache* pObj) {
void raftEntryCachePrint(SRaftEntryCache* pObj) {
char* serialized = raftEntryCache2Str(pObj);
printf("raftEntryCachePrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("raftEntryCachePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void raftEntryCachePrint2(char* s, SRaftEntryCache* pObj) {
char* serialized = raftEntryCache2Str(pObj);
printf("raftEntryCachePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("raftEntryCachePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void raftEntryCacheLog(SRaftEntryCache* pObj) {
char* serialized = raftEntryCache2Str(pObj);
sTrace("raftEntryCacheLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("raftEntryCacheLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void raftEntryCacheLog2(char* s, SRaftEntryCache* pObj) {
if (gRaftDetailLog) {
char* serialized = raftEntryCache2Str(pObj);
sTraceLong("raftEntryCacheLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTraceLong("raftEntryCacheLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -33,21 +33,11 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEn
static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry);
static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIndex);
static bool raftLogExist(struct SSyncLogStore* pLogStore, SyncIndex index);
static int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index);
static SyncIndex raftlogCommitIndex(SSyncLogStore* pLogStore);
// private function
static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** ppLastEntry);
//-------------------------------
// log[0 .. n]
static SSyncRaftEntry* logStoreGetLastEntry(SSyncLogStore* pLogStore);
static SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore);
static SyncTerm logStoreLastTerm(SSyncLogStore* pLogStore);
static SSyncRaftEntry* logStoreGetEntry(SSyncLogStore* pLogStore, SyncIndex index);
static int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry);
static int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex fromIndex);
static int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index);
static SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore);
//-------------------------------
SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) {
SSyncLogStore* pLogStore = taosMemoryMalloc(sizeof(SSyncLogStore));
......@@ -74,14 +64,8 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) {
pData->pWalHandle = walOpenReader(pData->pWal, NULL);
ASSERT(pData->pWalHandle != NULL);
pLogStore->appendEntry = logStoreAppendEntry;
pLogStore->getEntry = logStoreGetEntry;
pLogStore->truncate = logStoreTruncate;
pLogStore->getLastIndex = logStoreLastIndex;
pLogStore->getLastTerm = logStoreLastTerm;
pLogStore->updateCommitIndex = logStoreUpdateCommitIndex;
pLogStore->getCommitIndex = logStoreGetCommitIndex;
pLogStore->syncLogUpdateCommitIndex = raftLogUpdateCommitIndex;
pLogStore->syncLogCommitIndex = raftlogCommitIndex;
pLogStore->syncLogRestoreFromSnapshot = raftLogRestoreFromSnapshot;
pLogStore->syncLogBeginIndex = raftLogBeginIndex;
pLogStore->syncLogEndIndex = raftLogEndIndex;
......@@ -234,6 +218,8 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr
snprintf(logBuf, sizeof(logBuf), "wal write error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
pEntry->index, err, err, errStr, sysErr, sysErrStr);
syncNodeErrorLog(pData->pSyncNode, logBuf);
ASSERT(0);
return -1;
}
pEntry->index = index;
......@@ -277,11 +263,15 @@ static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index,
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
index, err, err, errStr, sysErr, sysErrStr);
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
// syncNodeEventLog(pData->pSyncNode, logBuf);
snprintf(logBuf, sizeof(logBuf),
"wal read not exist, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index, err, err,
errStr, sysErr, sysErrStr);
syncNodeEventLog(pData->pSyncNode, logBuf);
} else {
snprintf(logBuf, sizeof(logBuf), "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
index, err, err, errStr, sysErr, sysErrStr);
syncNodeErrorLog(pData->pSyncNode, logBuf);
}
} while (0);
......@@ -372,157 +362,7 @@ static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** pp
return -1;
}
//-------------------------------
// log[0 .. n]
int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) {
SSyncLogStoreData* pData = pLogStore->data;
SWal* pWal = pData->pWal;
SyncIndex index = 0;
SWalSyncInfo syncMeta = {0};
syncMeta.isWeek = pEntry->isWeak;
syncMeta.seqNum = pEntry->seqNum;
syncMeta.term = pEntry->term;
index = walAppendLog(pWal, pEntry->originalRpcType, syncMeta, pEntry->data, pEntry->dataLen);
if (index < 0) {
int32_t err = terrno;
const char* errStr = tstrerror(err);
int32_t sysErr = errno;
const char* sysErrStr = strerror(errno);
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "wal write error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
pEntry->index, err, err, errStr, sysErr, sysErrStr);
syncNodeErrorLog(pData->pSyncNode, logBuf);
ASSERT(0);
return -1;
}
pEntry->index = index;
do {
char eventLog[128];
snprintf(eventLog, sizeof(eventLog), "write2 index:%" PRId64 ", type:%s, origin type:%s", pEntry->index,
TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType));
syncNodeEventLog(pData->pSyncNode, eventLog);
} while (0);
return 0;
}
SSyncRaftEntry* logStoreGetEntryWithoutLock(SSyncLogStore* pLogStore, SyncIndex index) {
SSyncLogStoreData* pData = pLogStore->data;
SWal* pWal = pData->pWal;
if (index >= SYNC_INDEX_BEGIN && index <= logStoreLastIndex(pLogStore)) {
// SWalReadHandle* pWalHandle = walOpenReadHandle(pWal);
SWalReader* pWalHandle = pData->pWalHandle;
ASSERT(pWalHandle != NULL);
int32_t code = walReadVer(pWalHandle, index);
// int32_t code = walReadVerCached(pWalHandle, index);
if (code != 0) {
int32_t err = terrno;
const char* errStr = tstrerror(err);
int32_t sysErr = errno;
const char* sysErrStr = strerror(errno);
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
index, err, err, errStr, sysErr, sysErrStr);
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
// syncNodeEventLog(pData->pSyncNode, logBuf);
} else {
syncNodeErrorLog(pData->pSyncNode, logBuf);
}
} while (0);
sError("failed to read ver since %s. index:%" PRId64 "", tstrerror(terrno), index);
return NULL;
}
SSyncRaftEntry* pEntry = syncEntryBuild(pWalHandle->pHead->head.bodyLen);
ASSERT(pEntry != NULL);
pEntry->msgType = TDMT_SYNC_CLIENT_REQUEST;
pEntry->originalRpcType = pWalHandle->pHead->head.msgType;
pEntry->seqNum = pWalHandle->pHead->head.syncMeta.seqNum;
pEntry->isWeak = pWalHandle->pHead->head.syncMeta.isWeek;
pEntry->term = pWalHandle->pHead->head.syncMeta.term;
pEntry->index = index;
ASSERT(pEntry->dataLen == pWalHandle->pHead->head.bodyLen);
memcpy(pEntry->data, pWalHandle->pHead->head.body, pWalHandle->pHead->head.bodyLen);
/*
int32_t saveErr = terrno;
walCloseReadHandle(pWalHandle);
terrno = saveErr;
*/
return pEntry;
} else {
return NULL;
}
}
SSyncRaftEntry* logStoreGetEntry(SSyncLogStore* pLogStore, SyncIndex index) {
SSyncLogStoreData* pData = pLogStore->data;
SSyncRaftEntry *pEntry = NULL;
taosThreadMutexLock(&pData->mutex);
pEntry = logStoreGetEntryWithoutLock(pLogStore, index);
taosThreadMutexUnlock(&pData->mutex);
return pEntry;
}
int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex fromIndex) {
SSyncLogStoreData* pData = pLogStore->data;
SWal* pWal = pData->pWal;
// ASSERT(walRollback(pWal, fromIndex) == 0);
int32_t code = walRollback(pWal, fromIndex);
if (code != 0) {
int32_t err = terrno;
const char* errStr = tstrerror(err);
int32_t sysErr = errno;
const char* sysErrStr = strerror(errno);
sError("vgId:%d, wal truncate error, from-index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
pData->pSyncNode->vgId, fromIndex, err, err, errStr, sysErr, sysErrStr);
ASSERT(0);
}
// event log
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "wal truncate, from-index:%" PRId64, fromIndex);
syncNodeEventLog(pData->pSyncNode, logBuf);
} while (0);
return 0;
}
SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore) {
SSyncLogStoreData* pData = pLogStore->data;
SWal* pWal = pData->pWal;
SyncIndex lastIndex = walGetLastVer(pWal);
return lastIndex;
}
SyncTerm logStoreLastTerm(SSyncLogStore* pLogStore) {
SyncTerm lastTerm = 0;
SSyncRaftEntry* pLastEntry = logStoreGetLastEntry(pLogStore);
if (pLastEntry != NULL) {
lastTerm = pLastEntry->term;
taosMemoryFree(pLastEntry);
}
return lastTerm;
}
int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) {
int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) {
SSyncLogStoreData* pData = pLogStore->data;
SWal* pWal = pData->pWal;
// ASSERT(walCommit(pWal, index) == 0);
......@@ -540,23 +380,11 @@ int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) {
return 0;
}
SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore) {
SyncIndex raftlogCommitIndex(SSyncLogStore* pLogStore) {
SSyncLogStoreData* pData = pLogStore->data;
return pData->pSyncNode->commitIndex;
}
SSyncRaftEntry* logStoreGetLastEntry(SSyncLogStore* pLogStore) {
SSyncLogStoreData* pData = pLogStore->data;
SWal* pWal = pData->pWal;
SyncIndex lastIndex = walGetLastVer(pWal);
SSyncRaftEntry* pEntry = NULL;
if (lastIndex > 0) {
pEntry = logStoreGetEntry(pLogStore, lastIndex);
}
return pEntry;
}
cJSON* logStore2Json(SSyncLogStore* pLogStore) {
char u64buf[128] = {0};
SSyncLogStoreData* pData = (SSyncLogStoreData*)pLogStore->data;
......@@ -595,7 +423,9 @@ cJSON* logStore2Json(SSyncLogStore* pLogStore) {
if (!raftLogIsEmpty(pLogStore)) {
for (SyncIndex i = beginIndex; i <= endIndex; ++i) {
SSyncRaftEntry* pEntry = logStoreGetEntry(pLogStore, i);
SSyncRaftEntry* pEntry = NULL;
raftLogGetEntry(pLogStore, i, &pEntry);
cJSON_AddItemToArray(pEntries, syncEntry2Json(pEntry));
syncEntryDestory(pEntry);
}
......@@ -675,14 +505,14 @@ SyncIndex logStoreWalCommitVer(SSyncLogStore* pLogStore) {
// for debug -----------------
void logStorePrint(SSyncLogStore* pLogStore) {
char* serialized = logStore2Str(pLogStore);
printf("logStorePrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("logStorePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void logStorePrint2(char* s, SSyncLogStore* pLogStore) {
char* serialized = logStore2Str(pLogStore);
printf("logStorePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("logStorePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
......@@ -690,7 +520,7 @@ void logStorePrint2(char* s, SSyncLogStore* pLogStore) {
void logStoreLog(SSyncLogStore* pLogStore) {
if (gRaftDetailLog) {
char* serialized = logStore2Str(pLogStore);
sTraceLong("logStoreLog | len:%lu | %s", strlen(serialized), serialized);
sTraceLong("logStoreLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
}
......@@ -698,7 +528,7 @@ void logStoreLog(SSyncLogStore* pLogStore) {
void logStoreLog2(char* s, SSyncLogStore* pLogStore) {
if (gRaftDetailLog) {
char* serialized = logStore2Str(pLogStore);
sTraceLong("logStoreLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTraceLong("logStoreLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -706,28 +536,28 @@ void logStoreLog2(char* s, SSyncLogStore* pLogStore) {
// for debug -----------------
void logStoreSimplePrint(SSyncLogStore* pLogStore) {
char* serialized = logStoreSimple2Str(pLogStore);
printf("logStoreSimplePrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("logStoreSimplePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void logStoreSimplePrint2(char* s, SSyncLogStore* pLogStore) {
char* serialized = logStoreSimple2Str(pLogStore);
printf("logStoreSimplePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("logStoreSimplePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void logStoreSimpleLog(SSyncLogStore* pLogStore) {
char* serialized = logStoreSimple2Str(pLogStore);
sTrace("logStoreSimpleLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("logStoreSimpleLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void logStoreSimpleLog2(char* s, SSyncLogStore* pLogStore) {
if (gRaftDetailLog) {
char* serialized = logStoreSimple2Str(pLogStore);
sTrace("logStoreSimpleLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("logStoreSimpleLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
}
......@@ -226,25 +226,25 @@ char *raftStore2Str(SRaftStore *pRaftStore) {
// for debug -------------------
void raftStorePrint(SRaftStore *pObj) {
char *serialized = raftStore2Str(pObj);
printf("raftStorePrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("raftStorePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void raftStorePrint2(char *s, SRaftStore *pObj) {
char *serialized = raftStore2Str(pObj);
printf("raftStorePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("raftStorePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void raftStoreLog(SRaftStore *pObj) {
char *serialized = raftStore2Str(pObj);
sTrace("raftStoreLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("raftStoreLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void raftStoreLog2(char *s, SRaftStore *pObj) {
char *serialized = raftStore2Str(pObj);
sTrace("raftStoreLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("raftStoreLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
......@@ -47,39 +47,39 @@
// msource |-> i,
// mdest |-> j])
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
//
int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode) {
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_LEADER);
syncIndexMgrLog2("==syncNodeAppendEntriesPeers== pNextIndex", pSyncNode->pNextIndex);
syncIndexMgrLog2("==syncNodeAppendEntriesPeers== pMatchIndex", pSyncNode->pMatchIndex);
logStoreSimpleLog2("==syncNodeAppendEntriesPeers==", pSyncNode->pLogStore);
int32_t ret = 0;
for (int i = 0; i < pSyncNode->peersNum; ++i) {
SRaftId* pDestId = &(pSyncNode->peersId[i]);
// set prevLogIndex
int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId) {
// next index
SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
SyncIndex preLogIndex = nextIndex - 1;
// set preLogTerm
SyncTerm preLogTerm = 0;
if (preLogIndex >= SYNC_INDEX_BEGIN) {
SSyncRaftEntry* pPreEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, preLogIndex);
ASSERT(pPreEntry != NULL);
// maybe start snapshot
SyncIndex logStartIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
SyncIndex logEndIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
if (nextIndex < logStartIndex || nextIndex - 1 > logEndIndex) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "start snapshot for next-index:%" PRId64 ", start:%" PRId64 ", end:%" PRId64,
nextIndex, logStartIndex, logEndIndex);
syncNodeEventLog(pSyncNode, logBuf);
preLogTerm = pPreEntry->term;
syncEntryDestory(pPreEntry);
// start snapshot
int32_t code = syncNodeStartSnapshot(pSyncNode, pDestId);
ASSERT(code == 0);
return 0;
}
// batch optimized
// SyncIndex lastIndex = syncUtilMinIndex(pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore), nextIndex);
// pre index, pre term
SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);
// prepare entry
SyncAppendEntries* pMsg = NULL;
SSyncRaftEntry* pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, nextIndex);
if (pEntry != NULL) {
SSyncRaftEntry* pEntry;
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry);
if (code == 0) {
ASSERT(pEntry != NULL);
pMsg = syncAppendEntriesBuild(pEntry->bytes, pSyncNode->vgId);
ASSERT(pMsg != NULL);
......@@ -93,92 +93,29 @@ int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode) {
syncEntryDestory(pEntry);
} else {
// maybe overflow, send empty record
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
// no entry in log
pMsg = syncAppendEntriesBuild(0, pSyncNode->vgId);
ASSERT(pMsg != NULL);
}
ASSERT(pMsg != NULL);
pMsg->srcId = pSyncNode->myRaftId;
pMsg->destId = *pDestId;
pMsg->term = pSyncNode->pRaftStore->currentTerm;
pMsg->prevLogIndex = preLogIndex;
pMsg->prevLogTerm = preLogTerm;
pMsg->commitIndex = pSyncNode->commitIndex;
syncAppendEntriesLog2("==syncNodeAppendEntriesPeers==", pMsg);
// send AppendEntries
syncNodeAppendEntries(pSyncNode, pDestId, pMsg);
syncAppendEntriesDestroy(pMsg);
}
return ret;
}
int32_t syncNodeAppendEntriesOnePeer(SSyncNode* pSyncNode, SRaftId* pDestId, SyncIndex nextIndex) {
int32_t ret = 0;
// pre index, pre term
SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);
if (preLogTerm == SYNC_TERM_INVALID) {
SyncIndex newNextIndex = syncNodeGetLastIndex(pSyncNode) + 1;
// SyncIndex newNextIndex = nextIndex + 1;
syncIndexMgrSetIndex(pSyncNode->pNextIndex, pDestId, newNextIndex);
syncIndexMgrSetIndex(pSyncNode->pMatchIndex, pDestId, SYNC_INDEX_INVALID);
sError("vgId:%d, sync get pre term error, nextIndex:%" PRId64 ", update next-index:%" PRId64
", match-index:%d, raftid:%" PRId64,
pSyncNode->vgId, nextIndex, newNextIndex, SYNC_INDEX_INVALID, pDestId->addr);
return -1;
}
// entry pointer array
SSyncRaftEntry* entryPArr[SYNC_MAX_BATCH_SIZE];
memset(entryPArr, 0, sizeof(entryPArr));
// get entry batch
int32_t getCount = 0;
SyncIndex getEntryIndex = nextIndex;
for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) {
SSyncRaftEntry* pEntry = NULL;
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, getEntryIndex, &pEntry);
if (code == 0) {
ASSERT(pEntry != NULL);
entryPArr[i] = pEntry;
getCount++;
getEntryIndex++;
} else {
break;
}
}
// event log
do {
char logBuf[128];
char host[64];
uint16_t port;
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
snprintf(logBuf, sizeof(logBuf), "build batch:%d for %s:%d", getCount, host, port);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
// build msg
SyncAppendEntriesBatch* pMsg = syncAppendEntriesBatchBuild(entryPArr, getCount, pSyncNode->vgId);
ASSERT(pMsg != NULL);
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "replicate to %s:%d error, next-index:%" PRId64, host, port, nextIndex);
syncNodeErrorLog(pSyncNode, logBuf);
} while (0);
// free entries
for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) {
SSyncRaftEntry* pEntry = entryPArr[i];
if (pEntry != NULL) {
syncEntryDestory(pEntry);
entryPArr[i] = NULL;
syncAppendEntriesDestroy(pMsg);
return -1;
}
}
// prepare msg
ASSERT(pMsg != NULL);
pMsg->srcId = pSyncNode->myRaftId;
pMsg->destId = *pDestId;
pMsg->term = pSyncNode->pRaftStore->currentTerm;
......@@ -186,293 +123,69 @@ int32_t syncNodeAppendEntriesOnePeer(SSyncNode* pSyncNode, SRaftId* pDestId, Syn
pMsg->prevLogTerm = preLogTerm;
pMsg->commitIndex = pSyncNode->commitIndex;
pMsg->privateTerm = 0;
pMsg->dataCount = getCount;
// pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId);
// send msg
syncNodeAppendEntriesBatch(pSyncNode, pDestId, pMsg);
// speed up
if (pMsg->dataCount > 0 && pSyncNode->commitIndex - pMsg->prevLogIndex > SYNC_SLOW_DOWN_RANGE) {
ret = 1;
#if 0
do {
char logBuf[128];
char host[64];
uint16_t port;
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
snprintf(logBuf, sizeof(logBuf), "maybe speed up for %s:%d, pre-index:%ld", host, port, pMsg->prevLogIndex);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
#endif
}
syncAppendEntriesBatchDestroy(pMsg);
syncNodeMaybeSendAppendEntries(pSyncNode, pDestId, pMsg);
syncAppendEntriesDestroy(pMsg);
return ret;
return 0;
}
int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode) {
int32_t syncNodeReplicate(SSyncNode* pSyncNode) {
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
return -1;
}
int32_t ret = 0;
for (int i = 0; i < pSyncNode->peersNum; ++i) {
SRaftId* pDestId = &(pSyncNode->peersId[i]);
// next index
SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
ret = syncNodeAppendEntriesOnePeer(pSyncNode, pDestId, nextIndex);
}
return ret;
}
#if 0
int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode) {
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
return -1;
}
syncNodeEventLog(pSyncNode, "do replicate");
int32_t ret = 0;
for (int i = 0; i < pSyncNode->peersNum; ++i) {
SRaftId* pDestId = &(pSyncNode->peersId[i]);
// next index
SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
// pre index, pre term
SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);
if (preLogTerm == SYNC_TERM_INVALID) {
SyncIndex newNextIndex = syncNodeGetLastIndex(pSyncNode) + 1;
// SyncIndex newNextIndex = nextIndex + 1;
syncIndexMgrSetIndex(pSyncNode->pNextIndex, pDestId, newNextIndex);
syncIndexMgrSetIndex(pSyncNode->pMatchIndex, pDestId, SYNC_INDEX_INVALID);
sError("vgId:%d, sync get pre term error, nextIndex:%" PRId64 ", update next-index:%" PRId64
", match-index:%d, raftid:%" PRId64,
pSyncNode->vgId, nextIndex, newNextIndex, SYNC_INDEX_INVALID, pDestId->addr);
return -1;
}
// entry pointer array
SSyncRaftEntry* entryPArr[SYNC_MAX_BATCH_SIZE];
memset(entryPArr, 0, sizeof(entryPArr));
// get entry batch
int32_t getCount = 0;
SyncIndex getEntryIndex = nextIndex;
for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) {
SSyncRaftEntry* pEntry = NULL;
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, getEntryIndex, &pEntry);
if (code == 0) {
ASSERT(pEntry != NULL);
entryPArr[i] = pEntry;
getCount++;
getEntryIndex++;
} else {
break;
}
}
// event log
do {
char logBuf[128];
char host[64];
uint16_t port;
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
snprintf(logBuf, sizeof(logBuf), "build batch:%d for %s:%d", getCount, host, port);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
// build msg
SyncAppendEntriesBatch* pMsg = syncAppendEntriesBatchBuild(entryPArr, getCount, pSyncNode->vgId);
ASSERT(pMsg != NULL);
// free entries
for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) {
SSyncRaftEntry* pEntry = entryPArr[i];
if (pEntry != NULL) {
syncEntryDestory(pEntry);
entryPArr[i] = NULL;
}
}
// prepare msg
pMsg->srcId = pSyncNode->myRaftId;
pMsg->destId = *pDestId;
pMsg->term = pSyncNode->pRaftStore->currentTerm;
pMsg->prevLogIndex = preLogIndex;
pMsg->prevLogTerm = preLogTerm;
pMsg->commitIndex = pSyncNode->commitIndex;
pMsg->privateTerm = 0;
pMsg->dataCount = getCount;
// send msg
syncNodeAppendEntriesBatch(pSyncNode, pDestId, pMsg);
// speed up
if (pMsg->dataCount > 0 && pSyncNode->commitIndex - pMsg->prevLogIndex > SYNC_SLOW_DOWN_RANGE) {
ret = 1;
#if 0
do {
char logBuf[128];
ret = syncNodeReplicateOne(pSyncNode, pDestId);
if (ret != 0) {
char host[64];
uint16_t port;
int16_t port;
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
snprintf(logBuf, sizeof(logBuf), "maybe speed up for %s:%d, pre-index:%ld", host, port, pMsg->prevLogIndex);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
#endif
sError("vgId:%d, do append entries error for %s:%d", pSyncNode->vgId, host, port);
}
syncAppendEntriesBatchDestroy(pMsg);
}
return ret;
return 0;
}
#endif
int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode) {
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_LEADER);
syncIndexMgrLog2("begin append entries peers pNextIndex:", pSyncNode->pNextIndex);
syncIndexMgrLog2("begin append entries peers pMatchIndex:", pSyncNode->pMatchIndex);
logStoreSimpleLog2("begin append entries peers LogStore:", pSyncNode->pLogStore);
int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg) {
int32_t ret = 0;
for (int i = 0; i < pSyncNode->peersNum; ++i) {
SRaftId* pDestId = &(pSyncNode->peersId[i]);
// next index
SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
// pre index, pre term
SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);
if (preLogTerm == SYNC_TERM_INVALID) {
SyncIndex newNextIndex = syncNodeGetLastIndex(pSyncNode) + 1;
// SyncIndex newNextIndex = nextIndex + 1;
syncIndexMgrSetIndex(pSyncNode->pNextIndex, pDestId, newNextIndex);
syncIndexMgrSetIndex(pSyncNode->pMatchIndex, pDestId, SYNC_INDEX_INVALID);
sError("vgId:%d, sync get pre term error, nextIndex:%" PRId64 ", update next-index:%" PRId64
", match-index:%d, raftid:%" PRId64,
pSyncNode->vgId, nextIndex, newNextIndex, SYNC_INDEX_INVALID, pDestId->addr);
return -1;
}
// prepare entry
SyncAppendEntries* pMsg = NULL;
SSyncRaftEntry* pEntry;
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry);
if (code == 0) {
ASSERT(pEntry != NULL);
pMsg = syncAppendEntriesBuild(pEntry->bytes, pSyncNode->vgId);
ASSERT(pMsg != NULL);
// add pEntry into msg
uint32_t len;
char* serialized = syncEntrySerialize(pEntry, &len);
ASSERT(len == pEntry->bytes);
memcpy(pMsg->data, serialized, len);
taosMemoryFree(serialized);
syncEntryDestory(pEntry);
} else {
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
// no entry in log
pMsg = syncAppendEntriesBuild(0, pSyncNode->vgId);
ASSERT(pMsg != NULL);
syncLogSendAppendEntries(pSyncNode, pMsg, "");
} else {
syncNodeLog3("", pSyncNode);
ASSERT(0);
}
}
SRpcMsg rpcMsg;
syncAppendEntries2RpcMsg(pMsg, &rpcMsg);
syncNodeSendMsgById(destRaftId, pSyncNode, &rpcMsg);
// prepare msg
ASSERT(pMsg != NULL);
pMsg->srcId = pSyncNode->myRaftId;
pMsg->destId = *pDestId;
pMsg->term = pSyncNode->pRaftStore->currentTerm;
pMsg->prevLogIndex = preLogIndex;
pMsg->prevLogTerm = preLogTerm;
pMsg->commitIndex = pSyncNode->commitIndex;
pMsg->privateTerm = 0;
// pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId);
SPeerState* pState = syncNodeGetPeerState(pSyncNode, destRaftId);
ASSERT(pState != NULL);
// send msg
syncNodeAppendEntries(pSyncNode, pDestId, pMsg);
syncAppendEntriesDestroy(pMsg);
if (pMsg->dataLen > 0) {
pState->lastSendIndex = pMsg->prevLogIndex + 1;
pState->lastSendTime = taosGetTimestampMs();
}
return ret;
}
int32_t syncNodeReplicate(SSyncNode* pSyncNode, bool isTimer) {
// start replicate
int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg) {
int32_t ret = 0;
switch (pSyncNode->pRaftCfg->snapshotStrategy) {
case SYNC_STRATEGY_NO_SNAPSHOT:
ret = syncNodeAppendEntriesPeers(pSyncNode);
break;
case SYNC_STRATEGY_STANDARD_SNAPSHOT:
ret = syncNodeAppendEntriesPeersSnapshot(pSyncNode);
break;
case SYNC_STRATEGY_WAL_FIRST:
ret = syncNodeAppendEntriesPeersSnapshot2(pSyncNode);
break;
default:
ret = syncNodeAppendEntriesPeers(pSyncNode);
break;
}
// start delay
int64_t timeNow = taosGetTimestampMs();
int64_t startDelay = timeNow - pSyncNode->startTime;
// replicate delay
int64_t replicateDelay = timeNow - pSyncNode->lastReplicateTime;
pSyncNode->lastReplicateTime = timeNow;
if (ret > 0 && isTimer && startDelay > SYNC_SPEED_UP_AFTER_MS) {
// speed up replicate
int32_t ms =
pSyncNode->heartbeatTimerMS < SYNC_SPEED_UP_HB_TIMER ? pSyncNode->heartbeatTimerMS : SYNC_SPEED_UP_HB_TIMER;
syncNodeRestartNowHeartbeatTimerMS(pSyncNode, ms);
#if 0
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "replicate speed up");
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
#endif
if (syncNodeNeedSendAppendEntries(pSyncNode, destRaftId, pMsg)) {
ret = syncNodeSendAppendEntries(pSyncNode, destRaftId, pMsg);
} else {
syncNodeRestartHeartbeatTimer(pSyncNode);
#if 0
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "replicate slow down");
char host[64];
int16_t port;
syncUtilU642Addr(destRaftId->addr, host, sizeof(host), &port);
snprintf(logBuf, sizeof(logBuf), "do not repcate to %s:%d for index:%" PRId64, host, port, pMsg->prevLogIndex + 1);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
#endif
}
return ret;
......@@ -488,12 +201,34 @@ int32_t syncNodeAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, c
return ret;
}
int32_t syncNodeAppendEntriesBatch(SSyncNode* pSyncNode, const SRaftId* destRaftId,
const SyncAppendEntriesBatch* pMsg) {
syncLogSendAppendEntriesBatch(pSyncNode, pMsg, "");
int32_t syncNodeSendHeartbeat(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncHeartbeat* pMsg) {
int32_t ret = 0;
syncLogSendHeartbeat(pSyncNode, pMsg, "");
SRpcMsg rpcMsg;
syncAppendEntriesBatch2RpcMsg(pMsg, &rpcMsg);
syncNodeSendMsgById(destRaftId, pSyncNode, &rpcMsg);
syncHeartbeat2RpcMsg(pMsg, &rpcMsg);
syncNodeSendMsgById(&(pMsg->destId), pSyncNode, &rpcMsg);
return ret;
}
int32_t syncNodeHeartbeatPeers(SSyncNode* pSyncNode) {
for (int32_t i = 0; i < pSyncNode->peersNum; ++i) {
SyncHeartbeat* pSyncMsg = syncHeartbeatBuild(pSyncNode->vgId);
pSyncMsg->srcId = pSyncNode->myRaftId;
pSyncMsg->destId = pSyncNode->peersId[i];
pSyncMsg->term = pSyncNode->pRaftStore->currentTerm;
pSyncMsg->commitIndex = pSyncNode->commitIndex;
pSyncMsg->minMatchIndex = syncMinMatchIndex(pSyncNode);
pSyncMsg->privateTerm = 0;
SRpcMsg rpcMsg;
syncHeartbeat2RpcMsg(pSyncMsg, &rpcMsg);
// send msg
syncNodeSendHeartbeat(pSyncNode, &(pSyncMsg->destId), pSyncMsg);
syncHeartbeatDestroy(pSyncMsg);
}
return 0;
}
\ No newline at end of file
......@@ -42,65 +42,6 @@
// m)
// /\ UNCHANGED <<state, currentTerm, candidateVars, leaderVars, logVars>>
//
int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg) {
int32_t ret = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
syncLogRecvRequestVote(ths, pMsg, "maybe replica already dropped");
return -1;
}
bool logOK = (pMsg->lastLogTerm > ths->pLogStore->getLastTerm(ths->pLogStore)) ||
((pMsg->lastLogTerm == ths->pLogStore->getLastTerm(ths->pLogStore)) &&
(pMsg->lastLogIndex >= ths->pLogStore->getLastIndex(ths->pLogStore)));
// maybe update term
if (pMsg->term > ths->pRaftStore->currentTerm) {
syncNodeUpdateTerm(ths, pMsg->term);
#if 0
if (logOK) {
syncNodeUpdateTerm(ths, pMsg->term);
} else {
syncNodeUpdateTermWithoutStepDown(ths, pMsg->term);
}
#endif
}
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
bool grant = (pMsg->term == ths->pRaftStore->currentTerm) && logOK &&
((!raftStoreHasVoted(ths->pRaftStore)) || (syncUtilSameId(&(ths->pRaftStore->voteFor), &(pMsg->srcId))));
if (grant) {
// maybe has already voted for pMsg->srcId
// vote again, no harm
raftStoreVote(ths->pRaftStore, &(pMsg->srcId));
// forbid elect for this round
syncNodeResetElectTimer(ths);
}
// send msg
SyncRequestVoteReply* pReply = syncRequestVoteReplyBuild(ths->vgId);
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->pRaftStore->currentTerm;
pReply->voteGranted = grant;
// trace log
do {
char logBuf[32];
snprintf(logBuf, sizeof(logBuf), "grant:%d", pReply->voteGranted);
syncLogRecvRequestVote(ths, pMsg, logBuf);
syncLogSendRequestVoteReply(ths, pReply, "");
} while (0);
SRpcMsg rpcMsg;
syncRequestVoteReply2RpcMsg(pReply, &rpcMsg);
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
syncRequestVoteReplyDestroy(pReply);
return ret;
}
static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pMsg) {
SyncTerm myLastTerm = syncNodeGetLastTerm(pSyncNode);
......@@ -157,12 +98,12 @@ static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pM
return false;
}
int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) {
int32_t syncNodeOnRequestVote(SSyncNode* ths, SyncRequestVote* pMsg) {
int32_t ret = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
syncLogRecvRequestVote(ths, pMsg, "maybe replica already dropped");
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) {
syncLogRecvRequestVote(ths, pMsg, "not in my config");
return -1;
}
......@@ -170,14 +111,8 @@ int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) {
// maybe update term
if (pMsg->term > ths->pRaftStore->currentTerm) {
syncNodeUpdateTerm(ths, pMsg->term);
#if 0
if (logOK) {
syncNodeUpdateTerm(ths, pMsg->term);
} else {
syncNodeUpdateTermWithoutStepDown(ths, pMsg->term);
}
#endif
syncNodeStepDown(ths, pMsg->term);
// syncNodeUpdateTerm(ths, pMsg->term);
}
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
......@@ -188,6 +123,9 @@ int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) {
// vote again, no harm
raftStoreVote(ths->pRaftStore, &(pMsg->srcId));
// candidate ?
syncNodeStepDown(ths, ths->pRaftStore->currentTerm);
// forbid elect for this round
syncNodeResetElectTimer(ths);
}
......
......@@ -37,68 +37,12 @@
// /\ Discard(m)
// /\ UNCHANGED <<serverVars, votedFor, leaderVars, logVars>>
//
int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) {
int32_t syncNodeOnRequestVoteReply(SSyncNode* ths, SyncRequestVoteReply* pMsg) {
int32_t ret = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
syncLogRecvRequestVoteReply(ths, pMsg, "maybe replica already dropped");
return -1;
}
// drop stale response
if (pMsg->term < ths->pRaftStore->currentTerm) {
syncLogRecvRequestVoteReply(ths, pMsg, "drop stale response");
return -1;
}
// ASSERT(!(pMsg->term > ths->pRaftStore->currentTerm));
// no need this code, because if I receive reply.term, then I must have sent for that term.
// if (pMsg->term > ths->pRaftStore->currentTerm) {
// syncNodeUpdateTerm(ths, pMsg->term);
// }
if (pMsg->term > ths->pRaftStore->currentTerm) {
syncLogRecvRequestVoteReply(ths, pMsg, "error term");
return -1;
}
syncLogRecvRequestVoteReply(ths, pMsg, "");
ASSERT(pMsg->term == ths->pRaftStore->currentTerm);
// This tallies votes even when the current state is not Candidate,
// but they won't be looked at, so it doesn't matter.
if (ths->state == TAOS_SYNC_STATE_CANDIDATE) {
votesRespondAdd(ths->pVotesRespond, pMsg);
if (pMsg->voteGranted) {
// add vote
voteGrantedVote(ths->pVotesGranted, pMsg);
// maybe to leader
if (voteGrantedMajority(ths->pVotesGranted)) {
if (!ths->pVotesGranted->toLeader) {
syncNodeCandidate2Leader(ths);
// prevent to leader again!
ths->pVotesGranted->toLeader = true;
}
}
} else {
;
// do nothing
// UNCHANGED <<votesGranted, voterLog>>
}
}
return 0;
}
int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) {
int32_t ret = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
syncLogRecvRequestVoteReply(ths, pMsg, "maybe replica already dropped");
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) {
syncLogRecvRequestVoteReply(ths, pMsg, "not in my config");
return -1;
}
......@@ -116,6 +60,7 @@ int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteRepl
if (pMsg->term > ths->pRaftStore->currentTerm) {
syncLogRecvRequestVoteReply(ths, pMsg, "error term");
syncNodeStepDown(ths, pMsg->term);
return -1;
}
......
......@@ -136,7 +136,7 @@ void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) {
while (pStub) {
size_t len;
void *key = taosHashGetKey(pStub, &len);
void * key = taosHashGetKey(pStub, &len);
uint64_t *pSeqNum = (uint64_t *)key;
sum++;
......
......@@ -41,6 +41,8 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI
}
memset(pSender, 0, sizeof(*pSender));
int64_t timeNow = taosGetTimestampMs();
pSender->start = false;
pSender->seq = SYNC_SNAPSHOT_SEQ_INVALID;
pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID;
......@@ -51,7 +53,8 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI
pSender->pSyncNode = pSyncNode;
pSender->replicaIndex = replicaIndex;
pSender->term = pSyncNode->pRaftStore->currentTerm;
pSender->privateTerm = taosGetTimestampMs() + 100;
pSender->privateTerm = timeNow + 100;
pSender->startTime = timeNow;
pSender->pSyncNode->pFsm->FpGetSnapshotInfo(pSender->pSyncNode->pFsm, &(pSender->snapshot));
pSender->finish = false;
} else {
......@@ -402,6 +405,24 @@ char *snapshotSender2SimpleStr(SSyncSnapshotSender *pSender, char *event) {
return s;
}
int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) {
// calculate <start, end> index
syncNodeEventLog(pSyncNode, "start snapshot ...");
SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, pDestId);
if (pSender == NULL) {
// create sender
} else {
// if <start, end> is same
// return 0;
}
// send begin msg
return 0;
}
// -------------------------------------
SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId fromId) {
bool condition = (pSyncNode->pFsm->FpSnapshotStartWrite != NULL) && (pSyncNode->pFsm->FpSnapshotStopWrite != NULL) &&
......@@ -721,7 +742,7 @@ char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event)
// condition 3, recv SYNC_SNAPSHOT_SEQ_FORCE_CLOSE, force close
// condition 4, got data, update ack
//
int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) {
int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) {
// get receiver
SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver;
bool needRsp = false;
......@@ -834,7 +855,7 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) {
// condition 2 sender receives ack, set seq = ack + 1, send msg from seq
// condition 3 sender receives error msg, just print error log
//
int32_t syncNodeOnSnapshotRspCb(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) {
int32_t syncNodeOnSnapshotReply(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) {
// if already drop replica, do not process
if (!syncNodeInRaftGroup(pSyncNode, &(pMsg->srcId)) && pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
sError("vgId:%d, recv sync-snapshot-rsp, maybe replica already dropped", pSyncNode->vgId);
......
......@@ -16,6 +16,7 @@
#include "syncTimeout.h"
#include "syncElection.h"
#include "syncRaftCfg.h"
#include "syncRaftLog.h"
#include "syncReplication.h"
#include "syncRespMgr.h"
......@@ -60,12 +61,36 @@ static void syncNodeCleanConfigIndex(SSyncNode* ths) {
int32_t syncNodeTimerRoutine(SSyncNode* ths) {
syncNodeEventLog(ths, "timer routines");
if (ths->vgId == 1) {
// timer replicate
syncNodeReplicate(ths);
// clean mnode index
if (syncNodeIsMnode(ths)) {
syncNodeCleanConfigIndex(ths);
}
// end timeout wal snapshot
int64_t timeNow = taosGetTimestampMs();
if (timeNow - ths->snapshottingIndex > SYNC_DEL_WAL_MS &&
atomic_load_64(&ths->snapshottingIndex) != SYNC_INDEX_INVALID) {
SSyncLogStoreData* pData = ths->pLogStore->data;
int32_t code = walEndSnapshot(pData->pWal);
if (code != 0) {
sError("vgId:%d, wal snapshot end error since:%s", ths->vgId, terrstr(terrno));
return -1;
} else {
do {
char logBuf[256];
snprintf(logBuf, sizeof(logBuf), "wal snapshot end, index:%" PRId64, atomic_load_64(&ths->snapshottingIndex));
syncNodeEventLog(ths, logBuf);
} while (0);
atomic_store_64(&ths->snapshottingIndex, SYNC_INDEX_INVALID);
}
}
#if 0
if (ths->vgId != 1) {
if (!syncNodeIsMnode(ths)) {
syncRespClean(ths->pSyncRespMgr);
}
#endif
......@@ -73,9 +98,9 @@ int32_t syncNodeTimerRoutine(SSyncNode* ths) {
return 0;
}
int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg) {
int32_t syncNodeOnTimer(SSyncNode* ths, SyncTimeout* pMsg) {
int32_t ret = 0;
syncTimeoutLog2("==syncNodeOnTimeoutCb==", pMsg);
syncLogRecvTimer(ths, pMsg, "");
if (pMsg->timeoutType == SYNC_TIMEOUT_PING) {
if (atomic_load_64(&ths->pingTimerLogicClockUser) <= pMsg->logicClock) {
......@@ -84,27 +109,29 @@ int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg) {
// syncNodePingAll(ths);
// syncNodePingPeers(ths);
// sTrace("vgId:%d, sync timeout, type:ping count:%d", ths->vgId, ths->pingTimerCounter);
syncNodeTimerRoutine(ths);
}
} else if (pMsg->timeoutType == SYNC_TIMEOUT_ELECTION) {
if (atomic_load_64(&ths->electTimerLogicClockUser) <= pMsg->logicClock) {
++(ths->electTimerCounter);
sTrace("vgId:%d, sync timer, type:election count:%" PRId64 ", electTimerLogicClockUser:%" PRId64 "", ths->vgId,
sTrace("vgId:%d, sync timer, type:election count:%" PRIu64 ", lc-user:%" PRIu64, ths->vgId,
ths->electTimerCounter, ths->electTimerLogicClockUser);
syncNodeElect(ths);
}
} else if (pMsg->timeoutType == SYNC_TIMEOUT_HEARTBEAT) {
if (atomic_load_64(&ths->heartbeatTimerLogicClockUser) <= pMsg->logicClock) {
++(ths->heartbeatTimerCounter);
sTrace("vgId:%d, sync timer, type:replicate count:%" PRId64 ", heartbeatTimerLogicClockUser:%" PRId64 "",
ths->vgId, ths->heartbeatTimerCounter, ths->heartbeatTimerLogicClockUser);
syncNodeReplicate(ths, true);
sTrace("vgId:%d, sync timer, type:replicate count:%" PRIu64 ", lc-user:%" PRIu64, ths->vgId,
ths->heartbeatTimerCounter, ths->heartbeatTimerLogicClockUser);
// syncNodeReplicate(ths, true);
}
} else {
sError("vgId:%d, unknown timeout-type:%d", ths->vgId, pMsg->timeoutType);
sError("vgId:%d, recv unknown timer-type:%d", ths->vgId, pMsg->timeoutType);
}
return ret;
......
......@@ -138,27 +138,27 @@ char *voteGranted2Str(SVotesGranted *pVotesGranted) {
// for debug -------------------
void voteGrantedPrint(SVotesGranted *pObj) {
char *serialized = voteGranted2Str(pObj);
printf("voteGrantedPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("voteGrantedPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void voteGrantedPrint2(char *s, SVotesGranted *pObj) {
char *serialized = voteGranted2Str(pObj);
printf("voteGrantedPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("voteGrantedPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void voteGrantedLog(SVotesGranted *pObj) {
char *serialized = voteGranted2Str(pObj);
sTrace("voteGrantedLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("voteGrantedLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void voteGrantedLog2(char *s, SVotesGranted *pObj) {
char *serialized = voteGranted2Str(pObj);
sTrace("voteGrantedLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("voteGrantedLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
......@@ -267,26 +267,26 @@ char *votesRespond2Str(SVotesRespond *pVotesRespond) {
// for debug -------------------
void votesRespondPrint(SVotesRespond *pObj) {
char *serialized = votesRespond2Str(pObj);
printf("votesRespondPrint | len:%lu | %s \n", strlen(serialized), serialized);
printf("votesRespondPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void votesRespondPrint2(char *s, SVotesRespond *pObj) {
char *serialized = votesRespond2Str(pObj);
printf("votesRespondPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
printf("votesRespondPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized);
fflush(NULL);
taosMemoryFree(serialized);
}
void votesRespondLog(SVotesRespond *pObj) {
char *serialized = votesRespond2Str(pObj);
sTrace("votesRespondLog | len:%lu | %s", strlen(serialized), serialized);
sTrace("votesRespondLog | len:%d | %s", (int32_t)strlen(serialized), serialized);
taosMemoryFree(serialized);
}
void votesRespondLog2(char *s, SVotesRespond *pObj) {
char *serialized = votesRespond2Str(pObj);
sTrace("votesRespondLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
sTrace("votesRespondLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized);
taosMemoryFree(serialized);
}
......@@ -237,8 +237,8 @@ int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal*
gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries;
gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply;
gSyncIO->FpOnSyncSnapshotSend = pSyncNode->FpOnSnapshotSend;
gSyncIO->FpOnSyncSnapshotRsp = pSyncNode->FpOnSnapshotRsp;
gSyncIO->FpOnSyncSnapshot = pSyncNode->FpOnSnapshot;
gSyncIO->FpOnSyncSnapshotReply = pSyncNode->FpOnSnapshotReply;
gSyncIO->pSyncNode = pSyncNode;
syncNodeRelease(pSyncNode);
......
......@@ -181,8 +181,11 @@ int main(int argc, char **argv) {
SSyncNode *pSyncNode = syncNodeInit();
assert(pSyncNode != NULL);
SSyncRaftEntry *pEntry = pMsg4;
pSyncNode->pLogStore->appendEntry(pSyncNode->pLogStore, pEntry);
SSyncRaftEntry *pEntry2 = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, pEntry->index);
pSyncNode->pLogStore->syncLogAppendEntry(pSyncNode->pLogStore, pEntry);
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, pEntry->index, &pEntry);
ASSERT(code == 0);
syncEntryLog2((char *)"==pEntry2==", pEntry2);
// step5
......
......@@ -36,7 +36,7 @@ void test1() {
void test2() {
SyncHeartbeatReply *pMsg = createMsg();
uint32_t len = pMsg->bytes;
char *serialized = (char *)taosMemoryMalloc(len);
char * serialized = (char *)taosMemoryMalloc(len);
syncHeartbeatReplySerialize(pMsg, serialized, len);
SyncHeartbeatReply *pMsg2 = syncHeartbeatReplyBuild(1000);
syncHeartbeatReplyDeserialize(serialized, len, pMsg2);
......@@ -50,7 +50,7 @@ void test2() {
void test3() {
SyncHeartbeatReply *pMsg = createMsg();
uint32_t len;
char *serialized = syncHeartbeatReplySerialize2(pMsg, &len);
char * serialized = syncHeartbeatReplySerialize2(pMsg, &len);
SyncHeartbeatReply *pMsg2 = syncHeartbeatReplyDeserialize2(serialized, len);
syncHeartbeatReplyLog2((char *)"test3: syncHeartbeatReplySerialize3 -> syncHeartbeatReplyDeserialize2 ", pMsg2);
......
......@@ -35,7 +35,7 @@ void test1() {
void test2() {
SyncHeartbeat *pMsg = createMsg();
uint32_t len = pMsg->bytes;
char *serialized = (char *)taosMemoryMalloc(len);
char * serialized = (char *)taosMemoryMalloc(len);
syncHeartbeatSerialize(pMsg, serialized, len);
SyncHeartbeat *pMsg2 = syncHeartbeatBuild(789);
syncHeartbeatDeserialize(serialized, len, pMsg2);
......@@ -49,7 +49,7 @@ void test2() {
void test3() {
SyncHeartbeat *pMsg = createMsg();
uint32_t len;
char *serialized = syncHeartbeatSerialize2(pMsg, &len);
char * serialized = syncHeartbeatSerialize2(pMsg, &len);
SyncHeartbeat *pMsg2 = syncHeartbeatDeserialize2(serialized, len);
syncHeartbeatLog2((char *)"test3: syncHeartbeatSerialize2 -> syncHeartbeatDeserialize2 ", pMsg2);
......
......@@ -52,7 +52,7 @@ void cleanup() {
void logStoreTest() {
pLogStore = logStoreCreate(pSyncNode);
assert(pLogStore);
assert(pLogStore->getLastIndex(pLogStore) == SYNC_INDEX_INVALID);
assert(pLogStore->syncLogLastIndex(pLogStore) == SYNC_INDEX_INVALID);
logStoreLog2((char*)"logStoreTest", pLogStore);
......@@ -65,22 +65,20 @@ void logStoreTest() {
pEntry->seqNum = 3;
pEntry->isWeak = true;
pEntry->term = 100 + i;
pEntry->index = pLogStore->getLastIndex(pLogStore) + 1;
pEntry->index = pLogStore->syncLogLastIndex(pLogStore) + 1;
snprintf(pEntry->data, dataLen, "value%d", i);
syncEntryLog2((char*)"==write entry== :", pEntry);
pLogStore->appendEntry(pLogStore, pEntry);
pLogStore->syncLogAppendEntry(pLogStore, pEntry);
syncEntryDestory(pEntry);
if (i == 0) {
assert(pLogStore->getLastIndex(pLogStore) == SYNC_INDEX_BEGIN);
assert(pLogStore->syncLogLastIndex(pLogStore) == SYNC_INDEX_BEGIN);
}
}
logStoreLog2((char*)"after appendEntry", pLogStore);
pLogStore->truncate(pLogStore, 3);
pLogStore->syncLogTruncate(pLogStore, 3);
logStoreLog2((char*)"after truncate 3", pLogStore);
logStoreDestory(pLogStore);
}
......
......@@ -266,14 +266,12 @@ int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal*
gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply;
gSyncIO->FpOnSyncTimeout = pSyncNode->FpOnTimeout;
gSyncIO->FpOnSyncClientRequest = pSyncNode->FpOnClientRequest;
gSyncIO->FpOnSyncRequestVote = pSyncNode->FpOnRequestVote;
gSyncIO->FpOnSyncRequestVoteReply = pSyncNode->FpOnRequestVoteReply;
gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries;
gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply;
gSyncIO->FpOnSyncSnapshotSend = pSyncNode->FpOnSnapshotSend;
gSyncIO->FpOnSyncSnapshotRsp = pSyncNode->FpOnSnapshotRsp;
gSyncIO->FpOnSyncSnapshot = pSyncNode->FpOnSnapshot;
gSyncIO->FpOnSyncSnapshotReply = pSyncNode->FpOnSnapshotReply;
gSyncIO->pSyncNode = pSyncNode;
syncNodeRelease(pSyncNode);
......
......@@ -141,14 +141,10 @@ int32_t taosQueueItemSize(STaosQueue *queue) {
}
int64_t taosQueueMemorySize(STaosQueue *queue) {
#if 1
return queue->memOfItems;
#else
taosThreadMutexLock(&queue->mutex);
int64_t memOfItems = queue->memOfItems;
taosThreadMutexUnlock(&queue->mutex);
return memOfItems;
#endif
}
void *taosAllocateQitem(int32_t size, EQItype itype) {
......
......@@ -131,7 +131,7 @@ class TDDnode:
"qDebugFlag": "143",
"rpcDebugFlag": "143",
"tmrDebugFlag": "131",
"uDebugFlag": "131",
"uDebugFlag": "143",
"sDebugFlag": "143",
"wDebugFlag": "143",
"numOfLogLines": "100000000",
......
......@@ -183,7 +183,7 @@ $x = 0
step71:
$x = $x + 1
sleep 1000
if $x == 10 then
if $x == 50 then
return -1
endi
sql select * from information_schema.ins_dnodes
......
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/deploy.sh -n dnode2 -i 2
system sh/deploy.sh -n dnode3 -i 3
system sh/deploy.sh -n dnode4 -i 4
system sh/cfg.sh -n dnode1 -c supportVnodes -v 0
system sh/exec.sh -n dnode1 -s start
system sh/exec.sh -n dnode2 -s start
system sh/exec.sh -n dnode3 -s start
system sh/exec.sh -n dnode4 -s start
sql connect
sql create dnode $hostname port 7200
sql create dnode $hostname port 7300
sql create dnode $hostname port 7400
$x = 0
step1:
$x = $x + 1
sleep 1000
if $x == 10 then
print ====> dnode not ready!
return -1
endi
sql select * from information_schema.ins_dnodes
print ===> $data00 $data01 $data02 $data03 $data04 $data05
print ===> $data10 $data11 $data12 $data13 $data14 $data15
print ===> $data20 $data21 $data22 $data23 $data24 $data25
print ===> $data30 $data31 $data32 $data33 $data34 $data35
if $rows != 4 then
return -1
endi
if $data(1)[4] != ready then
goto step1
endi
if $data(2)[4] != ready then
goto step1
endi
if $data(3)[4] != ready then
goto step1
endi
if $data(4)[4] != ready then
goto step1
endi
$replica = 3
$vgroups = 1
print ============= create database
sql create database db replica $replica vgroups $vgroups
$loop_cnt = 0
check_db_ready:
$loop_cnt = $loop_cnt + 1
sleep 200
if $loop_cnt == 100 then
print ====> db not ready!
return -1
endi
sql select * from information_schema.ins_databases
print ===> rows: $rows
print $data[2][0] $data[2][1] $data[2][2] $data[2][3] $data[2][4] $data[2][5] $data[2][6] $data[2][7] $data[2][8] $data[2][9] $data[2][6] $data[2][11] $data[2][12] $data[2][13] $data[2][14] $data[2][15] $data[2][16] $data[2][17] $data[2][18] $data[2][19]
if $rows != 3 then
return -1
endi
if $data[2][15] != ready then
goto check_db_ready
endi
sql use db
$loop_cnt = 0
check_vg_ready:
$loop_cnt = $loop_cnt + 1
sleep 200
if $loop_cnt == 300 then
print ====> vgroups not ready!
return -1
endi
sql show vgroups
print ===> rows: $rows
print $data[0][0] $data[0][1] $data[0][2] $data[0][3] $data[0][4] $data[0][5] $data[0][6] $data[0][7] $data[0][8] $data[0][9] $data[0][10] $data[0][11]
if $rows != $vgroups then
return -1
endi
if $data[0][4] == leader then
if $data[0][6] == follower then
if $data[0][8] == follower then
print ---- vgroup $data[0][0] leader locate on dnode $data[0][3]
endi
endi
elif $data[0][6] == leader then
if $data[0][4] == follower then
if $data[0][8] == follower then
print ---- vgroup $data[0][0] leader locate on dnode $data[0][5]
endi
endi
elif $data[0][8] == leader then
if $data[0][4] == follower then
if $data[0][6] == follower then
print ---- vgroup $data[0][0] leader locate on dnode $data[0][7]
endi
endi
else
goto check_vg_ready
endi
#return 0
vg_ready:
print ====> create stable/child table
sql create table stb (ts timestamp, c1 int, c2 float, c3 double) tags (t1 int)
#return 0
sql show stables
if $rows != 1 then
return -1
endi
sql create table ct1 using stb tags(1000)
print ====> step1 insert 1000 records
$N = 1000
$count = 0
while $count < $N
$ms = 1591200000000 + $count
sql insert into ct1 values( $ms , $count , 2.1, 3.1)
$count = $count + 1
endw
print ====> step2 sleep 20s, checking data
sleep 20000
print ====> step3 sleep 30s, kill leader
sleep 30000
print ====> step4 insert 1000 records
$N = 1000
$count = 0
while $count < $N
$ms = 1591201000000 + $count
sql insert into ct1 values( $ms , $count , 2.1, 3.1)
$count = $count + 1
endw
print ====> step5 sleep 20s, checking data
sleep 20000
......@@ -233,12 +233,12 @@ python3 ./test.py -f 6-cluster/5dnode2mnode.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeStop.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeStop2Follower.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeStopLoop.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopDnodeCreateDb.py -N 5 -M 3
# TD-19690 python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopDnodeCreateDb.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopMnodeCreateDb.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopVnodeCreateDb.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopMnodeCreateDbRep3.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopDnodeCreateStb.py -N 5 -M 3
# TD-19690 python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopDnodeCreateStb.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopMnodeCreateStb.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopVnodeCreateStb.py -N 5 -M 3
......@@ -249,8 +249,8 @@ python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertDataAsync.py -N 5 -
python3 ./test.py -f 6-cluster/5dnode3mnodeAdd1Ddnoe.py -N 6 -M 3 -C 5
# BUG python3 ./test.py -f 6-cluster/5dnode3mnodeStopInsert.py
python3 ./test.py -f 6-cluster/5dnode3mnodeDrop.py -N 5
python3 test.py -f 6-cluster/5dnode3mnodeStopConnect.py -N 5 -M 3
# python3 ./test.py -f 6-cluster/5dnode3mnodeDrop.py -N 5
#TD-19690 python3 test.py -f 6-cluster/5dnode3mnodeStopConnect.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeRecreateMnode.py -N 5 -M 3
python3 ./test.py -f 6-cluster/5dnode3mnodeStopFollowerLeader.py -N 5 -M 3
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册