Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
5965629b
TDengine
项目概览
taosdata
/
TDengine
大约 2 年 前同步成功
通知
1192
Star
22018
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5965629b
编写于
10月 20, 2022
作者:
M
Minghao Li
浏览文件
操作
浏览文件
下载
差异文件
refactor(sync): refacotr sync
上级
2e82fdee
27f690fd
变更
46
隐藏空白更改
内联
并排
Showing
46 changed file
with
1528 addition
and
2484 deletion
+1528
-2484
include/dnode/mnode/mnode.h
include/dnode/mnode/mnode.h
+1
-0
include/libs/sync/sync.h
include/libs/sync/sync.h
+15
-24
include/libs/sync/syncTools.h
include/libs/sync/syncTools.h
+17
-19
source/dnode/mgmt/mgmt_mnode/inc/mmInt.h
source/dnode/mgmt/mgmt_mnode/inc/mmInt.h
+2
-0
source/dnode/mgmt/mgmt_mnode/src/mmHandle.c
source/dnode/mgmt/mgmt_mnode/src/mmHandle.c
+3
-0
source/dnode/mgmt/mgmt_mnode/src/mmWorker.c
source/dnode/mgmt/mgmt_mnode/src/mmWorker.c
+38
-0
source/dnode/mgmt/mgmt_vnode/src/vmWorker.c
source/dnode/mgmt/mgmt_vnode/src/vmWorker.c
+2
-4
source/dnode/mnode/impl/src/mndMain.c
source/dnode/mnode/impl/src/mndMain.c
+96
-90
source/dnode/vnode/src/vnd/vnodeCommit.c
source/dnode/vnode/src/vnd/vnodeCommit.c
+6
-3
source/dnode/vnode/src/vnd/vnodeSync.c
source/dnode/vnode/src/vnd/vnodeSync.c
+80
-119
source/libs/sync/inc/syncAppendEntries.h
source/libs/sync/inc/syncAppendEntries.h
+2
-3
source/libs/sync/inc/syncAppendEntriesReply.h
source/libs/sync/inc/syncAppendEntriesReply.h
+1
-3
source/libs/sync/inc/syncElection.h
source/libs/sync/inc/syncElection.h
+2
-4
source/libs/sync/inc/syncEnv.h
source/libs/sync/inc/syncEnv.h
+6
-6
source/libs/sync/inc/syncIO.h
source/libs/sync/inc/syncIO.h
+2
-3
source/libs/sync/inc/syncIndexMgr.h
source/libs/sync/inc/syncIndexMgr.h
+2
-2
source/libs/sync/inc/syncInt.h
source/libs/sync/inc/syncInt.h
+61
-19
source/libs/sync/inc/syncRaftCfg.h
source/libs/sync/inc/syncRaftCfg.h
+7
-7
source/libs/sync/inc/syncReplication.h
source/libs/sync/inc/syncReplication.h
+7
-8
source/libs/sync/inc/syncRequestVote.h
source/libs/sync/inc/syncRequestVote.h
+1
-2
source/libs/sync/inc/syncRequestVoteReply.h
source/libs/sync/inc/syncRequestVoteReply.h
+1
-2
source/libs/sync/inc/syncRespMgr.h
source/libs/sync/inc/syncRespMgr.h
+2
-2
source/libs/sync/inc/syncSnapshot.h
source/libs/sync/inc/syncSnapshot.h
+8
-5
source/libs/sync/inc/syncTimeout.h
source/libs/sync/inc/syncTimeout.h
+1
-1
source/libs/sync/src/syncAppendEntries.c
source/libs/sync/src/syncAppendEntries.c
+171
-728
source/libs/sync/src/syncAppendEntriesReply.c
source/libs/sync/src/syncAppendEntriesReply.c
+23
-301
source/libs/sync/src/syncCommit.c
source/libs/sync/src/syncCommit.c
+18
-10
source/libs/sync/src/syncElection.c
source/libs/sync/src/syncElection.c
+25
-53
source/libs/sync/src/syncIO.c
source/libs/sync/src/syncIO.c
+4
-4
source/libs/sync/src/syncIndexMgr.c
source/libs/sync/src/syncIndexMgr.c
+1
-1
source/libs/sync/src/syncMain.c
source/libs/sync/src/syncMain.c
+518
-324
source/libs/sync/src/syncRaftCfg.c
source/libs/sync/src/syncRaftCfg.c
+7
-7
source/libs/sync/src/syncRaftLog.c
source/libs/sync/src/syncRaftLog.c
+26
-196
source/libs/sync/src/syncReplication.c
source/libs/sync/src/syncReplication.c
+107
-374
source/libs/sync/src/syncRequestVote.c
source/libs/sync/src/syncRequestVote.c
+6
-68
source/libs/sync/src/syncRequestVoteReply.c
source/libs/sync/src/syncRequestVoteReply.c
+2
-57
source/libs/sync/src/syncRespMgr.c
source/libs/sync/src/syncRespMgr.c
+1
-1
source/libs/sync/src/syncSnapshot.c
source/libs/sync/src/syncSnapshot.c
+24
-3
source/libs/sync/src/syncTimeout.c
source/libs/sync/src/syncTimeout.c
+39
-12
source/libs/sync/test/syncConfigChangeSnapshotTest.cpp
source/libs/sync/test/syncConfigChangeSnapshotTest.cpp
+2
-2
source/libs/sync/test/syncEncodeTest.cpp
source/libs/sync/test/syncEncodeTest.cpp
+5
-2
source/libs/sync/test/syncHeartbeatReplyTest.cpp
source/libs/sync/test/syncHeartbeatReplyTest.cpp
+2
-2
source/libs/sync/test/syncHeartbeatTest.cpp
source/libs/sync/test/syncHeartbeatTest.cpp
+2
-2
source/libs/sync/test/syncLogStoreTest.cpp
source/libs/sync/test/syncLogStoreTest.cpp
+5
-7
source/libs/sync/test/syncTestTool.cpp
source/libs/sync/test/syncTestTool.cpp
+2
-4
tests/script/tsim/sync/sync2-test.sim
tests/script/tsim/sync/sync2-test.sim
+175
-0
未找到文件。
include/dnode/mnode/mnode.h
浏览文件 @
5965629b
...
...
@@ -99,6 +99,7 @@ int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad);
*/
int32_t
mndProcessRpcMsg
(
SRpcMsg
*
pMsg
);
int32_t
mndProcessSyncMsg
(
SRpcMsg
*
pMsg
);
int32_t
mndProcessSyncCtrlMsg
(
SRpcMsg
*
pMsg
);
int32_t
mndPreProcessQueryMsg
(
SRpcMsg
*
pMsg
);
void
mndPostProcessQueryMsg
(
SRpcMsg
*
pMsg
);
...
...
include/libs/sync/sync.h
浏览文件 @
5965629b
...
...
@@ -35,7 +35,12 @@ extern bool gRaftDetailLog;
#define SYNC_MAX_PROGRESS_WAIT_MS 4000
#define SYNC_MAX_START_TIME_RANGE_MS (1000 * 20)
#define SYNC_MAX_RECV_TIME_RANGE_MS 1200
#define SYNC_DEL_WAL_MS (1000 * 60)
#define SYNC_ADD_QUORUM_COUNT 3
#define SYNC_MNODE_LOG_RETENTION 10000
#define SYNC_VNODE_LOG_RETENTION 500
#define SYNC_APPEND_ENTRIES_TIMEOUT_MS 10000
#define SYNC_MAX_BATCH_SIZE 1
#define SYNC_INDEX_BEGIN 0
...
...
@@ -157,32 +162,15 @@ typedef struct SSyncLogStore {
SLRUCache
*
pCache
;
void
*
data
;
// append one log entry
int32_t
(
*
appendEntry
)(
struct
SSyncLogStore
*
pLogStore
,
SSyncRaftEntry
*
pEntry
);
// get one log entry, user need to free pEntry->pCont
SSyncRaftEntry
*
(
*
getEntry
)(
struct
SSyncLogStore
*
pLogStore
,
SyncIndex
index
);
// truncate log with index, entries after the given index (>=index) will be deleted
int32_t
(
*
truncate
)(
struct
SSyncLogStore
*
pLogStore
,
SyncIndex
fromIndex
);
// return index of last entry
SyncIndex
(
*
getLastIndex
)(
struct
SSyncLogStore
*
pLogStore
);
// return term of last entry
SyncTerm
(
*
getLastTerm
)(
struct
SSyncLogStore
*
pLogStore
);
// update log store commit index with "index"
int32_t
(
*
updateCommitIndex
)(
struct
SSyncLogStore
*
pLogStore
,
SyncIndex
index
);
// return commit index of log
SyncIndex
(
*
getCommitIndex
)(
struct
SSyncLogStore
*
pLogStore
);
int32_t
(
*
syncLogUpdateCommitIndex
)(
struct
SSyncLogStore
*
pLogStore
,
SyncIndex
index
);
SyncIndex
(
*
syncLogCommitIndex
)(
struct
SSyncLogStore
*
pLogStore
);
SyncIndex
(
*
syncLogBeginIndex
)(
struct
SSyncLogStore
*
pLogStore
);
SyncIndex
(
*
syncLogEndIndex
)(
struct
SSyncLogStore
*
pLogStore
);
bool
(
*
syncLogIsEmpty
)(
struct
SSyncLogStore
*
pLogStore
);
int32_t
(
*
syncLogEntryCount
)(
struct
SSyncLogStore
*
pLogStore
);
int32_t
(
*
syncLogRestoreFromSnapshot
)(
struct
SSyncLogStore
*
pLogStore
,
SyncIndex
index
);
bool
(
*
syncLogIsEmpty
)(
struct
SSyncLogStore
*
pLogStore
);
bool
(
*
syncLogExist
)(
struct
SSyncLogStore
*
pLogStore
,
SyncIndex
index
);
SyncIndex
(
*
syncLogWriteIndex
)(
struct
SSyncLogStore
*
pLogStore
);
...
...
@@ -207,6 +195,7 @@ typedef struct SSyncInfo {
SMsgCb
*
msgcb
;
int32_t
(
*
FpSendMsg
)(
const
SEpSet
*
pEpSet
,
SRpcMsg
*
pMsg
);
int32_t
(
*
FpEqMsg
)(
const
SMsgCb
*
msgcb
,
SRpcMsg
*
pMsg
);
int32_t
(
*
FpEqCtrlMsg
)(
const
SMsgCb
*
msgcb
,
SRpcMsg
*
pMsg
);
}
SSyncInfo
;
int32_t
syncInit
();
...
...
@@ -217,7 +206,6 @@ void syncStop(int64_t rid);
int32_t
syncSetStandby
(
int64_t
rid
);
ESyncState
syncGetMyRole
(
int64_t
rid
);
bool
syncIsReady
(
int64_t
rid
);
bool
syncIsReadyForRead
(
int64_t
rid
);
const
char
*
syncGetMyRoleStr
(
int64_t
rid
);
bool
syncRestoreFinish
(
int64_t
rid
);
SyncTerm
syncGetMyTerm
(
int64_t
rid
);
...
...
@@ -227,13 +215,13 @@ SyncGroupId syncGetVgId(int64_t rid);
void
syncGetEpSet
(
int64_t
rid
,
SEpSet
*
pEpSet
);
void
syncGetRetryEpSet
(
int64_t
rid
,
SEpSet
*
pEpSet
);
int32_t
syncPropose
(
int64_t
rid
,
SRpcMsg
*
pMsg
,
bool
isWeak
);
int32_t
syncProposeBatch
(
int64_t
rid
,
SRpcMsg
**
pMsgPArr
,
bool
*
pIsWeakArr
,
int32_t
arrSize
);
//
int32_t syncProposeBatch(int64_t rid, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize);
bool
syncEnvIsStart
();
const
char
*
syncStr
(
ESyncState
state
);
bool
syncIsRestoreFinish
(
int64_t
rid
);
int32_t
syncGetSnapshotByIndex
(
int64_t
rid
,
SyncIndex
index
,
SSnapshot
*
pSnapshot
);
int32_t
syncReconfig
(
int64_t
rid
,
SSyncCfg
*
pNewCfg
);
int32_t
syncReconfig
(
int64_t
rid
,
const
SSyncCfg
*
pNewCfg
);
// build SRpcMsg, need to call syncPropose with SRpcMsg
int32_t
syncReconfigBuild
(
int64_t
rid
,
const
SSyncCfg
*
pNewCfg
,
SRpcMsg
*
pRpcMsg
);
...
...
@@ -241,6 +229,9 @@ int32_t syncReconfigBuild(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg
int32_t
syncLeaderTransfer
(
int64_t
rid
);
int32_t
syncLeaderTransferTo
(
int64_t
rid
,
SNodeInfo
newLeader
);
int32_t
syncBeginSnapshot
(
int64_t
rid
,
int64_t
lastApplyIndex
);
int32_t
syncEndSnapshot
(
int64_t
rid
);
#ifdef __cplusplus
}
#endif
...
...
include/libs/sync/syncTools.h
浏览文件 @
5965629b
...
...
@@ -157,6 +157,8 @@ typedef enum ESyncTimeoutType {
SYNC_TIMEOUT_HEARTBEAT
,
}
ESyncTimeoutType
;
const
char
*
syncTimerTypeStr
(
enum
ESyncTimeoutType
timerType
);
typedef
struct
SyncTimeout
{
uint32_t
bytes
;
int32_t
vgId
;
...
...
@@ -423,6 +425,7 @@ typedef struct SyncAppendEntriesReply {
SyncTerm
privateTerm
;
bool
success
;
SyncIndex
matchIndex
;
SyncIndex
lastSendIndex
;
int64_t
startTime
;
}
SyncAppendEntriesReply
;
...
...
@@ -456,6 +459,8 @@ typedef struct SyncHeartbeat {
SyncTerm
term
;
SyncIndex
commitIndex
;
SyncTerm
privateTerm
;
SyncTerm
minMatchIndex
;
}
SyncHeartbeat
;
SyncHeartbeat
*
syncHeartbeatBuild
(
int32_t
vgId
);
...
...
@@ -676,24 +681,17 @@ void syncReconfigFinishLog2(char* s, const SyncReconfigFinish* pMsg);
// on message ----------------------
int32_t
syncNodeOnPingCb
(
SSyncNode
*
ths
,
SyncPing
*
pMsg
);
int32_t
syncNodeOnPingReplyCb
(
SSyncNode
*
ths
,
SyncPingReply
*
pMsg
);
int32_t
syncNodeOnTimeoutCb
(
SSyncNode
*
ths
,
SyncTimeout
*
pMsg
);
int32_t
syncNodeOnClientRequestCb
(
SSyncNode
*
ths
,
SyncClientRequest
*
pMsg
,
SyncIndex
*
pRetIndex
);
int32_t
syncNodeOnClientRequestBatchCb
(
SSyncNode
*
ths
,
SyncClientRequestBatch
*
pMsg
);
int32_t
syncNodeOnRequestVoteCb
(
SSyncNode
*
ths
,
SyncRequestVote
*
pMsg
);
int32_t
syncNodeOnRequestVoteReplyCb
(
SSyncNode
*
ths
,
SyncRequestVoteReply
*
pMsg
);
int32_t
syncNodeOnAppendEntriesCb
(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
);
int32_t
syncNodeOnAppendEntriesReplyCb
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncNodeOnRequestVoteSnapshotCb
(
SSyncNode
*
ths
,
SyncRequestVote
*
pMsg
);
int32_t
syncNodeOnRequestVoteReplySnapshotCb
(
SSyncNode
*
ths
,
SyncRequestVoteReply
*
pMsg
);
int32_t
syncNodeOnAppendEntriesSnapshotCb
(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
);
int32_t
syncNodeOnAppendEntriesReplySnapshotCb
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncNodeOnTimer
(
SSyncNode
*
ths
,
SyncTimeout
*
pMsg
);
int32_t
syncNodeOnAppendEntriesSnapshot2Cb
(
SSyncNode
*
ths
,
SyncAppendEntriesBatch
*
pMsg
);
int32_t
syncNodeOnAppendEntriesReplySnapshot2Cb
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncNodeOnSnapshotSendCb
(
SSyncNode
*
ths
,
SyncSnapshotSend
*
pMsg
);
int32_t
syncNodeOnSnapshotRspCb
(
SSyncNode
*
ths
,
SyncSnapshotRsp
*
pMsg
);
int32_t
syncNodeOnHeartbeat
(
SSyncNode
*
ths
,
SyncHeartbeat
*
pMsg
);
int32_t
syncNodeOnHeartbeatReply
(
SSyncNode
*
ths
,
SyncHeartbeatReply
*
pMsg
);
int32_t
syncNodeOnClientRequest
(
SSyncNode
*
ths
,
SyncClientRequest
*
pMsg
,
SyncIndex
*
pRetIndex
);
int32_t
syncNodeOnRequestVote
(
SSyncNode
*
ths
,
SyncRequestVote
*
pMsg
);
int32_t
syncNodeOnRequestVoteReply
(
SSyncNode
*
ths
,
SyncRequestVoteReply
*
pMsg
);
int32_t
syncNodeOnAppendEntries
(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
);
int32_t
syncNodeOnAppendEntriesReply
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncNodeOnSnapshot
(
SSyncNode
*
ths
,
SyncSnapshotSend
*
pMsg
);
int32_t
syncNodeOnSnapshotReply
(
SSyncNode
*
ths
,
SyncSnapshotRsp
*
pMsg
);
int32_t
syncNodeOnHeartbeat
(
SSyncNode
*
ths
,
SyncHeartbeat
*
pMsg
);
int32_t
syncNodeOnHeartbeatReply
(
SSyncNode
*
ths
,
SyncHeartbeatReply
*
pMsg
);
...
...
@@ -707,8 +705,8 @@ typedef int32_t (*FpOnRequestVoteReplyCb)(SSyncNode* ths, SyncRequestVoteReply*
typedef
int32_t
(
*
FpOnAppendEntriesCb
)(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
);
typedef
int32_t
(
*
FpOnAppendEntriesReplyCb
)(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
);
typedef
int32_t
(
*
FpOnTimeoutCb
)(
SSyncNode
*
pSyncNode
,
SyncTimeout
*
pMsg
);
typedef
int32_t
(
*
FpOnSnapshot
Send
Cb
)(
SSyncNode
*
ths
,
SyncSnapshotSend
*
pMsg
);
typedef
int32_t
(
*
FpOnSnapshotR
sp
Cb
)(
SSyncNode
*
ths
,
SyncSnapshotRsp
*
pMsg
);
typedef
int32_t
(
*
FpOnSnapshotCb
)(
SSyncNode
*
ths
,
SyncSnapshotSend
*
pMsg
);
typedef
int32_t
(
*
FpOnSnapshotR
eply
Cb
)(
SSyncNode
*
ths
,
SyncSnapshotRsp
*
pMsg
);
// option ----------------------------------
bool
syncNodeSnapshotEnable
(
SSyncNode
*
pSyncNode
);
...
...
source/dnode/mgmt/mgmt_mnode/inc/mmInt.h
浏览文件 @
5965629b
...
...
@@ -34,6 +34,7 @@ typedef struct SMnodeMgmt {
SSingleWorker
readWorker
;
SSingleWorker
writeWorker
;
SSingleWorker
syncWorker
;
SSingleWorker
syncCtrlWorker
;
bool
stopped
;
int32_t
refCount
;
TdThreadRwlock
lock
;
...
...
@@ -53,6 +54,7 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt);
void
mmStopWorker
(
SMnodeMgmt
*
pMgmt
);
int32_t
mmPutMsgToWriteQueue
(
SMnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
);
int32_t
mmPutMsgToSyncQueue
(
SMnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
);
int32_t
mmPutMsgToSyncCtrlQueue
(
SMnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
);
int32_t
mmPutMsgToReadQueue
(
SMnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
);
int32_t
mmPutMsgToQueryQueue
(
SMnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
);
int32_t
mmPutMsgToFetchQueue
(
SMnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
);
...
...
source/dnode/mgmt/mgmt_mnode/src/mmHandle.c
浏览文件 @
5965629b
...
...
@@ -201,6 +201,9 @@ SArray *mmGetMsgHandles() {
if
(
dmSetMgmtHandle
(
pArray
,
TDMT_SYNC_SET_MNODE_STANDBY_RSP
,
mmPutMsgToWriteQueue
,
0
)
==
NULL
)
goto
_OVER
;
if
(
dmSetMgmtHandle
(
pArray
,
TDMT_SYNC_SET_VNODE_STANDBY_RSP
,
mmPutMsgToWriteQueue
,
0
)
==
NULL
)
goto
_OVER
;
if
(
dmSetMgmtHandle
(
pArray
,
TDMT_SYNC_HEARTBEAT
,
mmPutMsgToSyncCtrlQueue
,
1
)
==
NULL
)
goto
_OVER
;
if
(
dmSetMgmtHandle
(
pArray
,
TDMT_SYNC_HEARTBEAT_REPLY
,
mmPutMsgToSyncCtrlQueue
,
1
)
==
NULL
)
goto
_OVER
;
code
=
0
;
_OVER:
...
...
source/dnode/mgmt/mgmt_mnode/src/mmWorker.c
浏览文件 @
5965629b
...
...
@@ -67,6 +67,24 @@ static void mmProcessRpcMsg(SQueueInfo *pInfo, SRpcMsg *pMsg) {
taosFreeQitem
(
pMsg
);
}
static
void
mmProcessSyncCtrlMsg
(
SQueueInfo
*
pInfo
,
SRpcMsg
*
pMsg
)
{
SMnodeMgmt
*
pMgmt
=
pInfo
->
ahandle
;
pMsg
->
info
.
node
=
pMgmt
->
pMnode
;
const
STraceId
*
trace
=
&
pMsg
->
info
.
traceId
;
dGTrace
(
"msg:%p, get from mnode-sync-ctrl queue"
,
pMsg
);
SMsgHead
*
pHead
=
pMsg
->
pCont
;
pHead
->
contLen
=
ntohl
(
pHead
->
contLen
);
pHead
->
vgId
=
ntohl
(
pHead
->
vgId
);
int32_t
code
=
mndProcessSyncCtrlMsg
(
pMsg
);
dGTrace
(
"msg:%p, is freed, code:0x%x"
,
pMsg
,
code
);
rpcFreeCont
(
pMsg
->
pCont
);
taosFreeQitem
(
pMsg
);
}
static
void
mmProcessSyncMsg
(
SQueueInfo
*
pInfo
,
SRpcMsg
*
pMsg
)
{
SMnodeMgmt
*
pMgmt
=
pInfo
->
ahandle
;
pMsg
->
info
.
node
=
pMgmt
->
pMnode
;
...
...
@@ -108,6 +126,10 @@ int32_t mmPutMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return
mmPutMsgToWorker
(
pMgmt
,
&
pMgmt
->
syncWorker
,
pMsg
);
}
int32_t
mmPutMsgToSyncCtrlQueue
(
SMnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
)
{
return
mmPutMsgToWorker
(
pMgmt
,
&
pMgmt
->
syncCtrlWorker
,
pMsg
);
}
int32_t
mmPutMsgToReadQueue
(
SMnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
)
{
return
mmPutMsgToWorker
(
pMgmt
,
&
pMgmt
->
readWorker
,
pMsg
);
}
...
...
@@ -144,6 +166,9 @@ int32_t mmPutMsgToQueue(SMnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) {
case
SYNC_QUEUE
:
pWorker
=
&
pMgmt
->
syncWorker
;
break
;
case
SYNC_CTRL_QUEUE
:
pWorker
=
&
pMgmt
->
syncCtrlWorker
;
break
;
default:
terrno
=
TSDB_CODE_INVALID_PARA
;
}
...
...
@@ -223,6 +248,18 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt) {
return
-
1
;
}
SSingleWorkerCfg
scCfg
=
{
.
min
=
1
,
.
max
=
1
,
.
name
=
"mnode-sync-ctrl"
,
.
fp
=
(
FItem
)
mmProcessSyncCtrlMsg
,
.
param
=
pMgmt
,
};
if
(
tSingleWorkerInit
(
&
pMgmt
->
syncCtrlWorker
,
&
scCfg
)
!=
0
)
{
dError
(
"failed to start mnode mnode-sync-ctrl worker since %s"
,
terrstr
());
return
-
1
;
}
dDebug
(
"mnode workers are initialized"
);
return
0
;
}
...
...
@@ -235,5 +272,6 @@ void mmStopWorker(SMnodeMgmt *pMgmt) {
tSingleWorkerCleanup
(
&
pMgmt
->
readWorker
);
tSingleWorkerCleanup
(
&
pMgmt
->
writeWorker
);
tSingleWorkerCleanup
(
&
pMgmt
->
syncWorker
);
tSingleWorkerCleanup
(
&
pMgmt
->
syncCtrlWorker
);
dDebug
(
"mnode workers are closed"
);
}
source/dnode/mgmt/mgmt_vnode/src/vmWorker.c
浏览文件 @
5965629b
...
...
@@ -231,11 +231,9 @@ static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtyp
return
code
;
}
int32_t
vmPutMsgToSync
Queue
(
SVnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
)
{
return
vmPutMsgToQueue
(
pMgmt
,
pMsg
,
SYNC
_QUEUE
);
}
int32_t
vmPutMsgToSync
CtrlQueue
(
SVnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
)
{
return
vmPutMsgToQueue
(
pMgmt
,
pMsg
,
SYNC_CTRL
_QUEUE
);
}
int32_t
vmPutMsgToSyncCtrlQueue
(
SVnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
)
{
return
vmPutMsgToQueue
(
pMgmt
,
pMsg
,
SYNC_CTRL_QUEUE
);
}
int32_t
vmPutMsgToSyncQueue
(
SVnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
)
{
return
vmPutMsgToQueue
(
pMgmt
,
pMsg
,
SYNC_QUEUE
);
}
int32_t
vmPutMsgToWriteQueue
(
SVnodeMgmt
*
pMgmt
,
SRpcMsg
*
pMsg
)
{
return
vmPutMsgToQueue
(
pMgmt
,
pMsg
,
WRITE_QUEUE
);
}
...
...
source/dnode/mnode/impl/src/mndMain.c
浏览文件 @
5965629b
...
...
@@ -474,6 +474,45 @@ void mndStop(SMnode *pMnode) {
mndCleanupTimer
(
pMnode
);
}
int32_t
mndProcessSyncCtrlMsg
(
SRpcMsg
*
pMsg
)
{
SMnode
*
pMnode
=
pMsg
->
info
.
node
;
SSyncMgmt
*
pMgmt
=
&
pMnode
->
syncMgmt
;
int32_t
code
=
0
;
mInfo
(
"vgId:%d, process sync ctrl msg"
,
1
);
if
(
!
syncEnvIsStart
())
{
mError
(
"failed to process sync msg:%p type:%s since syncEnv stop"
,
pMsg
,
TMSG_INFO
(
pMsg
->
msgType
));
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
return
-
1
;
}
SSyncNode
*
pSyncNode
=
syncNodeAcquire
(
pMgmt
->
sync
);
if
(
pSyncNode
==
NULL
)
{
mError
(
"failed to process sync msg:%p type:%s since syncNode is null"
,
pMsg
,
TMSG_INFO
(
pMsg
->
msgType
));
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
return
-
1
;
}
if
(
pMsg
->
msgType
==
TDMT_SYNC_HEARTBEAT
)
{
SyncHeartbeat
*
pSyncMsg
=
syncHeartbeatFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnHeartbeat
(
pSyncNode
,
pSyncMsg
);
syncHeartbeatDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_HEARTBEAT_REPLY
)
{
SyncHeartbeatReply
*
pSyncMsg
=
syncHeartbeatReplyFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnHeartbeatReply
(
pSyncNode
,
pSyncMsg
);
syncHeartbeatReplyDestroy
(
pSyncMsg
);
}
syncNodeRelease
(
pSyncNode
);
if
(
code
!=
0
)
{
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
}
return
code
;
}
int32_t
mndProcessSyncMsg
(
SRpcMsg
*
pMsg
)
{
SMnode
*
pMnode
=
pMsg
->
info
.
node
;
SSyncMgmt
*
pMgmt
=
&
pMnode
->
syncMgmt
;
...
...
@@ -492,97 +531,64 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
return
-
1
;
}
// ToDo: ugly! use function pointer
if
(
syncNodeStrategy
(
pSyncNode
)
==
SYNC_STRATEGY_STANDARD_SNAPSHOT
)
{
if
(
pMsg
->
msgType
==
TDMT_SYNC_TIMEOUT
)
{
SyncTimeout
*
pSyncMsg
=
syncTimeoutFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnTimeoutCb
(
pSyncNode
,
pSyncMsg
);
syncTimeoutDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING
)
{
SyncPing
*
pSyncMsg
=
syncPingFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnPingCb
(
pSyncNode
,
pSyncMsg
);
syncPingDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING_REPLY
)
{
SyncPingReply
*
pSyncMsg
=
syncPingReplyFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnPingReplyCb
(
pSyncNode
,
pSyncMsg
);
syncPingReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_CLIENT_REQUEST
)
{
SyncClientRequest
*
pSyncMsg
=
syncClientRequestFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnClientRequestCb
(
pSyncNode
,
pSyncMsg
,
NULL
);
syncClientRequestDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE
)
{
SyncRequestVote
*
pSyncMsg
=
syncRequestVoteFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnRequestVoteSnapshotCb
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE_REPLY
)
{
SyncRequestVoteReply
*
pSyncMsg
=
syncRequestVoteReplyFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnRequestVoteReplySnapshotCb
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES
)
{
SyncAppendEntries
*
pSyncMsg
=
syncAppendEntriesFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnAppendEntriesSnapshotCb
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES_REPLY
)
{
SyncAppendEntriesReply
*
pSyncMsg
=
syncAppendEntriesReplyFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnAppendEntriesReplySnapshotCb
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SNAPSHOT_SEND
)
{
SyncSnapshotSend
*
pSyncMsg
=
syncSnapshotSendFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnSnapshotSendCb
(
pSyncNode
,
pSyncMsg
);
syncSnapshotSendDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SNAPSHOT_RSP
)
{
SyncSnapshotRsp
*
pSyncMsg
=
syncSnapshotRspFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnSnapshotRspCb
(
pSyncNode
,
pSyncMsg
);
syncSnapshotRspDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SET_MNODE_STANDBY
)
{
code
=
syncSetStandby
(
pMgmt
->
sync
);
SRpcMsg
rsp
=
{.
code
=
code
,
.
info
=
pMsg
->
info
};
tmsgSendRsp
(
&
rsp
);
}
else
{
mError
(
"failed to process msg:%p since invalid type:%s"
,
pMsg
,
TMSG_INFO
(
pMsg
->
msgType
));
code
=
-
1
;
}
if
(
pMsg
->
msgType
==
TDMT_SYNC_TIMEOUT
)
{
SyncTimeout
*
pSyncMsg
=
syncTimeoutFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnTimer
(
pSyncNode
,
pSyncMsg
);
syncTimeoutDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING
)
{
SyncPing
*
pSyncMsg
=
syncPingFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnPingCb
(
pSyncNode
,
pSyncMsg
);
syncPingDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING_REPLY
)
{
SyncPingReply
*
pSyncMsg
=
syncPingReplyFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnPingReplyCb
(
pSyncNode
,
pSyncMsg
);
syncPingReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_CLIENT_REQUEST
)
{
SyncClientRequest
*
pSyncMsg
=
syncClientRequestFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnClientRequest
(
pSyncNode
,
pSyncMsg
,
NULL
);
syncClientRequestDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE
)
{
SyncRequestVote
*
pSyncMsg
=
syncRequestVoteFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnRequestVote
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE_REPLY
)
{
SyncRequestVoteReply
*
pSyncMsg
=
syncRequestVoteReplyFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnRequestVoteReply
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES
)
{
SyncAppendEntries
*
pSyncMsg
=
syncAppendEntriesFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnAppendEntries
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES_REPLY
)
{
SyncAppendEntriesReply
*
pSyncMsg
=
syncAppendEntriesReplyFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnAppendEntriesReply
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SNAPSHOT_SEND
)
{
SyncSnapshotSend
*
pSyncMsg
=
syncSnapshotSendFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnSnapshot
(
pSyncNode
,
pSyncMsg
);
syncSnapshotSendDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SNAPSHOT_RSP
)
{
SyncSnapshotRsp
*
pSyncMsg
=
syncSnapshotRspFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnSnapshotReply
(
pSyncNode
,
pSyncMsg
);
syncSnapshotRspDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SET_MNODE_STANDBY
)
{
code
=
syncSetStandby
(
pMgmt
->
sync
);
SRpcMsg
rsp
=
{.
code
=
code
,
.
info
=
pMsg
->
info
};
tmsgSendRsp
(
&
rsp
);
}
else
{
if
(
pMsg
->
msgType
==
TDMT_SYNC_TIMEOUT
)
{
SyncTimeout
*
pSyncMsg
=
syncTimeoutFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnTimeoutCb
(
pSyncNode
,
pSyncMsg
);
syncTimeoutDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING
)
{
SyncPing
*
pSyncMsg
=
syncPingFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnPingCb
(
pSyncNode
,
pSyncMsg
);
syncPingDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING_REPLY
)
{
SyncPingReply
*
pSyncMsg
=
syncPingReplyFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnPingReplyCb
(
pSyncNode
,
pSyncMsg
);
syncPingReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_CLIENT_REQUEST
)
{
SyncClientRequest
*
pSyncMsg
=
syncClientRequestFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnClientRequestCb
(
pSyncNode
,
pSyncMsg
,
NULL
);
syncClientRequestDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE
)
{
SyncRequestVote
*
pSyncMsg
=
syncRequestVoteFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnRequestVoteCb
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE_REPLY
)
{
SyncRequestVoteReply
*
pSyncMsg
=
syncRequestVoteReplyFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnRequestVoteReplyCb
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES
)
{
SyncAppendEntries
*
pSyncMsg
=
syncAppendEntriesFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnAppendEntriesCb
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES_REPLY
)
{
SyncAppendEntriesReply
*
pSyncMsg
=
syncAppendEntriesReplyFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnAppendEntriesReplyCb
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SET_MNODE_STANDBY
)
{
code
=
syncSetStandby
(
pMgmt
->
sync
);
SRpcMsg
rsp
=
{.
code
=
code
,
.
info
=
pMsg
->
info
};
tmsgSendRsp
(
&
rsp
);
}
else
{
mError
(
"failed to process msg:%p since invalid type:%s"
,
pMsg
,
TMSG_INFO
(
pMsg
->
msgType
));
code
=
-
1
;
}
mError
(
"failed to process msg:%p since invalid type:%s"
,
pMsg
,
TMSG_INFO
(
pMsg
->
msgType
));
code
=
-
1
;
}
syncNodeRelease
(
pSyncNode
);
...
...
source/dnode/vnode/src/vnd/vnodeCommit.c
浏览文件 @
5965629b
...
...
@@ -15,7 +15,7 @@
#include "vnd.h"
#define VND_INFO_FNAME
"vnode.json"
#define VND_INFO_FNAME "vnode.json"
#define VND_INFO_FNAME_TMP "vnode_tmp.json"
static
int
vnodeEncodeInfo
(
const
SVnodeInfo
*
pInfo
,
char
**
ppData
);
...
...
@@ -237,7 +237,9 @@ int vnodeCommit(SVnode *pVnode) {
code
=
terrno
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
walBeginSnapshot
(
pVnode
->
pWal
,
pVnode
->
state
.
applied
);
// walBeginSnapshot(pVnode->pWal, pVnode->state.applied);
syncBeginSnapshot
(
pVnode
->
sync
,
pVnode
->
state
.
applied
);
if
(
smaPreCommit
(
pVnode
->
pSma
)
<
0
)
{
vError
(
"vgId:%d, failed to pre-commit sma since %s"
,
TD_VID
(
pVnode
),
tstrerror
(
terrno
));
...
...
@@ -289,7 +291,8 @@ int vnodeCommit(SVnode *pVnode) {
}
// apply the commit (TODO)
walEndSnapshot
(
pVnode
->
pWal
);
// walEndSnapshot(pVnode->pWal);
syncEndSnapshot
(
pVnode
->
sync
);
_exit:
if
(
code
)
{
...
...
source/dnode/vnode/src/vnd/vnodeSync.c
浏览文件 @
5965629b
...
...
@@ -385,126 +385,73 @@ int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
vGTrace
(
"vgId:%d, sync msg:%p will be processed, type:%s"
,
pVnode
->
config
.
vgId
,
pMsg
,
TMSG_INFO
(
pMsg
->
msgType
));
if
(
syncNodeStrategy
(
pSyncNode
)
==
SYNC_STRATEGY_NO_SNAPSHOT
)
{
if
(
pMsg
->
msgType
==
TDMT_SYNC_TIMEOUT
)
{
SyncTimeout
*
pSyncMsg
=
syncTimeoutFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnTimeoutCb
(
pSyncNode
,
pSyncMsg
);
syncTimeoutDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING
)
{
SyncPing
*
pSyncMsg
=
syncPingFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnPingCb
(
pSyncNode
,
pSyncMsg
);
syncPingDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING_REPLY
)
{
SyncPingReply
*
pSyncMsg
=
syncPingReplyFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnPingReplyCb
(
pSyncNode
,
pSyncMsg
);
syncPingReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_CLIENT_REQUEST
)
{
SyncClientRequest
*
pSyncMsg
=
syncClientRequestFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnClientRequestCb
(
pSyncNode
,
pSyncMsg
,
NULL
);
syncClientRequestDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_CLIENT_REQUEST_BATCH
)
{
SyncClientRequestBatch
*
pSyncMsg
=
syncClientRequestBatchFromRpcMsg
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnClientRequestBatchCb
(
pSyncNode
,
pSyncMsg
);
syncClientRequestBatchDestroyDeep
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE
)
{
SyncRequestVote
*
pSyncMsg
=
syncRequestVoteFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnRequestVoteCb
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE_REPLY
)
{
SyncRequestVoteReply
*
pSyncMsg
=
syncRequestVoteReplyFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnRequestVoteReplyCb
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES
)
{
SyncAppendEntries
*
pSyncMsg
=
syncAppendEntriesFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnAppendEntriesCb
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES_REPLY
)
{
SyncAppendEntriesReply
*
pSyncMsg
=
syncAppendEntriesReplyFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnAppendEntriesReplyCb
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SET_VNODE_STANDBY
)
{
code
=
vnodeSetStandBy
(
pVnode
);
if
(
code
!=
0
&&
terrno
!=
0
)
code
=
terrno
;
SRpcMsg
rsp
=
{.
code
=
code
,
.
info
=
pMsg
->
info
};
tmsgSendRsp
(
&
rsp
);
}
else
{
vGError
(
"vgId:%d, msg:%p failed to process since error msg type:%d"
,
pVnode
->
config
.
vgId
,
pMsg
,
pMsg
->
msgType
);
code
=
-
1
;
}
if
(
pMsg
->
msgType
==
TDMT_SYNC_TIMEOUT
)
{
SyncTimeout
*
pSyncMsg
=
syncTimeoutFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnTimer
(
pSyncNode
,
pSyncMsg
);
syncTimeoutDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING
)
{
SyncPing
*
pSyncMsg
=
syncPingFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnPingCb
(
pSyncNode
,
pSyncMsg
);
syncPingDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING_REPLY
)
{
SyncPingReply
*
pSyncMsg
=
syncPingReplyFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnPingReplyCb
(
pSyncNode
,
pSyncMsg
);
syncPingReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_CLIENT_REQUEST
)
{
SyncClientRequest
*
pSyncMsg
=
syncClientRequestFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnClientRequest
(
pSyncNode
,
pSyncMsg
,
NULL
);
syncClientRequestDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE
)
{
SyncRequestVote
*
pSyncMsg
=
syncRequestVoteFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnRequestVote
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE_REPLY
)
{
SyncRequestVoteReply
*
pSyncMsg
=
syncRequestVoteReplyFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnRequestVoteReply
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES
)
{
SyncAppendEntries
*
pSyncMsg
=
syncAppendEntriesFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnAppendEntries
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES_REPLY
)
{
SyncAppendEntriesReply
*
pSyncMsg
=
syncAppendEntriesReplyFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnAppendEntriesReply
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SNAPSHOT_SEND
)
{
SyncSnapshotSend
*
pSyncMsg
=
syncSnapshotSendFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnSnapshot
(
pSyncNode
,
pSyncMsg
);
syncSnapshotSendDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SNAPSHOT_RSP
)
{
SyncSnapshotRsp
*
pSyncMsg
=
syncSnapshotRspFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnSnapshotReply
(
pSyncNode
,
pSyncMsg
);
syncSnapshotRspDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SET_VNODE_STANDBY
)
{
code
=
vnodeSetStandBy
(
pVnode
);
if
(
code
!=
0
&&
terrno
!=
0
)
code
=
terrno
;
SRpcMsg
rsp
=
{.
code
=
code
,
.
info
=
pMsg
->
info
};
tmsgSendRsp
(
&
rsp
);
}
else
if
(
syncNodeStrategy
(
pSyncNode
)
==
SYNC_STRATEGY_WAL_FIRST
)
{
// use wal first strategy
if
(
pMsg
->
msgType
==
TDMT_SYNC_TIMEOUT
)
{
SyncTimeout
*
pSyncMsg
=
syncTimeoutFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnTimeoutCb
(
pSyncNode
,
pSyncMsg
);
syncTimeoutDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING
)
{
SyncPing
*
pSyncMsg
=
syncPingFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnPingCb
(
pSyncNode
,
pSyncMsg
);
syncPingDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_PING_REPLY
)
{
SyncPingReply
*
pSyncMsg
=
syncPingReplyFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnPingReplyCb
(
pSyncNode
,
pSyncMsg
);
syncPingReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_CLIENT_REQUEST
)
{
SyncClientRequest
*
pSyncMsg
=
syncClientRequestFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnClientRequestCb
(
pSyncNode
,
pSyncMsg
,
NULL
);
syncClientRequestDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_CLIENT_REQUEST_BATCH
)
{
SyncClientRequestBatch
*
pSyncMsg
=
syncClientRequestBatchFromRpcMsg
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnClientRequestBatchCb
(
pSyncNode
,
pSyncMsg
);
syncClientRequestBatchDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE
)
{
SyncRequestVote
*
pSyncMsg
=
syncRequestVoteFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnRequestVoteSnapshotCb
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_REQUEST_VOTE_REPLY
)
{
SyncRequestVoteReply
*
pSyncMsg
=
syncRequestVoteReplyFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnRequestVoteReplySnapshotCb
(
pSyncNode
,
pSyncMsg
);
syncRequestVoteReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES_BATCH
)
{
SyncAppendEntriesBatch
*
pSyncMsg
=
syncAppendEntriesBatchFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnAppendEntriesSnapshot2Cb
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesBatchDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_APPEND_ENTRIES_REPLY
)
{
SyncAppendEntriesReply
*
pSyncMsg
=
syncAppendEntriesReplyFromRpcMsg2
(
pMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
code
=
syncNodeOnAppendEntriesReplySnapshot2Cb
(
pSyncNode
,
pSyncMsg
);
syncAppendEntriesReplyDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SNAPSHOT_SEND
)
{
SyncSnapshotSend
*
pSyncMsg
=
syncSnapshotSendFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnSnapshotSendCb
(
pSyncNode
,
pSyncMsg
);
syncSnapshotSendDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SNAPSHOT_RSP
)
{
SyncSnapshotRsp
*
pSyncMsg
=
syncSnapshotRspFromRpcMsg2
(
pMsg
);
code
=
syncNodeOnSnapshotRspCb
(
pSyncNode
,
pSyncMsg
);
syncSnapshotRspDestroy
(
pSyncMsg
);
}
else
if
(
pMsg
->
msgType
==
TDMT_SYNC_SET_VNODE_STANDBY
)
{
code
=
vnodeSetStandBy
(
pVnode
);
if
(
code
!=
0
&&
terrno
!=
0
)
code
=
terrno
;
SRpcMsg
rsp
=
{.
code
=
code
,
.
info
=
pMsg
->
info
};
tmsgSendRsp
(
&
rsp
);
}
else
{
vGError
(
"vgId:%d, msg:%p failed to process since error msg type:%d"
,
pVnode
->
config
.
vgId
,
pMsg
,
pMsg
->
msgType
);
code
=
-
1
;
}
}
else
{
vGError
(
"vgId:%d, msg:%p failed to process since error msg type:%d"
,
pVnode
->
config
.
vgId
,
pMsg
,
pMsg
->
msgType
);
code
=
-
1
;
}
vTrace
(
"vgId:%d, sync msg:%p is processed, type:%s code:0x%x"
,
pVnode
->
config
.
vgId
,
pMsg
,
TMSG_INFO
(
pMsg
->
msgType
),
...
...
@@ -516,6 +463,19 @@ int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
return
code
;
}
static
int32_t
vnodeSyncEqCtrlMsg
(
const
SMsgCb
*
msgcb
,
SRpcMsg
*
pMsg
)
{
if
(
msgcb
==
NULL
)
{
return
-
1
;
}
int32_t
code
=
tmsgPutToQueue
(
msgcb
,
SYNC_CTRL_QUEUE
,
pMsg
);
if
(
code
!=
0
)
{
rpcFreeCont
(
pMsg
->
pCont
);
pMsg
->
pCont
=
NULL
;
}
return
code
;
}
static
int32_t
vnodeSyncEqMsg
(
const
SMsgCb
*
msgcb
,
SRpcMsg
*
pMsg
)
{
if
(
msgcb
==
NULL
)
{
return
-
1
;
...
...
@@ -801,6 +761,7 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path) {
.
msgcb
=
NULL
,
.
FpSendMsg
=
vnodeSyncSendMsg
,
.
FpEqMsg
=
vnodeSyncEqMsg
,
.
FpEqCtrlMsg
=
vnodeSyncEqCtrlMsg
,
};
snprintf
(
syncInfo
.
path
,
sizeof
(
syncInfo
.
path
),
"%s%ssync"
,
path
,
TD_DIRSEP
);
...
...
source/libs/sync/inc/syncAppendEntries.h
浏览文件 @
5965629b
...
...
@@ -92,9 +92,8 @@ extern "C" {
// /\ UNCHANGED <<serverVars, commitIndex, messages>>
// /\ UNCHANGED <<candidateVars, leaderVars>>
//
int32_t
syncNodeOnAppendEntriesCb
(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
);
int32_t
syncNodeOnAppendEntriesSnapshotCb
(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
);
int32_t
syncNodeOnAppendEntriesSnapshot2Cb
(
SSyncNode
*
ths
,
SyncAppendEntriesBatch
*
pMsg
);
int32_t
syncNodeOnAppendEntries
(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
);
#ifdef __cplusplus
}
...
...
source/libs/sync/inc/syncAppendEntriesReply.h
浏览文件 @
5965629b
...
...
@@ -40,9 +40,7 @@ extern "C" {
// /\ Discard(m)
// /\ UNCHANGED <<serverVars, candidateVars, logVars, elections>>
//
int32_t
syncNodeOnAppendEntriesReplyCb
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncNodeOnAppendEntriesReplySnapshotCb
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncNodeOnAppendEntriesReplySnapshot2Cb
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncNodeOnAppendEntriesReply
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
);
#ifdef __cplusplus
}
...
...
source/libs/sync/inc/syncElection.h
浏览文件 @
5965629b
...
...
@@ -37,12 +37,10 @@ extern "C" {
// msource |-> i,
// mdest |-> j])
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
//
int32_t
syncNodeRequestVotePeers
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeRequestVotePeersSnapshot
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeElect
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeRequestVote
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
const
SyncRequestVote
*
pMsg
);
int32_t
syncNodeRequestVotePeers
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeSendRequestVote
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
const
SyncRequestVote
*
pMsg
);
#ifdef __cplusplus
}
...
...
source/libs/sync/inc/syncEnv.h
浏览文件 @
5965629b
...
...
@@ -28,13 +28,13 @@ extern "C" {
#include "trpc.h"
#include "ttimer.h"
#define TIMER_MAX_MS
0x7FFFFFFF
#define ENV_TICK_TIMER_MS
1000
#define PING_TIMER_MS
5000
#define ELECT_TIMER_MS_MIN
5000
#define ELECT_TIMER_MS_MAX
(ELECT_TIMER_MS_MIN * 2)
#define TIMER_MAX_MS 0x7FFFFFFF
#define ENV_TICK_TIMER_MS 1000
#define PING_TIMER_MS 5000
#define ELECT_TIMER_MS_MIN 5000
#define ELECT_TIMER_MS_MAX (ELECT_TIMER_MS_MIN * 2)
#define ELECT_TIMER_MS_RANGE (ELECT_TIMER_MS_MAX - ELECT_TIMER_MS_MIN)
#define HEARTBEAT_TIMER_MS
900
#define HEARTBEAT_TIMER_MS 900
#define EMPTY_RAFT_ID ((SRaftId){.addr = 0, .vgId = 0})
...
...
source/libs/sync/inc/syncIO.h
浏览文件 @
5965629b
...
...
@@ -56,9 +56,8 @@ typedef struct SSyncIO {
int32_t
(
*
FpOnSyncAppendEntries
)(
SSyncNode
*
pSyncNode
,
SyncAppendEntries
*
pMsg
);
int32_t
(
*
FpOnSyncAppendEntriesReply
)(
SSyncNode
*
pSyncNode
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
(
*
FpOnSyncTimeout
)(
SSyncNode
*
pSyncNode
,
SyncTimeout
*
pMsg
);
int32_t
(
*
FpOnSyncSnapshotSend
)(
SSyncNode
*
pSyncNode
,
SyncSnapshotSend
*
pMsg
);
int32_t
(
*
FpOnSyncSnapshotRsp
)(
SSyncNode
*
pSyncNode
,
SyncSnapshotRsp
*
pMsg
);
int32_t
(
*
FpOnSyncSnapshot
)(
SSyncNode
*
pSyncNode
,
SyncSnapshotSend
*
pMsg
);
int32_t
(
*
FpOnSyncSnapshotReply
)(
SSyncNode
*
pSyncNode
,
SyncSnapshotRsp
*
pMsg
);
int8_t
isStart
;
...
...
source/libs/sync/inc/syncIndexMgr.h
浏览文件 @
5965629b
...
...
@@ -45,8 +45,8 @@ void syncIndexMgrDestroy(SSyncIndexMgr *pSyncIndexMgr);
void
syncIndexMgrClear
(
SSyncIndexMgr
*
pSyncIndexMgr
);
void
syncIndexMgrSetIndex
(
SSyncIndexMgr
*
pSyncIndexMgr
,
const
SRaftId
*
pRaftId
,
SyncIndex
index
);
SyncIndex
syncIndexMgrGetIndex
(
SSyncIndexMgr
*
pSyncIndexMgr
,
const
SRaftId
*
pRaftId
);
cJSON
*
syncIndexMgr2Json
(
SSyncIndexMgr
*
pSyncIndexMgr
);
char
*
syncIndexMgr2Str
(
SSyncIndexMgr
*
pSyncIndexMgr
);
cJSON
*
syncIndexMgr2Json
(
SSyncIndexMgr
*
pSyncIndexMgr
);
char
*
syncIndexMgr2Str
(
SSyncIndexMgr
*
pSyncIndexMgr
);
void
syncIndexMgrSetStartTime
(
SSyncIndexMgr
*
pSyncIndexMgr
,
const
SRaftId
*
pRaftId
,
int64_t
startTime
);
int64_t
syncIndexMgrGetStartTime
(
SSyncIndexMgr
*
pSyncIndexMgr
,
const
SRaftId
*
pRaftId
);
...
...
source/libs/sync/inc/syncInt.h
浏览文件 @
5965629b
...
...
@@ -57,9 +57,37 @@ typedef struct SRaftCfg SRaftCfg;
typedef
struct
SSyncRespMgr
SSyncRespMgr
;
typedef
struct
SSyncSnapshotSender
SSyncSnapshotSender
;
typedef
struct
SSyncSnapshotReceiver
SSyncSnapshotReceiver
;
typedef
struct
SSyncTimer
SSyncTimer
;
typedef
struct
SSyncHbTimerData
SSyncHbTimerData
;
extern
bool
gRaftDetailLog
;
typedef
struct
SSyncHbTimerData
{
SSyncNode
*
pSyncNode
;
SSyncTimer
*
pTimer
;
SRaftId
destId
;
uint64_t
logicClock
;
}
SSyncHbTimerData
;
typedef
struct
SSyncTimer
{
void
*
pTimer
;
TAOS_TMR_CALLBACK
timerCb
;
uint64_t
logicClock
;
uint64_t
counter
;
int32_t
timerMS
;
SRaftId
destId
;
void
*
pData
;
}
SSyncTimer
;
int32_t
syncHbTimerInit
(
SSyncNode
*
pSyncNode
,
SSyncTimer
*
pSyncTimer
,
SRaftId
destId
);
int32_t
syncHbTimerStart
(
SSyncNode
*
pSyncNode
,
SSyncTimer
*
pSyncTimer
);
int32_t
syncHbTimerStop
(
SSyncNode
*
pSyncNode
,
SSyncTimer
*
pSyncTimer
);
typedef
struct
SPeerState
{
SyncIndex
lastSendIndex
;
int64_t
lastSendTime
;
}
SPeerState
;
typedef
struct
SSyncNode
{
// init by SSyncInfo
SyncGroupId
vgId
;
...
...
@@ -73,6 +101,7 @@ typedef struct SSyncNode {
const
SMsgCb
*
msgcb
;
int32_t
(
*
FpSendMsg
)(
const
SEpSet
*
pEpSet
,
SRpcMsg
*
pMsg
);
int32_t
(
*
FpEqMsg
)(
const
SMsgCb
*
msgcb
,
SRpcMsg
*
pMsg
);
int32_t
(
*
FpEqCtrlMsg
)(
const
SMsgCb
*
msgcb
,
SRpcMsg
*
pMsg
);
// init internal
SNodeInfo
myNodeInfo
;
...
...
@@ -138,6 +167,9 @@ typedef struct SSyncNode {
TAOS_TMR_CALLBACK
FpHeartbeatTimerCB
;
// Timer Fp
uint64_t
heartbeatTimerCounter
;
// peer heartbeat timer
SSyncTimer
peerHeartbeatTimerArr
[
TSDB_MAX_REPLICA
];
// callback
FpOnPingCb
FpOnPing
;
FpOnPingReplyCb
FpOnPingReply
;
...
...
@@ -147,8 +179,8 @@ typedef struct SSyncNode {
FpOnRequestVoteReplyCb
FpOnRequestVoteReply
;
FpOnAppendEntriesCb
FpOnAppendEntries
;
FpOnAppendEntriesReplyCb
FpOnAppendEntriesReply
;
FpOnSnapshot
SendCb
FpOnSnapshotSend
;
FpOnSnapshotR
spCb
FpOnSnapshotRsp
;
FpOnSnapshot
Cb
FpOnSnapshot
;
FpOnSnapshotR
eplyCb
FpOnSnapshotReply
;
// tools
SSyncRespMgr
*
pSyncRespMgr
;
...
...
@@ -159,9 +191,15 @@ typedef struct SSyncNode {
SSyncSnapshotSender
*
senders
[
TSDB_MAX_REPLICA
];
SSyncSnapshotReceiver
*
pNewNodeReceiver
;
SPeerState
peerStates
[
TSDB_MAX_REPLICA
];
// is config changing
bool
changing
;
int64_t
snapshottingIndex
;
int64_t
snapshottingTime
;
int64_t
minMatchIndex
;
int64_t
startTime
;
int64_t
leaderTime
;
int64_t
lastReplicateTime
;
...
...
@@ -174,7 +212,6 @@ void syncNodeStart(SSyncNode* pSyncNode);
void
syncNodeStartStandBy
(
SSyncNode
*
pSyncNode
);
void
syncNodeClose
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodePropose
(
SSyncNode
*
pSyncNode
,
SRpcMsg
*
pMsg
,
bool
isWeak
);
int32_t
syncNodeProposeBatch
(
SSyncNode
*
pSyncNode
,
SRpcMsg
**
pMsgPArr
,
bool
*
pIsWeakArr
,
int32_t
arrSize
);
// option
bool
syncNodeSnapshotEnable
(
SSyncNode
*
pSyncNode
);
...
...
@@ -197,23 +234,20 @@ int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms);
int32_t
syncNodeResetElectTimer
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeStartHeartbeatTimer
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeStartHeartbeatTimerNow
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeStartHeartbeatTimerMS
(
SSyncNode
*
pSyncNode
,
int32_t
ms
);
int32_t
syncNodeStopHeartbeatTimer
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeRestartHeartbeatTimer
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeRestartHeartbeatTimerNow
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeRestartNowHeartbeatTimerMS
(
SSyncNode
*
pSyncNode
,
int32_t
ms
);
// utils --------------
int32_t
syncNodeSendMsgById
(
const
SRaftId
*
destRaftId
,
SSyncNode
*
pSyncNode
,
SRpcMsg
*
pMsg
);
int32_t
syncNodeSendMsgByInfo
(
const
SNodeInfo
*
nodeInfo
,
SSyncNode
*
pSyncNode
,
SRpcMsg
*
pMsg
);
cJSON
*
syncNode2Json
(
const
SSyncNode
*
pSyncNode
);
char
*
syncNode2Str
(
const
SSyncNode
*
pSyncNode
);
void
syncNodeEventLog
(
const
SSyncNode
*
pSyncNode
,
char
*
str
);
void
syncNodeErrorLog
(
const
SSyncNode
*
pSyncNode
,
char
*
str
);
char
*
syncNode2SimpleStr
(
const
SSyncNode
*
pSyncNode
);
bool
syncNodeInConfig
(
SSyncNode
*
pSyncNode
,
const
SSyncCfg
*
config
);
void
syncNodeDoConfigChange
(
SSyncNode
*
pSyncNode
,
SSyncCfg
*
newConfig
,
SyncIndex
lastConfigChangeIndex
);
int32_t
syncNodeSendMsgById
(
const
SRaftId
*
destRaftId
,
SSyncNode
*
pSyncNode
,
SRpcMsg
*
pMsg
);
int32_t
syncNodeSendMsgByInfo
(
const
SNodeInfo
*
nodeInfo
,
SSyncNode
*
pSyncNode
,
SRpcMsg
*
pMsg
);
cJSON
*
syncNode2Json
(
const
SSyncNode
*
pSyncNode
);
char
*
syncNode2Str
(
const
SSyncNode
*
pSyncNode
);
void
syncNodeEventLog
(
const
SSyncNode
*
pSyncNode
,
char
*
str
);
void
syncNodeErrorLog
(
const
SSyncNode
*
pSyncNode
,
char
*
str
);
char
*
syncNode2SimpleStr
(
const
SSyncNode
*
pSyncNode
);
bool
syncNodeInConfig
(
SSyncNode
*
pSyncNode
,
const
SSyncCfg
*
config
);
void
syncNodeDoConfigChange
(
SSyncNode
*
pSyncNode
,
SSyncCfg
*
newConfig
,
SyncIndex
lastConfigChangeIndex
);
SyncIndex
syncMinMatchIndex
(
SSyncNode
*
pSyncNode
);
SSyncNode
*
syncNodeAcquire
(
int64_t
rid
);
void
syncNodeRelease
(
SSyncNode
*
pNode
);
...
...
@@ -221,6 +255,7 @@ void syncNodeRelease(SSyncNode* pNode);
// raft state change --------------
void
syncNodeUpdateTerm
(
SSyncNode
*
pSyncNode
,
SyncTerm
term
);
void
syncNodeUpdateTermWithoutStepDown
(
SSyncNode
*
pSyncNode
,
SyncTerm
term
);
void
syncNodeStepDown
(
SSyncNode
*
pSyncNode
,
SyncTerm
newTerm
);
void
syncNodeBecomeFollower
(
SSyncNode
*
pSyncNode
,
const
char
*
debugStr
);
void
syncNodeBecomeLeader
(
SSyncNode
*
pSyncNode
,
const
char
*
debugStr
);
...
...
@@ -240,21 +275,23 @@ void syncNodeMaybeUpdateCommitBySnapshot(SSyncNode* pSyncNode);
SyncIndex
syncNodeGetLastIndex
(
const
SSyncNode
*
pSyncNode
);
SyncTerm
syncNodeGetLastTerm
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeGetLastIndexTerm
(
SSyncNode
*
pSyncNode
,
SyncIndex
*
pLastIndex
,
SyncTerm
*
pLastTerm
);
SyncIndex
syncNodeSyncStartIndex
(
SSyncNode
*
pSyncNode
);
SyncIndex
syncNodeGetPreIndex
(
SSyncNode
*
pSyncNode
,
SyncIndex
index
);
SyncTerm
syncNodeGetPreTerm
(
SSyncNode
*
pSyncNode
,
SyncIndex
index
);
int32_t
syncNodeGetPreIndexTerm
(
SSyncNode
*
pSyncNode
,
SyncIndex
index
,
SyncIndex
*
pPreIndex
,
SyncTerm
*
pPreTerm
);
bool
syncNodeIsOptimizedOneReplica
(
SSyncNode
*
ths
,
SRpcMsg
*
pMsg
);
int32_t
syncNodeCommit
(
SSyncNode
*
ths
,
SyncIndex
beginIndex
,
SyncIndex
endIndex
,
uint64_t
flag
);
int32_t
syncNodeDoCommit
(
SSyncNode
*
ths
,
SyncIndex
beginIndex
,
SyncIndex
endIndex
,
uint64_t
flag
);
int32_t
syncNodeFollowerCommit
(
SSyncNode
*
ths
,
SyncIndex
newCommitIndex
);
int32_t
syncNodePreCommit
(
SSyncNode
*
ths
,
SSyncRaftEntry
*
pEntry
,
int32_t
code
);
int32_t
syncNodeUpdateNewConfigIndex
(
SSyncNode
*
ths
,
SSyncCfg
*
pNewCfg
);
bool
syncNodeInRaftGroup
(
SSyncNode
*
ths
,
SRaftId
*
pRaftId
);
SSyncSnapshotSender
*
syncNodeGetSnapshotSender
(
SSyncNode
*
ths
,
SRaftId
*
pDestId
);
SSyncTimer
*
syncNodeGetHbTimer
(
SSyncNode
*
ths
,
SRaftId
*
pDestId
);
SPeerState
*
syncNodeGetPeerState
(
SSyncNode
*
ths
,
const
SRaftId
*
pDestId
);
bool
syncNodeNeedSendAppendEntries
(
SSyncNode
*
ths
,
const
SRaftId
*
pDestId
,
const
SyncAppendEntries
*
pMsg
);
int32_t
syncGetSnapshotMeta
(
int64_t
rid
,
struct
SSnapshotMeta
*
sMeta
);
int32_t
syncGetSnapshotMetaByIndex
(
int64_t
rid
,
SyncIndex
snapshotIndex
,
struct
SSnapshotMeta
*
sMeta
);
...
...
@@ -271,7 +308,12 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p
int32_t
syncNodeDynamicQuorum
(
const
SSyncNode
*
pSyncNode
);
bool
syncNodeIsMnode
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodePeerStateInit
(
SSyncNode
*
pSyncNode
);
// trace log
void
syncLogRecvTimer
(
SSyncNode
*
pSyncNode
,
const
SyncTimeout
*
pMsg
,
const
char
*
s
);
void
syncLogSendRequestVote
(
SSyncNode
*
pSyncNode
,
const
SyncRequestVote
*
pMsg
,
const
char
*
s
);
void
syncLogRecvRequestVote
(
SSyncNode
*
pSyncNode
,
const
SyncRequestVote
*
pMsg
,
const
char
*
s
);
...
...
source/libs/sync/inc/syncRaftCfg.h
浏览文件 @
5965629b
...
...
@@ -45,8 +45,8 @@ int32_t raftCfgIndexClose(SRaftCfgIndex *pRaftCfgIndex);
int32_t
raftCfgIndexPersist
(
SRaftCfgIndex
*
pRaftCfgIndex
);
int32_t
raftCfgIndexAddConfigIndex
(
SRaftCfgIndex
*
pRaftCfgIndex
,
SyncIndex
configIndex
);
cJSON
*
raftCfgIndex2Json
(
SRaftCfgIndex
*
pRaftCfgIndex
);
char
*
raftCfgIndex2Str
(
SRaftCfgIndex
*
pRaftCfgIndex
);
cJSON
*
raftCfgIndex2Json
(
SRaftCfgIndex
*
pRaftCfgIndex
);
char
*
raftCfgIndex2Str
(
SRaftCfgIndex
*
pRaftCfgIndex
);
int32_t
raftCfgIndexFromJson
(
const
cJSON
*
pRoot
,
SRaftCfgIndex
*
pRaftCfgIndex
);
int32_t
raftCfgIndexFromStr
(
const
char
*
s
,
SRaftCfgIndex
*
pRaftCfgIndex
);
...
...
@@ -73,14 +73,14 @@ int32_t raftCfgClose(SRaftCfg *pRaftCfg);
int32_t
raftCfgPersist
(
SRaftCfg
*
pRaftCfg
);
int32_t
raftCfgAddConfigIndex
(
SRaftCfg
*
pRaftCfg
,
SyncIndex
configIndex
);
cJSON
*
syncCfg2Json
(
SSyncCfg
*
pSyncCfg
);
char
*
syncCfg2Str
(
SSyncCfg
*
pSyncCfg
);
char
*
syncCfg2SimpleStr
(
SSyncCfg
*
pSyncCfg
);
cJSON
*
syncCfg2Json
(
SSyncCfg
*
pSyncCfg
);
char
*
syncCfg2Str
(
SSyncCfg
*
pSyncCfg
);
char
*
syncCfg2SimpleStr
(
SSyncCfg
*
pSyncCfg
);
int32_t
syncCfgFromJson
(
const
cJSON
*
pRoot
,
SSyncCfg
*
pSyncCfg
);
int32_t
syncCfgFromStr
(
const
char
*
s
,
SSyncCfg
*
pSyncCfg
);
cJSON
*
raftCfg2Json
(
SRaftCfg
*
pRaftCfg
);
char
*
raftCfg2Str
(
SRaftCfg
*
pRaftCfg
);
cJSON
*
raftCfg2Json
(
SRaftCfg
*
pRaftCfg
);
char
*
raftCfg2Str
(
SRaftCfg
*
pRaftCfg
);
int32_t
raftCfgFromJson
(
const
cJSON
*
pRoot
,
SRaftCfg
*
pRaftCfg
);
int32_t
raftCfgFromStr
(
const
char
*
s
,
SRaftCfg
*
pRaftCfg
);
...
...
source/libs/sync/inc/syncReplication.h
浏览文件 @
5965629b
...
...
@@ -50,16 +50,15 @@ extern "C" {
// msource |-> i,
// mdest |-> j])
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
//
int32_t
syncNodeAppendEntriesPeers
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeAppendEntriesPeersSnapshot
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeAppendEntriesPeersSnapshot2
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeAppendEntriesOnePeer
(
SSyncNode
*
pSyncNode
,
SRaftId
*
pDestId
,
SyncIndex
nextIndex
);
int32_t
syncNodeHeartbeatPeers
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeSendHeartbeat
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
pDestId
,
const
SyncHeartbeat
*
pMsg
);
int32_t
syncNodeReplicate
(
SSyncNode
*
pSyncNode
,
bool
isTimer
);
int32_t
syncNodeAppendEntries
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
const
SyncAppendEntries
*
pMsg
);
int32_t
syncNodeAppendEntriesBatch
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
const
SyncAppendEntriesBatch
*
pMsg
);
int32_t
syncNodeReplicate
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeReplicateOne
(
SSyncNode
*
pSyncNode
,
SRaftId
*
pDestId
);
int32_t
syncNodeSendAppendEntries
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
pDestId
,
const
SyncAppendEntries
*
pMsg
);
int32_t
syncNodeMaybeSendAppendEntries
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
pDestId
,
const
SyncAppendEntries
*
pMsg
);
#ifdef __cplusplus
}
...
...
source/libs/sync/inc/syncRequestVote.h
浏览文件 @
5965629b
...
...
@@ -49,8 +49,7 @@ extern "C" {
// m)
// /\ UNCHANGED <<state, currentTerm, candidateVars, leaderVars, logVars>>
//
int32_t
syncNodeOnRequestVoteCb
(
SSyncNode
*
ths
,
SyncRequestVote
*
pMsg
);
int32_t
syncNodeOnRequestVoteSnapshotCb
(
SSyncNode
*
ths
,
SyncRequestVote
*
pMsg
);
int32_t
syncNodeOnRequestVote
(
SSyncNode
*
ths
,
SyncRequestVote
*
pMsg
);
#ifdef __cplusplus
}
...
...
source/libs/sync/inc/syncRequestVoteReply.h
浏览文件 @
5965629b
...
...
@@ -44,8 +44,7 @@ extern "C" {
// /\ Discard(m)
// /\ UNCHANGED <<serverVars, votedFor, leaderVars, logVars>>
//
int32_t
syncNodeOnRequestVoteReplyCb
(
SSyncNode
*
ths
,
SyncRequestVoteReply
*
pMsg
);
int32_t
syncNodeOnRequestVoteReplySnapshotCb
(
SSyncNode
*
ths
,
SyncRequestVoteReply
*
pMsg
);
int32_t
syncNodeOnRequestVoteReply
(
SSyncNode
*
ths
,
SyncRequestVoteReply
*
pMsg
);
#ifdef __cplusplus
}
...
...
source/libs/sync/inc/syncRespMgr.h
浏览文件 @
5965629b
...
...
@@ -32,9 +32,9 @@ typedef struct SRespStub {
}
SRespStub
;
typedef
struct
SSyncRespMgr
{
SHashObj
*
pRespHash
;
SHashObj
*
pRespHash
;
int64_t
ttl
;
void
*
data
;
void
*
data
;
TdThreadMutex
mutex
;
uint64_t
seqNum
;
}
SSyncRespMgr
;
...
...
source/libs/sync/inc/syncSnapshot.h
浏览文件 @
5965629b
...
...
@@ -28,10 +28,10 @@ extern "C" {
#include "syncMessage.h"
#include "taosdef.h"
#define SYNC_SNAPSHOT_SEQ_INVALID
-1
#define SYNC_SNAPSHOT_SEQ_INVALID -1
#define SYNC_SNAPSHOT_SEQ_FORCE_CLOSE -2
#define SYNC_SNAPSHOT_SEQ_BEGIN
0
#define SYNC_SNAPSHOT_SEQ_END
0x7FFFFFFF
#define SYNC_SNAPSHOT_SEQ_BEGIN 0
#define SYNC_SNAPSHOT_SEQ_END 0x7FFFFFFF
#define SYNC_SNAPSHOT_RETRY_MS 5000
...
...
@@ -51,6 +51,7 @@ typedef struct SSyncSnapshotSender {
int32_t
replicaIndex
;
SyncTerm
term
;
SyncTerm
privateTerm
;
int64_t
startTime
;
bool
finish
;
}
SSyncSnapshotSender
;
...
...
@@ -67,6 +68,8 @@ cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender);
char
*
snapshotSender2Str
(
SSyncSnapshotSender
*
pSender
);
char
*
snapshotSender2SimpleStr
(
SSyncSnapshotSender
*
pSender
,
char
*
event
);
int32_t
syncNodeStartSnapshot
(
SSyncNode
*
pSyncNode
,
SRaftId
*
pDestId
);
//---------------------------------------------------
typedef
struct
SSyncSnapshotReceiver
{
bool
start
;
...
...
@@ -94,8 +97,8 @@ char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event)
//---------------------------------------------------
// on message
int32_t
syncNodeOnSnapshot
SendCb
(
SSyncNode
*
ths
,
SyncSnapshotSend
*
pMsg
);
int32_t
syncNodeOnSnapshotR
spCb
(
SSyncNode
*
ths
,
SyncSnapshotRsp
*
pMsg
);
int32_t
syncNodeOnSnapshot
(
SSyncNode
*
ths
,
SyncSnapshotSend
*
pMsg
);
int32_t
syncNodeOnSnapshotR
eply
(
SSyncNode
*
ths
,
SyncSnapshotRsp
*
pMsg
);
#ifdef __cplusplus
}
...
...
source/libs/sync/inc/syncTimeout.h
浏览文件 @
5965629b
...
...
@@ -39,7 +39,7 @@ extern "C" {
// /\ voterLog' = [voterLog EXCEPT ![i] = [j \in {} |-> <<>>]]
// /\ UNCHANGED <<messages, leaderVars, logVars>>
//
int32_t
syncNodeOnTime
outCb
(
SSyncNode
*
ths
,
SyncTimeout
*
pMsg
);
int32_t
syncNodeOnTime
r
(
SSyncNode
*
ths
,
SyncTimeout
*
pMsg
);
#ifdef __cplusplus
}
...
...
source/libs/sync/src/syncAppendEntries.c
浏览文件 @
5965629b
...
...
@@ -89,240 +89,6 @@
// /\ UNCHANGED <<candidateVars, leaderVars>>
//
int32_t
syncNodeOnAppendEntriesCb
(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
)
{
int32_t
ret
=
0
;
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
ths
,
&
(
pMsg
->
srcId
))
&&
!
ths
->
pRaftCfg
->
isStandBy
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"maybe replica already dropped"
);
return
-
1
;
}
// maybe update term
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncNodeUpdateTerm
(
ths
,
pMsg
->
term
);
}
ASSERT
(
pMsg
->
term
<=
ths
->
pRaftStore
->
currentTerm
);
// reset elect timer
if
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
{
ths
->
leaderCache
=
pMsg
->
srcId
;
syncNodeResetElectTimer
(
ths
);
}
ASSERT
(
pMsg
->
dataLen
>=
0
);
// return to follower state
if
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
&&
ths
->
state
==
TAOS_SYNC_STATE_CANDIDATE
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"candidate to follower"
);
syncNodeBecomeFollower
(
ths
,
"from candidate by append entries"
);
return
-
1
;
// ret or reply?
}
SyncTerm
localPreLogTerm
=
0
;
if
(
pMsg
->
prevLogIndex
>=
SYNC_INDEX_BEGIN
&&
pMsg
->
prevLogIndex
<=
ths
->
pLogStore
->
getLastIndex
(
ths
->
pLogStore
))
{
SSyncRaftEntry
*
pEntry
=
ths
->
pLogStore
->
getEntry
(
ths
->
pLogStore
,
pMsg
->
prevLogIndex
);
if
(
pEntry
==
NULL
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"getEntry error, index:%"
PRId64
", since %s"
,
pMsg
->
prevLogIndex
,
terrstr
());
syncNodeErrorLog
(
ths
,
logBuf
);
return
-
1
;
}
localPreLogTerm
=
pEntry
->
term
;
syncEntryDestory
(
pEntry
);
}
bool
logOK
=
(
pMsg
->
prevLogIndex
==
SYNC_INDEX_INVALID
)
||
((
pMsg
->
prevLogIndex
>=
SYNC_INDEX_BEGIN
)
&&
(
pMsg
->
prevLogIndex
<=
ths
->
pLogStore
->
getLastIndex
(
ths
->
pLogStore
))
&&
(
pMsg
->
prevLogTerm
==
localPreLogTerm
));
// reject request
if
((
pMsg
->
term
<
ths
->
pRaftStore
->
currentTerm
)
||
((
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
&&
(
ths
->
state
==
TAOS_SYNC_STATE_FOLLOWER
)
&&
!
logOK
))
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"reject"
);
SyncAppendEntriesReply
*
pReply
=
syncAppendEntriesReplyBuild
(
ths
->
vgId
);
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
pRaftStore
->
currentTerm
;
pReply
->
success
=
false
;
pReply
->
matchIndex
=
SYNC_INDEX_INVALID
;
pReply
->
startTime
=
ths
->
startTime
;
// msg event log
syncLogSendAppendEntriesReply
(
ths
,
pReply
,
""
);
SRpcMsg
rpcMsg
;
syncAppendEntriesReply2RpcMsg
(
pReply
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcMsg
);
syncAppendEntriesReplyDestroy
(
pReply
);
return
ret
;
}
// accept request
if
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
&&
ths
->
state
==
TAOS_SYNC_STATE_FOLLOWER
&&
logOK
)
{
// preIndex = -1, or has preIndex entry in local log
ASSERT
(
pMsg
->
prevLogIndex
<=
ths
->
pLogStore
->
getLastIndex
(
ths
->
pLogStore
));
// has extra entries (> preIndex) in local log
bool
hasExtraEntries
=
pMsg
->
prevLogIndex
<
ths
->
pLogStore
->
getLastIndex
(
ths
->
pLogStore
);
// has entries in SyncAppendEntries msg
bool
hasAppendEntries
=
pMsg
->
dataLen
>
0
;
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"accept"
);
if
(
hasExtraEntries
&&
hasAppendEntries
)
{
// not conflict by default
bool
conflict
=
false
;
SyncIndex
extraIndex
=
pMsg
->
prevLogIndex
+
1
;
SSyncRaftEntry
*
pExtraEntry
=
ths
->
pLogStore
->
getEntry
(
ths
->
pLogStore
,
extraIndex
);
if
(
pExtraEntry
==
NULL
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"getEntry error2, index:%"
PRId64
", since %s"
,
extraIndex
,
terrstr
());
syncNodeErrorLog
(
ths
,
logBuf
);
return
-
1
;
}
SSyncRaftEntry
*
pAppendEntry
=
syncEntryDeserialize
(
pMsg
->
data
,
pMsg
->
dataLen
);
if
(
pAppendEntry
==
NULL
)
{
syncNodeErrorLog
(
ths
,
"syncEntryDeserialize pAppendEntry error"
);
return
-
1
;
}
// log not match, conflict
ASSERT
(
extraIndex
==
pAppendEntry
->
index
);
if
(
pExtraEntry
->
term
!=
pAppendEntry
->
term
)
{
conflict
=
true
;
}
if
(
conflict
)
{
// roll back
SyncIndex
delBegin
=
ths
->
pLogStore
->
getLastIndex
(
ths
->
pLogStore
);
SyncIndex
delEnd
=
extraIndex
;
sTrace
(
"syncNodeOnAppendEntriesCb --> conflict:%d, delBegin:%"
PRId64
", delEnd:%"
PRId64
,
conflict
,
delBegin
,
delEnd
);
// notice! reverse roll back!
for
(
SyncIndex
index
=
delEnd
;
index
>=
delBegin
;
--
index
)
{
if
(
ths
->
pFsm
->
FpRollBackCb
!=
NULL
)
{
SSyncRaftEntry
*
pRollBackEntry
=
ths
->
pLogStore
->
getEntry
(
ths
->
pLogStore
,
index
);
if
(
pRollBackEntry
==
NULL
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"getEntry error3, index:%"
PRId64
", since %s"
,
index
,
terrstr
());
syncNodeErrorLog
(
ths
,
logBuf
);
return
-
1
;
}
// if (pRollBackEntry->msgType != TDMT_SYNC_NOOP) {
if
(
syncUtilUserRollback
(
pRollBackEntry
->
msgType
))
{
SRpcMsg
rpcMsg
;
syncEntry2OriginalRpc
(
pRollBackEntry
,
&
rpcMsg
);
SFsmCbMeta
cbMeta
=
{
0
};
cbMeta
.
index
=
pRollBackEntry
->
index
;
cbMeta
.
lastConfigIndex
=
syncNodeGetSnapshotConfigIndex
(
ths
,
cbMeta
.
index
);
cbMeta
.
isWeak
=
pRollBackEntry
->
isWeak
;
cbMeta
.
code
=
0
;
cbMeta
.
state
=
ths
->
state
;
cbMeta
.
seqNum
=
pRollBackEntry
->
seqNum
;
ths
->
pFsm
->
FpRollBackCb
(
ths
->
pFsm
,
&
rpcMsg
,
cbMeta
);
rpcFreeCont
(
rpcMsg
.
pCont
);
}
syncEntryDestory
(
pRollBackEntry
);
}
}
// delete confict entries
ths
->
pLogStore
->
truncate
(
ths
->
pLogStore
,
extraIndex
);
// append new entries
ths
->
pLogStore
->
appendEntry
(
ths
->
pLogStore
,
pAppendEntry
);
// pre commit
syncNodePreCommit
(
ths
,
pAppendEntry
,
0
);
}
// free memory
syncEntryDestory
(
pExtraEntry
);
syncEntryDestory
(
pAppendEntry
);
}
else
if
(
hasExtraEntries
&&
!
hasAppendEntries
)
{
// do nothing
}
else
if
(
!
hasExtraEntries
&&
hasAppendEntries
)
{
SSyncRaftEntry
*
pAppendEntry
=
syncEntryDeserialize
(
pMsg
->
data
,
pMsg
->
dataLen
);
if
(
pAppendEntry
==
NULL
)
{
syncNodeErrorLog
(
ths
,
"syncEntryDeserialize pAppendEntry2 error"
);
return
-
1
;
}
// append new entries
ths
->
pLogStore
->
appendEntry
(
ths
->
pLogStore
,
pAppendEntry
);
// pre commit
syncNodePreCommit
(
ths
,
pAppendEntry
,
0
);
// free memory
syncEntryDestory
(
pAppendEntry
);
}
else
if
(
!
hasExtraEntries
&&
!
hasAppendEntries
)
{
// do nothing
}
else
{
syncNodeLog3
(
""
,
ths
);
ASSERT
(
0
);
}
SyncAppendEntriesReply
*
pReply
=
syncAppendEntriesReplyBuild
(
ths
->
vgId
);
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
pRaftStore
->
currentTerm
;
pReply
->
success
=
true
;
if
(
hasAppendEntries
)
{
pReply
->
matchIndex
=
pMsg
->
prevLogIndex
+
1
;
}
else
{
pReply
->
matchIndex
=
pMsg
->
prevLogIndex
;
}
pReply
->
startTime
=
ths
->
startTime
;
// msg event log
syncLogSendAppendEntriesReply
(
ths
,
pReply
,
""
);
SRpcMsg
rpcMsg
;
syncAppendEntriesReply2RpcMsg
(
pReply
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcMsg
);
syncAppendEntriesReplyDestroy
(
pReply
);
// maybe update commit index from leader
if
(
pMsg
->
commitIndex
>
ths
->
commitIndex
)
{
// has commit entry in local
if
(
pMsg
->
commitIndex
<=
ths
->
pLogStore
->
getLastIndex
(
ths
->
pLogStore
))
{
SyncIndex
beginIndex
=
ths
->
commitIndex
+
1
;
SyncIndex
endIndex
=
pMsg
->
commitIndex
;
// update commit index
ths
->
commitIndex
=
pMsg
->
commitIndex
;
// call back Wal
ths
->
pLogStore
->
updateCommitIndex
(
ths
->
pLogStore
,
ths
->
commitIndex
);
int32_t
code
=
syncNodeCommit
(
ths
,
beginIndex
,
endIndex
,
ths
->
state
);
ASSERT
(
code
==
0
);
}
}
}
return
ret
;
}
static
int32_t
syncNodeMakeLogSame
(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
)
{
int32_t
code
;
...
...
@@ -408,7 +174,7 @@ static int32_t syncNodeDoMakeLogSame(SSyncNode* ths, SyncIndex FromIndex) {
do
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"update delete begin to %
"
PRId64
,
delBegin
);
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"update delete begin to %
ld"
,
delBegin
);
syncNodeEventLog
(
ths
,
logBuf
);
}
while
(
0
);
}
...
...
@@ -419,8 +185,7 @@ static int32_t syncNodeDoMakeLogSame(SSyncNode* ths, SyncIndex FromIndex) {
do
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"make log same from:%"
PRId64
", delbegin:%"
PRId64
", pass:%d"
,
FromIndex
,
delBegin
,
pass
);
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"make log same from:%ld, delbegin:%ld, pass:%d"
,
FromIndex
,
delBegin
,
pass
);
syncNodeEventLog
(
ths
,
logBuf
);
}
while
(
0
);
...
...
@@ -505,544 +270,222 @@ static bool syncNodeOnAppendEntriesLogOK(SSyncNode* pSyncNode, SyncAppendEntries
return
false
;
}
int32_t
syncNodeOnAppendEntriesSnapshot2Cb
(
SSyncNode
*
ths
,
SyncAppendEntriesBatch
*
pMsg
)
{
int32_t
ret
=
0
;
int32_t
code
=
0
;
int32_t
syncNodeFollowerCommit
(
SSyncNode
*
ths
,
SyncIndex
newCommitIndex
)
{
// maybe update commit index, leader notice me
if
(
newCommitIndex
>
ths
->
commitIndex
)
{
// has commit entry in local
if
(
newCommitIndex
<=
ths
->
pLogStore
->
syncLogLastIndex
(
ths
->
pLogStore
))
{
// advance commit index to sanpshot first
SSnapshot
snapshot
;
ths
->
pFsm
->
FpGetSnapshotInfo
(
ths
->
pFsm
,
&
snapshot
);
if
(
snapshot
.
lastApplyIndex
>=
0
&&
snapshot
.
lastApplyIndex
>
ths
->
commitIndex
)
{
SyncIndex
commitBegin
=
ths
->
commitIndex
;
SyncIndex
commitEnd
=
snapshot
.
lastApplyIndex
;
ths
->
commitIndex
=
snapshot
.
lastApplyIndex
;
char
eventLog
[
128
];
snprintf
(
eventLog
,
sizeof
(
eventLog
),
"commit by snapshot from index:%"
PRId64
" to index:%"
PRId64
,
commitBegin
,
commitEnd
);
syncNodeEventLog
(
ths
,
eventLog
);
}
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
ths
,
&
(
pMsg
->
srcId
))
&&
!
ths
->
pRaftCfg
->
isStandBy
)
{
syncLogRecvAppendEntriesBatch
(
ths
,
pMsg
,
"maybe replica already dropped"
);
return
-
1
;
}
SyncIndex
beginIndex
=
ths
->
commitIndex
+
1
;
SyncIndex
endIndex
=
newCommitIndex
;
// maybe update term
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncNodeUpdateTerm
(
ths
,
pMsg
->
term
);
}
ASSERT
(
pMsg
->
term
<=
ths
->
pRaftStore
->
currentTerm
);
// update commit index
ths
->
commitIndex
=
newCommitIndex
;
// reset elect timer
if
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
{
ths
->
leaderCache
=
pMsg
->
srcId
;
syncNodeResetElectTimer
(
ths
);
}
ASSERT
(
pMsg
->
dataLen
>=
0
);
// call back Wal
int32_t
code
=
ths
->
pLogStore
->
syncLogUpdateCommitIndex
(
ths
->
pLogStore
,
ths
->
commitIndex
);
ASSERT
(
code
==
0
);
// candidate to follower
//
// operation:
// to follower
do
{
bool
condition
=
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
&&
ths
->
state
==
TAOS_SYNC_STATE_CANDIDATE
;
if
(
condition
)
{
syncLogRecvAppendEntriesBatch
(
ths
,
pMsg
,
"candidate to follower"
);
syncNodeBecomeFollower
(
ths
,
"from candidate by append entries"
);
return
0
;
// do not reply?
code
=
syncNodeDoCommit
(
ths
,
beginIndex
,
endIndex
,
ths
->
state
);
ASSERT
(
code
==
0
);
}
}
while
(
0
);
}
// fake match
//
// condition1:
// preIndex <= my commit index
//
// operation:
// if hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex, append entry
// match my-commit-index or my-commit-index + batchSize
do
{
bool
condition
=
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
&&
(
ths
->
state
==
TAOS_SYNC_STATE_FOLLOWER
)
&&
(
pMsg
->
prevLogIndex
<=
ths
->
commitIndex
);
if
(
condition
)
{
syncLogRecvAppendEntriesBatch
(
ths
,
pMsg
,
"fake match"
);
SyncIndex
matchIndex
=
ths
->
commitIndex
;
bool
hasAppendEntries
=
pMsg
->
dataLen
>
0
;
SOffsetAndContLen
*
metaTableArr
=
syncAppendEntriesBatchMetaTableArray
(
pMsg
);
if
(
hasAppendEntries
&&
pMsg
->
prevLogIndex
==
ths
->
commitIndex
)
{
int32_t
pass
=
0
;
SyncIndex
logLastIndex
=
ths
->
pLogStore
->
syncLogLastIndex
(
ths
->
pLogStore
);
bool
hasExtraEntries
=
logLastIndex
>
pMsg
->
prevLogIndex
;
// make log same
if
(
hasExtraEntries
)
{
// make log same, rollback deleted entries
pass
=
syncNodeDoMakeLogSame
(
ths
,
pMsg
->
prevLogIndex
+
1
);
ASSERT
(
pass
>=
0
);
}
return
0
;
}
// append entry batch
if
(
pass
==
0
)
{
// assert! no batch
ASSERT
(
pMsg
->
dataCount
<=
1
);
int32_t
syncNodeOnAppendEntries
(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
)
{
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
ths
,
&
(
pMsg
->
srcId
))
&&
!
ths
->
pRaftCfg
->
isStandBy
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"ignore, maybe replica already dropped"
);
goto
_IGNORE
;
}
for
(
int32_t
i
=
0
;
i
<
pMsg
->
dataCount
;
++
i
)
{
SSyncRaftEntry
*
pAppendEntry
=
(
SSyncRaftEntry
*
)(
pMsg
->
data
+
metaTableArr
[
i
].
offset
);
code
=
ths
->
pLogStore
->
syncLogAppendEntry
(
ths
->
pLogStore
,
pAppendEntry
);
if
(
code
!=
0
)
{
sError
(
"vgId:%d, failed to append log entry since %s"
,
ths
->
vgId
,
tstrerror
(
terrno
));
return
-
1
;
}
// prepare response msg
SyncAppendEntriesReply
*
pReply
=
syncAppendEntriesReplyBuild
(
ths
->
vgId
);
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
pRaftStore
->
currentTerm
;
pReply
->
success
=
false
;
// pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
pReply
->
matchIndex
=
SYNC_INDEX_INVALID
;
pReply
->
lastSendIndex
=
pMsg
->
prevLogIndex
+
1
;
pReply
->
privateTerm
=
ths
->
pNewNodeReceiver
->
privateTerm
;
pReply
->
startTime
=
ths
->
startTime
;
if
(
pMsg
->
term
<
ths
->
pRaftStore
->
currentTerm
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"reject, small term"
);
goto
_SEND_RESPONSE
;
}
code
=
syncNodePreCommit
(
ths
,
pAppendEntry
,
0
);
ASSERT
(
code
==
0
);
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
pReply
->
term
=
pMsg
->
term
;
}
// syncEntryDestory(pAppendEntry);
}
}
syncNodeStepDown
(
ths
,
pMsg
->
term
);
syncNodeResetElectTimer
(
ths
);
// fsync once
SSyncLogStoreData
*
pData
=
ths
->
pLogStore
->
data
;
SWal
*
pWal
=
pData
->
pWal
;
walFsync
(
pWal
,
false
);
SyncIndex
startIndex
=
ths
->
pLogStore
->
syncLogBeginIndex
(
ths
->
pLogStore
);
SyncIndex
lastIndex
=
ths
->
pLogStore
->
syncLogLastIndex
(
ths
->
pLogStore
);
// update match index
matchIndex
=
pMsg
->
prevLogIndex
+
pMsg
->
dataCount
;
}
if
(
pMsg
->
prevLogIndex
>
lastIndex
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"reject, index not match"
);
goto
_SEND_RESPONSE
;
}
// prepare response msg
SyncAppendEntriesReply
*
pReply
=
syncAppendEntriesReplyBuild
(
ths
->
vgId
);
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
pRaftStore
->
currentTerm
;
pReply
->
privateTerm
=
ths
->
pNewNodeReceiver
->
privateTerm
;
pReply
->
success
=
true
;
pReply
->
matchIndex
=
matchIndex
;
pReply
->
startTime
=
ths
->
startTime
;
// msg event log
syncLogSendAppendEntriesReply
(
ths
,
pReply
,
""
);
// send response
SRpcMsg
rpcMsg
;
syncAppendEntriesReply2RpcMsg
(
pReply
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcMsg
);
syncAppendEntriesReplyDestroy
(
pReply
);
return
0
;
}
}
while
(
0
);
if
(
pMsg
->
prevLogIndex
>=
startIndex
)
{
SyncTerm
myPreLogTerm
=
syncNodeGetPreTerm
(
ths
,
pMsg
->
prevLogIndex
+
1
);
ASSERT
(
myPreLogTerm
!=
SYNC_TERM_INVALID
);
// calculate logOK here, before will coredump, due to fake match
bool
logOK
=
syncNodeOnAppendEntriesBatchLogOK
(
ths
,
pMsg
);
// not match
//
// condition1:
// term < myTerm
//
// condition2:
// !logOK
//
// operation:
// not match
// no operation on log
do
{
bool
condition1
=
pMsg
->
term
<
ths
->
pRaftStore
->
currentTerm
;
bool
condition2
=
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
&&
(
ths
->
state
==
TAOS_SYNC_STATE_FOLLOWER
)
&&
!
logOK
;
bool
condition
=
condition1
||
condition2
;
if
(
condition
)
{
syncLogRecvAppendEntriesBatch
(
ths
,
pMsg
,
"not match"
);
// maybe update commit index by snapshot
syncNodeMaybeUpdateCommitBySnapshot
(
ths
);
// prepare response msg
SyncAppendEntriesReply
*
pReply
=
syncAppendEntriesReplyBuild
(
ths
->
vgId
);
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
pRaftStore
->
currentTerm
;
pReply
->
privateTerm
=
ths
->
pNewNodeReceiver
->
privateTerm
;
pReply
->
success
=
false
;
pReply
->
matchIndex
=
ths
->
commitIndex
;
pReply
->
startTime
=
ths
->
startTime
;
// msg event log
syncLogSendAppendEntriesReply
(
ths
,
pReply
,
""
);
// send response
SRpcMsg
rpcMsg
;
syncAppendEntriesReply2RpcMsg
(
pReply
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcMsg
);
syncAppendEntriesReplyDestroy
(
pReply
);
return
0
;
if
(
myPreLogTerm
!=
pMsg
->
prevLogTerm
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"reject, pre-term not match"
);
goto
_SEND_RESPONSE
;
}
}
while
(
0
);
// really match
//
// condition:
// logOK
//
// operation:
// match
// make log same
do
{
bool
condition
=
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
&&
(
ths
->
state
==
TAOS_SYNC_STATE_FOLLOWER
)
&&
logOK
;
if
(
condition
)
{
// has extra entries (> preIndex) in local log
SyncIndex
myLastIndex
=
syncNodeGetLastIndex
(
ths
);
bool
hasExtraEntries
=
myLastIndex
>
pMsg
->
prevLogIndex
;
}
// has entries in SyncAppendEntries msg
bool
hasAppendEntries
=
pMsg
->
dataLen
>
0
;
SOffsetAndContLen
*
metaTableArr
=
syncAppendEntriesBatchMetaTableArray
(
pMsg
);
// accept
pReply
->
success
=
true
;
bool
hasAppendEntries
=
pMsg
->
dataLen
>
0
;
if
(
hasAppendEntries
)
{
SSyncRaftEntry
*
pAppendEntry
=
syncEntryDeserialize
(
pMsg
->
data
,
pMsg
->
dataLen
);
ASSERT
(
pAppendEntry
!=
NULL
);
syncLogRecvAppendEntriesBatch
(
ths
,
pMsg
,
"really match"
);
SyncIndex
appendIndex
=
pMsg
->
prevLogIndex
+
1
;
SSyncRaftEntry
*
pLocalEntry
=
NULL
;
int32_t
code
=
ths
->
pLogStore
->
syncLogGetEntry
(
ths
->
pLogStore
,
appendIndex
,
&
pLocalEntry
);
if
(
code
==
0
)
{
if
(
pLocalEntry
->
term
==
pAppendEntry
->
term
)
{
// do nothing
int32_t
pass
=
0
;
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"log match, do nothing, index:%ld"
,
appendIndex
);
syncNodeEventLog
(
ths
,
logBuf
);
if
(
hasExtraEntries
)
{
// make log same, rollback deleted entries
pass
=
syncNodeDoMakeLogSame
(
ths
,
pMsg
->
prevLogIndex
+
1
);
ASSERT
(
pass
>=
0
);
}
}
else
{
// truncate
code
=
ths
->
pLogStore
->
syncLogTruncate
(
ths
->
pLogStore
,
appendIndex
);
if
(
code
!=
0
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"ignore, truncate error, append-index:%ld"
,
appendIndex
);
syncLogRecvAppendEntries
(
ths
,
pMsg
,
logBuf
);
if
(
hasAppendEntries
)
{
// append entry batch
if
(
pass
==
0
)
{
// assert! no batch
ASSERT
(
pMsg
->
dataCount
<=
1
);
// append entry batch
for
(
int32_t
i
=
0
;
i
<
pMsg
->
dataCount
;
++
i
)
{
SSyncRaftEntry
*
pAppendEntry
=
(
SSyncRaftEntry
*
)(
pMsg
->
data
+
metaTableArr
[
i
].
offset
);
code
=
ths
->
pLogStore
->
syncLogAppendEntry
(
ths
->
pLogStore
,
pAppendEntry
);
if
(
code
!=
0
)
{
sError
(
"vgId:%d, failed to append log entry since %s"
,
ths
->
vgId
,
tstrerror
(
terrno
));
return
-
1
;
}
code
=
syncNodePreCommit
(
ths
,
pAppendEntry
,
0
);
ASSERT
(
code
==
0
);
// syncEntryDestory(pAppendEntry);
}
goto
_IGNORE
;
}
// fsync once
SSyncLogStoreData
*
pData
=
ths
->
pLogStore
->
data
;
SWal
*
pWal
=
pData
->
pWal
;
walFsync
(
pWal
,
false
);
}
// append
code
=
ths
->
pLogStore
->
syncLogAppendEntry
(
ths
->
pLogStore
,
pAppendEntry
);
if
(
code
!=
0
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"ignore, append error, append-index:%ld"
,
appendIndex
);
syncLogRecvAppendEntries
(
ths
,
pMsg
,
logBuf
);
// prepare response msg
SyncAppendEntriesReply
*
pReply
=
syncAppendEntriesReplyBuild
(
ths
->
vgId
);
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
pRaftStore
->
currentTerm
;
pReply
->
privateTerm
=
ths
->
pNewNodeReceiver
->
privateTerm
;
pReply
->
success
=
true
;
pReply
->
matchIndex
=
hasAppendEntries
?
pMsg
->
prevLogIndex
+
pMsg
->
dataCount
:
pMsg
->
prevLogIndex
;
pReply
->
startTime
=
ths
->
startTime
;
// msg event log
syncLogSendAppendEntriesReply
(
ths
,
pReply
,
""
);
// send response
SRpcMsg
rpcMsg
;
syncAppendEntriesReply2RpcMsg
(
pReply
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcMsg
);
syncAppendEntriesReplyDestroy
(
pReply
);
// maybe update commit index, leader notice me
if
(
pMsg
->
commitIndex
>
ths
->
commitIndex
)
{
SyncIndex
lastIndex
=
ths
->
pLogStore
->
syncLogLastIndex
(
ths
->
pLogStore
);
SyncIndex
beginIndex
=
0
;
SyncIndex
endIndex
=
-
1
;
// has commit entry in local
if
(
pMsg
->
commitIndex
<=
lastIndex
)
{
beginIndex
=
ths
->
commitIndex
+
1
;
endIndex
=
pMsg
->
commitIndex
;
// update commit index
ths
->
commitIndex
=
pMsg
->
commitIndex
;
// call back Wal
code
=
ths
->
pLogStore
->
updateCommitIndex
(
ths
->
pLogStore
,
ths
->
commitIndex
);
ASSERT
(
code
==
0
);
}
else
if
(
pMsg
->
commitIndex
>
lastIndex
&&
ths
->
commitIndex
<
lastIndex
)
{
beginIndex
=
ths
->
commitIndex
+
1
;
endIndex
=
lastIndex
;
// update commit index, speed up
ths
->
commitIndex
=
lastIndex
;
// call back Wal
code
=
ths
->
pLogStore
->
updateCommitIndex
(
ths
->
pLogStore
,
ths
->
commitIndex
);
ASSERT
(
code
==
0
);
goto
_IGNORE
;
}
code
=
syncNodeCommit
(
ths
,
beginIndex
,
endIndex
,
ths
->
state
);
ASSERT
(
code
==
0
);
}
return
0
;
}
}
while
(
0
);
return
0
;
}
int32_t
syncNodeOnAppendEntriesSnapshotCb
(
SSyncNode
*
ths
,
SyncAppendEntries
*
pMsg
)
{
int32_t
ret
=
0
;
int32_t
code
=
0
;
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
ths
,
&
(
pMsg
->
srcId
))
&&
!
ths
->
pRaftCfg
->
isStandBy
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"maybe replica already dropped"
);
return
-
1
;
}
// maybe update term
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncNodeUpdateTerm
(
ths
,
pMsg
->
term
);
}
ASSERT
(
pMsg
->
term
<=
ths
->
pRaftStore
->
currentTerm
);
// reset elect timer
if
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
{
ths
->
leaderCache
=
pMsg
->
srcId
;
syncNodeResetElectTimer
(
ths
);
}
ASSERT
(
pMsg
->
dataLen
>=
0
);
}
else
{
if
(
terrno
==
TSDB_CODE_WAL_LOG_NOT_EXIST
)
{
// log not exist
// candidate to follower
//
// operation:
// to follower
do
{
bool
condition
=
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
&&
ths
->
state
==
TAOS_SYNC_STATE_CANDIDATE
;
if
(
condition
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"candidate to follower"
);
syncNodeBecomeFollower
(
ths
,
"from candidate by append entries"
);
return
0
;
// do not reply?
}
}
while
(
0
);
// truncate
code
=
ths
->
pLogStore
->
syncLogTruncate
(
ths
->
pLogStore
,
appendIndex
);
if
(
code
!=
0
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"ignore, log not exist, truncate error, append-index:%ld"
,
appendIndex
);
syncLogRecvAppendEntries
(
ths
,
pMsg
,
logBuf
);
// fake match
//
// condition1:
// preIndex <= my commit index
//
// operation:
// if hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex, append entry
// match my-commit-index or my-commit-index + 1
// no operation on log
do
{
bool
condition
=
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
&&
(
ths
->
state
==
TAOS_SYNC_STATE_FOLLOWER
)
&&
(
pMsg
->
prevLogIndex
<=
ths
->
commitIndex
);
if
(
condition
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"fake match"
);
SyncIndex
matchIndex
=
ths
->
commitIndex
;
bool
hasAppendEntries
=
pMsg
->
dataLen
>
0
;
if
(
hasAppendEntries
&&
pMsg
->
prevLogIndex
==
ths
->
commitIndex
)
{
// append entry
SSyncRaftEntry
*
pAppendEntry
=
syncEntryDeserialize
(
pMsg
->
data
,
pMsg
->
dataLen
);
ASSERT
(
pAppendEntry
!=
NULL
);
{
// has extra entries (> preIndex) in local log
SyncIndex
logLastIndex
=
ths
->
pLogStore
->
syncLogLastIndex
(
ths
->
pLogStore
);
bool
hasExtraEntries
=
logLastIndex
>
pMsg
->
prevLogIndex
;
if
(
hasExtraEntries
)
{
// make log same, rollback deleted entries
code
=
syncNodeMakeLogSame
(
ths
,
pMsg
);
ASSERT
(
code
==
0
);
}
goto
_IGNORE
;
}
// append
code
=
ths
->
pLogStore
->
syncLogAppendEntry
(
ths
->
pLogStore
,
pAppendEntry
);
if
(
code
!=
0
)
{
sError
(
"vgId:%d, failed to append log entry since %s"
,
ths
->
vgId
,
tstrerror
(
terrno
))
;
return
-
1
;
}
char
logBuf
[
128
]
;
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"ignore, log not exist, append error, append-index:%ld"
,
appendIndex
)
;
syncLogRecvAppendEntries
(
ths
,
pMsg
,
logBuf
);
// pre commit
code
=
syncNodePreCommit
(
ths
,
pAppendEntry
,
0
);
ASSERT
(
code
==
0
);
goto
_IGNORE
;
}
// update match index
matchIndex
=
pMsg
->
prevLogIndex
+
1
;
}
else
{
// error
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"ignore, get local entry error, append-index:%ld"
,
appendIndex
);
syncLogRecvAppendEntries
(
ths
,
pMsg
,
logBuf
);
syncEntryDestory
(
pAppendEntry
)
;
goto
_IGNORE
;
}
// prepare response msg
SyncAppendEntriesReply
*
pReply
=
syncAppendEntriesReplyBuild
(
ths
->
vgId
);
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
pRaftStore
->
currentTerm
;
pReply
->
privateTerm
=
ths
->
pNewNodeReceiver
->
privateTerm
;
pReply
->
success
=
true
;
pReply
->
matchIndex
=
matchIndex
;
pReply
->
startTime
=
ths
->
startTime
;
// msg event log
syncLogSendAppendEntriesReply
(
ths
,
pReply
,
""
);
// send response
SRpcMsg
rpcMsg
;
syncAppendEntriesReply2RpcMsg
(
pReply
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcMsg
);
syncAppendEntriesReplyDestroy
(
pReply
);
return
ret
;
}
}
while
(
0
);
// calculate logOK here, before will coredump, due to fake match
bool
logOK
=
syncNodeOnAppendEntriesLogOK
(
ths
,
pMsg
);
// not match
//
// condition1:
// term < myTerm
//
// condition2:
// !logOK
//
// operation:
// not match
// no operation on log
do
{
bool
condition1
=
pMsg
->
term
<
ths
->
pRaftStore
->
currentTerm
;
bool
condition2
=
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
&&
(
ths
->
state
==
TAOS_SYNC_STATE_FOLLOWER
)
&&
!
logOK
;
bool
condition
=
condition1
||
condition2
;
if
(
condition
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"not match"
);
// prepare response msg
SyncAppendEntriesReply
*
pReply
=
syncAppendEntriesReplyBuild
(
ths
->
vgId
);
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
pRaftStore
->
currentTerm
;
pReply
->
privateTerm
=
ths
->
pNewNodeReceiver
->
privateTerm
;
pReply
->
success
=
false
;
pReply
->
matchIndex
=
SYNC_INDEX_INVALID
;
pReply
->
startTime
=
ths
->
startTime
;
// msg event log
syncLogSendAppendEntriesReply
(
ths
,
pReply
,
""
);
// send response
SRpcMsg
rpcMsg
;
syncAppendEntriesReply2RpcMsg
(
pReply
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcMsg
);
syncAppendEntriesReplyDestroy
(
pReply
);
return
ret
;
}
}
while
(
0
);
#if 0
if (code != 0 && terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
ASSERT(code == 0);
// really match
//
// condition:
// logOK
//
// operation:
// match
// make log same
do
{
bool
condition
=
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
&&
(
ths
->
state
==
TAOS_SYNC_STATE_FOLLOWER
)
&&
logOK
;
if
(
condition
)
{
// has extra entries (> preIndex) in local log
SyncIndex
myLastIndex
=
syncNodeGetLastIndex
(
ths
);
bool
hasExtraEntries
=
myLastIndex
>
pMsg
->
prevLogIndex
;
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
ASSERT(code == 0);
// has entries in SyncAppendEntries msg
bool
hasAppendEntries
=
pMsg
->
dataLen
>
0
;
} else {
ASSERT(code == 0)
;
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"really match"
);
if (pLocalEntry->term == pAppendEntry->term) {
// do nothing
} else {
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
ASSERT(code == 0);
if
(
hasExtraEntries
)
{
// make log same, rollback deleted entries
code
=
syncNodeMakeLogSame
(
ths
,
pMsg
);
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
ASSERT(code == 0);
}
}
#endif
if
(
hasAppendEntries
)
{
// append entry
SSyncRaftEntry
*
pAppendEntry
=
syncEntryDeserialize
(
pMsg
->
data
,
pMsg
->
dataLen
);
ASSERT
(
pAppendEntry
!=
NULL
);
// update match index
pReply
->
matchIndex
=
pAppendEntry
->
index
;
code
=
ths
->
pLogStore
->
syncLogAppendEntry
(
ths
->
pLogStore
,
pAppendEntry
);
if
(
code
!=
0
)
{
sError
(
"vgId:%d, failed to append log entry since %s"
,
ths
->
vgId
,
tstrerror
(
terrno
));
return
-
1
;
}
syncEntryDestory
(
pLocalEntry
);
syncEntryDestory
(
pAppendEntry
);
// pre commit
code
=
syncNodePreCommit
(
ths
,
pAppendEntry
,
0
);
ASSERT
(
code
==
0
);
}
else
{
// no append entries, do nothing
// maybe has extra entries, no harm
syncEntryDestory
(
pAppendEntry
);
}
// update match index
pReply
->
matchIndex
=
pMsg
->
prevLogIndex
;
}
// prepare response msg
SyncAppendEntriesReply
*
pReply
=
syncAppendEntriesReplyBuild
(
ths
->
vgId
);
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
pRaftStore
->
currentTerm
;
pReply
->
privateTerm
=
ths
->
pNewNodeReceiver
->
privateTerm
;
pReply
->
success
=
true
;
pReply
->
matchIndex
=
hasAppendEntries
?
pMsg
->
prevLogIndex
+
1
:
pMsg
->
prevLogIndex
;
pReply
->
startTime
=
ths
->
startTime
;
// msg event log
syncLogSendAppendEntriesReply
(
ths
,
pReply
,
""
);
// send response
SRpcMsg
rpcMsg
;
syncAppendEntriesReply2RpcMsg
(
pReply
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcMsg
);
syncAppendEntriesReplyDestroy
(
pReply
);
// maybe update commit index, leader notice me
if
(
pMsg
->
commitIndex
>
ths
->
commitIndex
)
{
// has commit entry in local
if
(
pMsg
->
commitIndex
<=
ths
->
pLogStore
->
syncLogLastIndex
(
ths
->
pLogStore
))
{
// advance commit index to sanpshot first
SSnapshot
snapshot
;
ths
->
pFsm
->
FpGetSnapshotInfo
(
ths
->
pFsm
,
&
snapshot
);
if
(
snapshot
.
lastApplyIndex
>=
0
&&
snapshot
.
lastApplyIndex
>
ths
->
commitIndex
)
{
SyncIndex
commitBegin
=
ths
->
commitIndex
;
SyncIndex
commitEnd
=
snapshot
.
lastApplyIndex
;
ths
->
commitIndex
=
snapshot
.
lastApplyIndex
;
char
eventLog
[
128
];
snprintf
(
eventLog
,
sizeof
(
eventLog
),
"commit by snapshot from index:%"
PRId64
" to index:%"
PRId64
,
commitBegin
,
commitEnd
);
syncNodeEventLog
(
ths
,
eventLog
);
}
SyncIndex
beginIndex
=
ths
->
commitIndex
+
1
;
SyncIndex
endIndex
=
pMsg
->
commitIndex
;
// update commit index
ths
->
commitIndex
=
pMsg
->
commitIndex
;
// call back Wal
code
=
ths
->
pLogStore
->
updateCommitIndex
(
ths
->
pLogStore
,
ths
->
commitIndex
);
ASSERT
(
code
==
0
);
code
=
syncNodeCommit
(
ths
,
beginIndex
,
endIndex
,
ths
->
state
);
ASSERT
(
code
==
0
);
}
}
return
ret
;
}
}
while
(
0
);
// maybe update commit index, leader notice me
syncNodeFollowerCommit
(
ths
,
pMsg
->
commitIndex
);
return
ret
;
}
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"accept"
);
goto
_SEND_RESPONSE
;
_IGNORE:
syncAppendEntriesReplyDestroy
(
pReply
);
return
0
;
_SEND_RESPONSE:
// msg event log
syncLogSendAppendEntriesReply
(
ths
,
pReply
,
""
);
// send response
SRpcMsg
rpcMsg
;
syncAppendEntriesReply2RpcMsg
(
pReply
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcMsg
);
syncAppendEntriesReplyDestroy
(
pReply
);
return
0
;
}
\ No newline at end of file
source/libs/sync/src/syncAppendEntriesReply.c
浏览文件 @
5965629b
...
...
@@ -20,6 +20,7 @@
#include "syncRaftCfg.h"
#include "syncRaftLog.h"
#include "syncRaftStore.h"
#include "syncReplication.h"
#include "syncSnapshot.h"
#include "syncUtil.h"
#include "syncVoteMgr.h"
...
...
@@ -37,74 +38,6 @@
// /\ Discard(m)
// /\ UNCHANGED <<serverVars, candidateVars, logVars, elections>>
//
int32_t
syncNodeOnAppendEntriesReplyCb
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
)
{
int32_t
ret
=
0
;
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
ths
,
&
(
pMsg
->
srcId
))
&&
!
ths
->
pRaftCfg
->
isStandBy
)
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"maybe replica already dropped"
);
return
-
1
;
}
// drop stale response
if
(
pMsg
->
term
<
ths
->
pRaftStore
->
currentTerm
)
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"drop stale response"
);
return
0
;
}
// no need this code, because if I receive reply.term, then I must have sent for that term.
// if (pMsg->term > ths->pRaftStore->currentTerm) {
// syncNodeUpdateTerm(ths, pMsg->term);
// }
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"error term"
);
return
-
1
;
}
ASSERT
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
);
// update time
syncIndexMgrSetStartTime
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
startTime
);
syncIndexMgrSetRecvTime
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
taosGetTimestampMs
());
SyncIndex
beforeNextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
SyncIndex
beforeMatchIndex
=
syncIndexMgrGetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
));
if
(
pMsg
->
success
)
{
// nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1]
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
matchIndex
+
1
);
// matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex]
syncIndexMgrSetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
matchIndex
);
// maybe commit
syncMaybeAdvanceCommitIndex
(
ths
);
}
else
{
SyncIndex
nextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
// notice! int64, uint64
if
(
nextIndex
>
SYNC_INDEX_BEGIN
)
{
--
nextIndex
;
}
else
{
nextIndex
=
SYNC_INDEX_BEGIN
;
}
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
nextIndex
);
}
SyncIndex
afterNextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
SyncIndex
afterMatchIndex
=
syncIndexMgrGetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
));
do
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"before next:%"
PRId64
", match:%"
PRId64
", after next:%"
PRId64
", match:%"
PRId64
,
beforeNextIndex
,
beforeMatchIndex
,
afterNextIndex
,
afterMatchIndex
);
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
logBuf
);
}
while
(
0
);
return
0
;
}
// only start once
static
void
syncNodeStartSnapshotOnce
(
SSyncNode
*
ths
,
SyncIndex
beginIndex
,
SyncIndex
endIndex
,
SyncTerm
lastApplyTerm
,
...
...
@@ -151,7 +84,7 @@ static void syncNodeStartSnapshotOnce(SSyncNode* ths, SyncIndex beginIndex, Sync
}
}
int32_t
syncNodeOnAppendEntriesReply
Snapshot2Cb
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
)
{
int32_t
syncNodeOnAppendEntriesReply
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
)
{
int32_t
ret
=
0
;
// if already drop replica, do not process
...
...
@@ -166,251 +99,40 @@ int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntrie
return
0
;
}
// error term
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"error term"
);
return
-
1
;
}
ASSERT
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
);
// update time
syncIndexMgrSetStartTime
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
startTime
);
syncIndexMgrSetRecvTime
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
taosGetTimestampMs
());
SyncIndex
beforeNextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
SyncIndex
beforeMatchIndex
=
syncIndexMgrGetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
));
if
(
pMsg
->
success
)
{
SyncIndex
newNextIndex
=
pMsg
->
matchIndex
+
1
;
SyncIndex
newMatchIndex
=
pMsg
->
matchIndex
;
bool
needStartSnapshot
=
false
;
if
(
newMatchIndex
>=
SYNC_INDEX_BEGIN
&&
!
ths
->
pLogStore
->
syncLogExist
(
ths
->
pLogStore
,
newMatchIndex
))
{
needStartSnapshot
=
true
;
if
(
ths
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"error term"
);
syncNodeStepDown
(
ths
,
pMsg
->
term
);
return
-
1
;
}
if
(
!
needStartSnapshot
)
{
// update next-index, match-index
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
newNextIndex
);
syncIndexMgrSetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
),
newMatchIndex
);
ASSERT
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
);
// maybe commit
if
(
ths
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
if
(
pMsg
->
success
)
{
SyncIndex
oldMatchIndex
=
syncIndexMgrGetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
));
if
(
pMsg
->
matchIndex
>
oldMatchIndex
)
{
syncIndexMgrSetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
matchIndex
);
syncMaybeAdvanceCommitIndex
(
ths
);
}
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
matchIndex
+
1
);
}
else
{
// start snapshot <match+1, old snapshot.end>
SSnapshot
oldSnapshot
;
ths
->
pFsm
->
FpGetSnapshotInfo
(
ths
->
pFsm
,
&
oldSnapshot
);
if
(
oldSnapshot
.
lastApplyIndex
>
newMatchIndex
)
{
syncNodeStartSnapshotOnce
(
ths
,
newMatchIndex
+
1
,
oldSnapshot
.
lastApplyIndex
,
oldSnapshot
.
lastApplyTerm
,
pMsg
);
// term maybe not ok?
}
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
oldSnapshot
.
lastApplyIndex
+
1
);
syncIndexMgrSetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
),
newMatchIndex
);
}
// event log, update next-index
do
{
char
host
[
64
];
int16_t
port
;
syncUtilU642Addr
(
pMsg
->
srcId
.
addr
,
host
,
sizeof
(
host
),
&
port
);
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"reset next-index:%"
PRId64
", match-index:%"
PRId64
" for %s:%d"
,
newNextIndex
,
newMatchIndex
,
host
,
port
);
syncNodeEventLog
(
ths
,
logBuf
);
}
while
(
0
);
}
else
{
SyncIndex
nextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
if
(
nextIndex
>
SYNC_INDEX_BEGIN
)
{
--
nextIndex
;
// speed up
if
(
nextIndex
>
pMsg
->
matchIndex
+
1
)
{
nextIndex
=
pMsg
->
matchIndex
+
1
;
}
bool
needStartSnapshot
=
false
;
if
(
nextIndex
>=
SYNC_INDEX_BEGIN
&&
!
ths
->
pLogStore
->
syncLogExist
(
ths
->
pLogStore
,
nextIndex
))
{
needStartSnapshot
=
true
;
}
if
(
nextIndex
-
1
>=
SYNC_INDEX_BEGIN
&&
!
ths
->
pLogStore
->
syncLogExist
(
ths
->
pLogStore
,
nextIndex
-
1
))
{
needStartSnapshot
=
true
;
}
if
(
!
needStartSnapshot
)
{
// do nothing
}
else
{
SSnapshot
oldSnapshot
;
ths
->
pFsm
->
FpGetSnapshotInfo
(
ths
->
pFsm
,
&
oldSnapshot
);
SyncTerm
newSnapshotTerm
=
oldSnapshot
.
lastApplyTerm
;
SyncIndex
endIndex
;
if
(
ths
->
pLogStore
->
syncLogExist
(
ths
->
pLogStore
,
nextIndex
+
1
))
{
endIndex
=
nextIndex
;
}
else
{
endIndex
=
oldSnapshot
.
lastApplyIndex
;
}
syncNodeStartSnapshotOnce
(
ths
,
pMsg
->
matchIndex
+
1
,
endIndex
,
newSnapshotTerm
,
pMsg
);
// get sender
SSyncSnapshotSender
*
pSender
=
syncNodeGetSnapshotSender
(
ths
,
&
(
pMsg
->
srcId
));
ASSERT
(
pSender
!=
NULL
);
SyncIndex
sentryIndex
=
pSender
->
snapshot
.
lastApplyIndex
+
1
;
// update nextIndex to sentryIndex
if
(
nextIndex
<=
sentryIndex
)
{
nextIndex
=
sentryIndex
;
}
SyncIndex
nextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
if
(
nextIndex
>
SYNC_INDEX_BEGIN
)
{
--
nextIndex
;
}
}
else
{
nextIndex
=
SYNC_INDEX_BEGIN
;
}
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
nextIndex
);
SyncIndex
oldMatchIndex
=
syncIndexMgrGetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
));
if
(
pMsg
->
matchIndex
>
oldMatchIndex
)
{
syncIndexMgrSetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
matchIndex
);
}
// event log, update next-index
do
{
char
host
[
64
];
int16_t
port
;
syncUtilU642Addr
(
pMsg
->
srcId
.
addr
,
host
,
sizeof
(
host
),
&
port
);
SyncIndex
newNextIndex
=
nextIndex
;
SyncIndex
newMatchIndex
=
syncIndexMgrGetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
));
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"reset2 next-index:%"
PRId64
", match-index:%"
PRId64
" for %s:%d"
,
newNextIndex
,
newMatchIndex
,
host
,
port
);
syncNodeEventLog
(
ths
,
logBuf
);
}
while
(
0
);
}
SyncIndex
afterNextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
SyncIndex
afterMatchIndex
=
syncIndexMgrGetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
));
do
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"before next:%"
PRId64
", match:%"
PRId64
", after next:%"
PRId64
", match:%"
PRId64
,
beforeNextIndex
,
beforeMatchIndex
,
afterNextIndex
,
afterMatchIndex
);
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
logBuf
);
}
while
(
0
);
return
0
;
}
int32_t
syncNodeOnAppendEntriesReplySnapshotCb
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
)
{
int32_t
ret
=
0
;
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
ths
,
&
(
pMsg
->
srcId
))
&&
!
ths
->
pRaftCfg
->
isStandBy
)
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"maybe replica already dropped"
);
return
-
1
;
}
// drop stale response
if
(
pMsg
->
term
<
ths
->
pRaftStore
->
currentTerm
)
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"drop stale response"
);
return
0
;
}
// no need this code, because if I receive reply.term, then I must have sent for that term.
// if (pMsg->term > ths->pRaftStore->currentTerm) {
// syncNodeUpdateTerm(ths, pMsg->term);
// }
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"error term"
);
return
-
1
;
}
ASSERT
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
);
// update time
syncIndexMgrSetStartTime
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
startTime
);
syncIndexMgrSetRecvTime
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
taosGetTimestampMs
());
SyncIndex
beforeNextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
SyncIndex
beforeMatchIndex
=
syncIndexMgrGetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
));
if
(
pMsg
->
success
)
{
// nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1]
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
matchIndex
+
1
);
// matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex]
syncIndexMgrSetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
matchIndex
);
// maybe commit
if
(
ths
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
syncMaybeAdvanceCommitIndex
(
ths
);
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
nextIndex
);
}
}
else
{
SyncIndex
nextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
// notice! int64, uint64
if
(
nextIndex
>
SYNC_INDEX_BEGIN
)
{
--
nextIndex
;
// get sender
SSyncSnapshotSender
*
pSender
=
syncNodeGetSnapshotSender
(
ths
,
&
(
pMsg
->
srcId
));
ASSERT
(
pSender
!=
NULL
);
SSnapshot
snapshot
=
{.
data
=
NULL
,
.
lastApplyIndex
=
SYNC_INDEX_INVALID
,
.
lastApplyTerm
=
0
,
.
lastConfigIndex
=
SYNC_INDEX_INVALID
};
void
*
pReader
=
NULL
;
ths
->
pFsm
->
FpGetSnapshot
(
ths
->
pFsm
,
&
snapshot
,
NULL
,
&
pReader
);
if
(
snapshot
.
lastApplyIndex
>=
SYNC_INDEX_BEGIN
&&
nextIndex
<=
snapshot
.
lastApplyIndex
+
1
&&
!
snapshotSenderIsStart
(
pSender
)
&&
pMsg
->
privateTerm
<
pSender
->
privateTerm
)
{
// has snapshot
ASSERT
(
pReader
!=
NULL
);
SSnapshotParam
readerParam
=
{.
start
=
0
,
.
end
=
snapshot
.
lastApplyIndex
};
snapshotSenderStart
(
pSender
,
readerParam
,
snapshot
,
pReader
);
}
else
{
// no snapshot
if
(
pReader
!=
NULL
)
{
ths
->
pFsm
->
FpSnapshotStopRead
(
ths
->
pFsm
,
pReader
);
}
}
SyncIndex
sentryIndex
=
pSender
->
snapshot
.
lastApplyIndex
+
1
;
// update nextIndex to sentryIndex
if
(
nextIndex
<=
sentryIndex
)
{
nextIndex
=
sentryIndex
;
}
// send next append entries
SPeerState
*
pState
=
syncNodeGetPeerState
(
ths
,
&
(
pMsg
->
srcId
));
ASSERT
(
pState
!=
NULL
);
}
else
{
nextIndex
=
SYNC_INDEX_BEGIN
;
if
(
pMsg
->
lastSendIndex
==
pState
->
lastSendIndex
)
{
syncNodeReplicateOne
(
ths
,
&
(
pMsg
->
srcId
))
;
}
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
nextIndex
);
}
SyncIndex
afterNextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
SyncIndex
afterMatchIndex
=
syncIndexMgrGetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
));
do
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"before next:%"
PRId64
", match:%"
PRId64
", after next:%"
PRId64
", match:%"
PRId64
,
beforeNextIndex
,
beforeMatchIndex
,
afterNextIndex
,
afterMatchIndex
);
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
logBuf
);
}
while
(
0
);
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"process"
);
return
0
;
}
\ No newline at end of file
source/libs/sync/src/syncCommit.c
浏览文件 @
5965629b
...
...
@@ -45,8 +45,10 @@
// /\ UNCHANGED <<messages, serverVars, candidateVars, leaderVars, log>>
//
void
syncMaybeAdvanceCommitIndex
(
SSyncNode
*
pSyncNode
)
{
syncIndexMgrLog2
(
"==syncNodeMaybeAdvanceCommitIndex== pNextIndex"
,
pSyncNode
->
pNextIndex
);
syncIndexMgrLog2
(
"==syncNodeMaybeAdvanceCommitIndex== pMatchIndex"
,
pSyncNode
->
pMatchIndex
);
if
(
pSyncNode
->
state
!=
TAOS_SYNC_STATE_LEADER
)
{
syncNodeErrorLog
(
pSyncNode
,
"not leader, can not advance commit index"
);
return
;
}
// advance commit index to sanpshot first
SSnapshot
snapshot
;
...
...
@@ -75,9 +77,11 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
if
(
h
)
{
pEntry
=
(
SSyncRaftEntry
*
)
taosLRUCacheValue
(
pCache
,
h
);
}
else
{
pEntry
=
pSyncNode
->
pLogStore
->
getEntry
(
pSyncNode
->
pLogStore
,
index
);
if
(
pEntry
==
NULL
)
{
sError
(
"failed to get entry since %s. index:%"
PRId64
,
tstrerror
(
terrno
),
index
);
int32_t
code
=
pSyncNode
->
pLogStore
->
syncLogGetEntry
(
pSyncNode
->
pLogStore
,
index
,
&
pEntry
);
if
(
code
!=
0
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"advance commit index error, read wal index:%ld"
,
index
);
syncNodeErrorLog
(
pSyncNode
,
logBuf
);
return
;
}
}
...
...
@@ -125,13 +129,17 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
pSyncNode
->
commitIndex
=
newCommitIndex
;
// call back Wal
pSyncNode
->
pLogStore
->
u
pdateCommitIndex
(
pSyncNode
->
pLogStore
,
pSyncNode
->
commitIndex
);
pSyncNode
->
pLogStore
->
syncLogU
pdateCommitIndex
(
pSyncNode
->
pLogStore
,
pSyncNode
->
commitIndex
);
// execute fsm
if
(
pSyncNode
->
pFsm
!=
NULL
)
{
int32_t
code
=
syncNodeCommit
(
pSyncNode
,
beginIndex
,
endIndex
,
pSyncNode
->
state
);
int32_t
code
=
syncNode
Do
Commit
(
pSyncNode
,
beginIndex
,
endIndex
,
pSyncNode
->
state
);
if
(
code
!=
0
)
{
wError
(
"failed to commit sync node since %s"
,
tstrerror
(
terrno
));
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"advance commit index error, do commit begin:%ld, end:%ld"
,
beginIndex
,
endIndex
);
syncNodeErrorLog
(
pSyncNode
,
logBuf
);
return
;
}
}
}
...
...
@@ -220,6 +228,7 @@ int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) {
return
quorum
;
}
/*
bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
int agreeCount = 0;
for (int i = 0; i < pSyncNode->replicaNum; ++i) {
...
...
@@ -232,8 +241,8 @@ bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
}
return false;
}
*/
/*
bool
syncAgree
(
SSyncNode
*
pSyncNode
,
SyncIndex
index
)
{
int
agreeCount
=
0
;
for
(
int
i
=
0
;
i
<
pSyncNode
->
replicaNum
;
++
i
)
{
...
...
@@ -246,4 +255,3 @@ bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
}
return
false
;
}
*/
source/libs/sync/src/syncElection.c
浏览文件 @
5965629b
...
...
@@ -30,45 +30,6 @@
// msource |-> i,
// mdest |-> j])
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
//
int32_t
syncNodeRequestVotePeers
(
SSyncNode
*
pSyncNode
)
{
ASSERT
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_CANDIDATE
);
int32_t
ret
=
0
;
for
(
int
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SyncRequestVote
*
pMsg
=
syncRequestVoteBuild
(
pSyncNode
->
vgId
);
pMsg
->
srcId
=
pSyncNode
->
myRaftId
;
pMsg
->
destId
=
pSyncNode
->
peersId
[
i
];
pMsg
->
term
=
pSyncNode
->
pRaftStore
->
currentTerm
;
pMsg
->
lastLogIndex
=
pSyncNode
->
pLogStore
->
getLastIndex
(
pSyncNode
->
pLogStore
);
pMsg
->
lastLogTerm
=
pSyncNode
->
pLogStore
->
getLastTerm
(
pSyncNode
->
pLogStore
);
ret
=
syncNodeRequestVote
(
pSyncNode
,
&
pSyncNode
->
peersId
[
i
],
pMsg
);
ASSERT
(
ret
==
0
);
syncRequestVoteDestroy
(
pMsg
);
}
return
ret
;
}
int32_t
syncNodeRequestVotePeersSnapshot
(
SSyncNode
*
pSyncNode
)
{
ASSERT
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_CANDIDATE
);
int32_t
ret
=
0
;
for
(
int
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SyncRequestVote
*
pMsg
=
syncRequestVoteBuild
(
pSyncNode
->
vgId
);
pMsg
->
srcId
=
pSyncNode
->
myRaftId
;
pMsg
->
destId
=
pSyncNode
->
peersId
[
i
];
pMsg
->
term
=
pSyncNode
->
pRaftStore
->
currentTerm
;
ret
=
syncNodeGetLastIndexTerm
(
pSyncNode
,
&
(
pMsg
->
lastLogIndex
),
&
(
pMsg
->
lastLogTerm
));
ASSERT
(
ret
==
0
);
ret
=
syncNodeRequestVote
(
pSyncNode
,
&
pSyncNode
->
peersId
[
i
],
pMsg
);
ASSERT
(
ret
==
0
);
syncRequestVoteDestroy
(
pMsg
);
}
return
ret
;
}
int32_t
syncNodeElect
(
SSyncNode
*
pSyncNode
)
{
syncNodeEventLog
(
pSyncNode
,
"begin election"
);
...
...
@@ -111,27 +72,38 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) {
}
switch
(
pSyncNode
->
pRaftCfg
->
snapshotStrategy
)
{
case
SYNC_STRATEGY_NO_SNAPSHOT
:
ret
=
syncNodeRequestVotePeers
(
pSyncNode
);
break
;
ret
=
syncNodeRequestVotePeers
(
pSyncNode
);
ASSERT
(
ret
==
0
);
case
SYNC_STRATEGY_STANDARD_SNAPSHOT
:
case
SYNC_STRATEGY_WAL_FIRST
:
ret
=
syncNodeRequestVotePeersSnapshot
(
pSyncNode
);
break
;
syncNodeResetElectTimer
(
pSyncNode
);
default:
ret
=
syncNodeRequestVotePeers
(
pSyncNode
);
break
;
return
ret
;
}
int32_t
syncNodeRequestVotePeers
(
SSyncNode
*
pSyncNode
)
{
if
(
pSyncNode
->
state
!=
TAOS_SYNC_STATE_CANDIDATE
)
{
syncNodeEventLog
(
pSyncNode
,
"not candidate, stop elect"
);
return
0
;
}
ASSERT
(
ret
==
0
);
syncNodeResetElectTimer
(
pSyncNode
);
int32_t
ret
=
0
;
for
(
int
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SyncRequestVote
*
pMsg
=
syncRequestVoteBuild
(
pSyncNode
->
vgId
);
pMsg
->
srcId
=
pSyncNode
->
myRaftId
;
pMsg
->
destId
=
pSyncNode
->
peersId
[
i
];
pMsg
->
term
=
pSyncNode
->
pRaftStore
->
currentTerm
;
ret
=
syncNodeGetLastIndexTerm
(
pSyncNode
,
&
(
pMsg
->
lastLogIndex
),
&
(
pMsg
->
lastLogTerm
));
ASSERT
(
ret
==
0
);
ret
=
syncNodeSendRequestVote
(
pSyncNode
,
&
pSyncNode
->
peersId
[
i
],
pMsg
);
ASSERT
(
ret
==
0
);
syncRequestVoteDestroy
(
pMsg
);
}
return
ret
;
}
int32_t
syncNodeRequestVote
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
const
SyncRequestVote
*
pMsg
)
{
int32_t
syncNode
Send
RequestVote
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
const
SyncRequestVote
*
pMsg
)
{
int32_t
ret
=
0
;
syncLogSendRequestVote
(
pSyncNode
,
pMsg
,
""
);
...
...
source/libs/sync/src/syncIO.c
浏览文件 @
5965629b
...
...
@@ -326,18 +326,18 @@ static void *syncIOConsumerFunc(void *param) {
}
}
else
if
(
pRpcMsg
->
msgType
==
TDMT_SYNC_SNAPSHOT_SEND
)
{
if
(
io
->
FpOnSyncSnapshot
Send
!=
NULL
)
{
if
(
io
->
FpOnSyncSnapshot
!=
NULL
)
{
SyncSnapshotSend
*
pSyncMsg
=
syncSnapshotSendFromRpcMsg2
(
pRpcMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
io
->
FpOnSyncSnapshot
Send
(
io
->
pSyncNode
,
pSyncMsg
);
io
->
FpOnSyncSnapshot
(
io
->
pSyncNode
,
pSyncMsg
);
syncSnapshotSendDestroy
(
pSyncMsg
);
}
}
else
if
(
pRpcMsg
->
msgType
==
TDMT_SYNC_SNAPSHOT_RSP
)
{
if
(
io
->
FpOnSyncSnapshotR
sp
!=
NULL
)
{
if
(
io
->
FpOnSyncSnapshotR
eply
!=
NULL
)
{
SyncSnapshotRsp
*
pSyncMsg
=
syncSnapshotRspFromRpcMsg2
(
pRpcMsg
);
ASSERT
(
pSyncMsg
!=
NULL
);
io
->
FpOnSyncSnapshotR
sp
(
io
->
pSyncNode
,
pSyncMsg
);
io
->
FpOnSyncSnapshotR
eply
(
io
->
pSyncNode
,
pSyncMsg
);
syncSnapshotRspDestroy
(
pSyncMsg
);
}
...
...
source/libs/sync/src/syncIndexMgr.c
浏览文件 @
5965629b
...
...
@@ -136,7 +136,7 @@ cJSON *syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr) {
char
*
syncIndexMgr2Str
(
SSyncIndexMgr
*
pSyncIndexMgr
)
{
cJSON
*
pJson
=
syncIndexMgr2Json
(
pSyncIndexMgr
);
char
*
serialized
=
cJSON_Print
(
pJson
);
char
*
serialized
=
cJSON_Print
(
pJson
);
cJSON_Delete
(
pJson
);
return
serialized
;
}
...
...
source/libs/sync/src/syncMain.c
浏览文件 @
5965629b
...
...
@@ -46,6 +46,7 @@ static void syncNodeEqElectTimer(void* param, void* tmrId);
static
void
syncNodeEqHeartbeatTimer
(
void
*
param
,
void
*
tmrId
);
static
int32_t
syncNodeEqNoop
(
SSyncNode
*
ths
);
static
int32_t
syncNodeAppendNoop
(
SSyncNode
*
ths
);
static
void
syncNodeEqPeerHeartbeatTimer
(
void
*
param
,
void
*
tmrId
);
// process message ----
int32_t
syncNodeOnPingCb
(
SSyncNode
*
ths
,
SyncPing
*
pMsg
);
...
...
@@ -67,7 +68,7 @@ int32_t syncInit() {
syncCleanUp
();
ret
=
-
1
;
}
else
{
sDebug
(
"sync rsetId:%
"
PRId32
"
is open"
,
tsNodeRefId
);
sDebug
(
"sync rsetId:%
d
is open"
,
tsNodeRefId
);
ret
=
syncEnvStart
();
}
}
...
...
@@ -80,7 +81,7 @@ void syncCleanUp() {
ASSERT
(
ret
==
0
);
if
(
tsNodeRefId
!=
-
1
)
{
sDebug
(
"sync rsetId:%
"
PRId32
"
is closed"
,
tsNodeRefId
);
sDebug
(
"sync rsetId:%
d
is closed"
,
tsNodeRefId
);
taosCloseRef
(
tsNodeRefId
);
tsNodeRefId
=
-
1
;
}
...
...
@@ -89,7 +90,7 @@ void syncCleanUp() {
int64_t
syncOpen
(
SSyncInfo
*
pSyncInfo
)
{
SSyncNode
*
pSyncNode
=
syncNodeOpen
(
pSyncInfo
);
if
(
pSyncNode
==
NULL
)
{
sError
(
"
vgId:%d, failed to open sync node since %s"
,
pSyncInfo
->
vgId
,
terrstr
()
);
sError
(
"
failed to open sync node. vgId:%d"
,
pSyncInfo
->
vgId
);
return
-
1
;
}
...
...
@@ -100,7 +101,7 @@ int64_t syncOpen(SSyncInfo* pSyncInfo) {
return
-
1
;
}
sDebug
(
"vgId:%d, sync rid:%"
PRId64
" is added to rsetId:%
"
PRId32
,
pSyncInfo
->
vgId
,
pSyncNode
->
rid
,
tsNodeRefId
);
sDebug
(
"vgId:%d, sync rid:%"
PRId64
" is added to rsetId:%
d"
,
pSyncInfo
->
vgId
,
pSyncNode
->
rid
,
tsNodeRefId
);
return
pSyncNode
->
rid
;
}
...
...
@@ -146,7 +147,7 @@ void syncStop(int64_t rid) {
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
taosRemoveRef
(
tsNodeRefId
,
rid
);
sDebug
(
"vgId:%d, sync rid:%"
PRId64
" is removed from rsetId:%
"
PRId64
,
vgId
,
rid
,
(
int64_t
)
tsNodeRefId
);
sDebug
(
"vgId:%d, sync rid:%"
PRId64
" is removed from rsetId:%
d"
,
vgId
,
rid
,
tsNodeRefId
);
}
int32_t
syncSetStandby
(
int64_t
rid
)
{
...
...
@@ -230,7 +231,7 @@ int32_t syncReconfigBuild(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg
return
ret
;
}
int32_t
syncReconfig
(
int64_t
rid
,
SSyncCfg
*
pNewCfg
)
{
int32_t
syncReconfig
(
int64_t
rid
,
const
SSyncCfg
*
pNewCfg
)
{
SSyncNode
*
pSyncNode
=
(
SSyncNode
*
)
taosAcquireRef
(
tsNodeRefId
,
rid
);
if
(
pSyncNode
==
NULL
)
{
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
...
...
@@ -238,7 +239,6 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) {
}
ASSERT
(
rid
==
pSyncNode
->
rid
);
#if 0
if
(
!
syncNodeCheckNewConfig
(
pSyncNode
,
pNewCfg
))
{
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
terrno
=
TSDB_CODE_SYN_NEW_CONFIG_ERROR
;
...
...
@@ -260,12 +260,6 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) {
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
ret
;
#else
syncNodeUpdateNewConfigIndex
(
pSyncNode
,
pNewCfg
);
syncNodeDoConfigChange
(
pSyncNode
,
pNewCfg
,
SYNC_INDEX_INVALID
);
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
0
;
#endif
}
int32_t
syncLeaderTransfer
(
int64_t
rid
)
{
...
...
@@ -294,6 +288,187 @@ int32_t syncLeaderTransferTo(int64_t rid, SNodeInfo newLeader) {
return
ret
;
}
SyncIndex
syncMinMatchIndex
(
SSyncNode
*
pSyncNode
)
{
SyncIndex
minMatchIndex
=
SYNC_INDEX_INVALID
;
if
(
pSyncNode
->
peersNum
>
0
)
{
minMatchIndex
=
syncIndexMgrGetIndex
(
pSyncNode
->
pMatchIndex
,
&
(
pSyncNode
->
peersId
[
0
]));
}
for
(
int32_t
i
=
1
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SyncIndex
matchIndex
=
syncIndexMgrGetIndex
(
pSyncNode
->
pMatchIndex
,
&
(
pSyncNode
->
peersId
[
i
]));
if
(
matchIndex
<
minMatchIndex
)
{
minMatchIndex
=
matchIndex
;
}
}
return
minMatchIndex
;
}
int32_t
syncBeginSnapshot
(
int64_t
rid
,
int64_t
lastApplyIndex
)
{
SSyncNode
*
pSyncNode
=
(
SSyncNode
*
)
taosAcquireRef
(
tsNodeRefId
,
rid
);
if
(
pSyncNode
==
NULL
)
{
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
return
-
1
;
}
ASSERT
(
rid
==
pSyncNode
->
rid
);
int32_t
code
=
0
;
if
(
syncNodeIsMnode
(
pSyncNode
))
{
// mnode
int64_t
logRetention
=
SYNC_MNODE_LOG_RETENTION
;
SyncIndex
beginIndex
=
pSyncNode
->
pLogStore
->
syncLogBeginIndex
(
pSyncNode
->
pLogStore
);
SyncIndex
endIndex
=
pSyncNode
->
pLogStore
->
syncLogEndIndex
(
pSyncNode
->
pLogStore
);
int64_t
logNum
=
endIndex
-
beginIndex
;
bool
isEmpty
=
pSyncNode
->
pLogStore
->
syncLogIsEmpty
(
pSyncNode
->
pLogStore
);
if
(
isEmpty
||
(
!
isEmpty
&&
logNum
<
logRetention
))
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"new-snapshot-index:%ld, log-num:%ld, empty:%d, do not delete wal"
,
lastApplyIndex
,
logNum
,
isEmpty
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
0
;
}
goto
_DEL_WAL
;
}
else
{
// vnode
if
(
pSyncNode
->
replicaNum
>
1
)
{
// multi replicas
if
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
pSyncNode
->
minMatchIndex
=
syncMinMatchIndex
(
pSyncNode
);
for
(
int32_t
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
int64_t
matchIndex
=
syncIndexMgrGetIndex
(
pSyncNode
->
pMatchIndex
,
&
(
pSyncNode
->
peersId
[
i
]));
if
(
lastApplyIndex
>
matchIndex
)
{
do
{
char
host
[
64
];
uint16_t
port
;
syncUtilU642Addr
(
pSyncNode
->
peersId
[
i
].
addr
,
host
,
sizeof
(
host
),
&
port
);
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"new-snapshot-index:%ld is greater than match-index:%ld of %s:%d, do not delete wal"
,
lastApplyIndex
,
matchIndex
,
host
,
port
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
while
(
0
);
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
0
;
}
}
}
else
if
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_FOLLOWER
)
{
if
(
lastApplyIndex
>
pSyncNode
->
minMatchIndex
)
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"new-snapshot-index:%ld is greater than min-match-index:%ld, do not delete wal"
,
lastApplyIndex
,
pSyncNode
->
minMatchIndex
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
0
;
}
}
else
if
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_CANDIDATE
)
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"new-snapshot-index:%ld candidate, do not delete wal"
,
lastApplyIndex
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
0
;
}
else
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"new-snapshot-index:%ld unknown state, do not delete wal"
,
lastApplyIndex
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
0
;
}
goto
_DEL_WAL
;
}
else
{
// one replica
goto
_DEL_WAL
;
}
}
_DEL_WAL:
do
{
SyncIndex
snapshottingIndex
=
atomic_load_64
(
&
pSyncNode
->
snapshottingIndex
);
if
(
snapshottingIndex
==
SYNC_INDEX_INVALID
)
{
atomic_store_64
(
&
pSyncNode
->
snapshottingIndex
,
lastApplyIndex
);
pSyncNode
->
snapshottingTime
=
taosGetTimestampMs
();
SSyncLogStoreData
*
pData
=
pSyncNode
->
pLogStore
->
data
;
code
=
walBeginSnapshot
(
pData
->
pWal
,
lastApplyIndex
);
if
(
code
==
0
)
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal snapshot begin, index:%ld, last apply index:%ld"
,
pSyncNode
->
snapshottingIndex
,
lastApplyIndex
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
else
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal snapshot begin error since:%s, index:%ld, last apply index:%ld"
,
terrstr
(
terrno
),
pSyncNode
->
snapshottingIndex
,
lastApplyIndex
);
syncNodeErrorLog
(
pSyncNode
,
logBuf
);
atomic_store_64
(
&
pSyncNode
->
snapshottingIndex
,
SYNC_INDEX_INVALID
);
}
}
else
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"snapshotting for %ld, do not delete wal for new-snapshot-index:%ld"
,
snapshottingIndex
,
lastApplyIndex
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
}
while
(
0
);
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
code
;
}
int32_t
syncEndSnapshot
(
int64_t
rid
)
{
SSyncNode
*
pSyncNode
=
(
SSyncNode
*
)
taosAcquireRef
(
tsNodeRefId
,
rid
);
if
(
pSyncNode
==
NULL
)
{
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
return
-
1
;
}
ASSERT
(
rid
==
pSyncNode
->
rid
);
int32_t
code
=
0
;
if
(
atomic_load_64
(
&
pSyncNode
->
snapshottingIndex
)
!=
SYNC_INDEX_INVALID
)
{
SSyncLogStoreData
*
pData
=
pSyncNode
->
pLogStore
->
data
;
code
=
walEndSnapshot
(
pData
->
pWal
);
if
(
code
!=
0
)
{
sError
(
"vgId:%d, wal snapshot end error since:%s"
,
pSyncNode
->
vgId
,
terrstr
(
terrno
));
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
-
1
;
}
else
{
do
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal snapshot end, index:%ld"
,
atomic_load_64
(
&
pSyncNode
->
snapshottingIndex
));
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
while
(
0
);
atomic_store_64
(
&
pSyncNode
->
snapshottingIndex
,
SYNC_INDEX_INVALID
);
}
}
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
code
;
}
int32_t
syncNodeLeaderTransfer
(
SSyncNode
*
pSyncNode
)
{
if
(
pSyncNode
->
peersNum
==
0
)
{
sDebug
(
"only one replica, cannot leader transfer"
);
...
...
@@ -316,7 +491,7 @@ int32_t syncNodeLeaderTransferTo(SSyncNode* pSyncNode, SNodeInfo newLeader) {
}
do
{
char
logBuf
[
256
];
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"begin leader transfer to %s:%u"
,
newLeader
.
nodeFqdn
,
newLeader
.
nodePort
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
while
(
0
);
...
...
@@ -716,24 +891,6 @@ int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak) {
return
ret
;
}
int32_t
syncProposeBatch
(
int64_t
rid
,
SRpcMsg
**
pMsgPArr
,
bool
*
pIsWeakArr
,
int32_t
arrSize
)
{
if
(
arrSize
<
0
)
{
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
return
-
1
;
}
SSyncNode
*
pSyncNode
=
taosAcquireRef
(
tsNodeRefId
,
rid
);
if
(
pSyncNode
==
NULL
)
{
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
return
-
1
;
}
ASSERT
(
rid
==
pSyncNode
->
rid
);
int32_t
ret
=
syncNodeProposeBatch
(
pSyncNode
,
pMsgPArr
,
pIsWeakArr
,
arrSize
);
taosReleaseRef
(
tsNodeRefId
,
pSyncNode
->
rid
);
return
ret
;
}
static
bool
syncNodeBatchOK
(
SRpcMsg
**
pMsgPArr
,
int32_t
arrSize
)
{
for
(
int32_t
i
=
0
;
i
<
arrSize
;
++
i
)
{
if
(
pMsgPArr
[
i
]
->
msgType
==
TDMT_SYNC_CONFIG_CHANGE
)
{
...
...
@@ -748,91 +905,6 @@ static bool syncNodeBatchOK(SRpcMsg** pMsgPArr, int32_t arrSize) {
return
true
;
}
int32_t
syncNodeProposeBatch
(
SSyncNode
*
pSyncNode
,
SRpcMsg
**
pMsgPArr
,
bool
*
pIsWeakArr
,
int32_t
arrSize
)
{
if
(
!
syncNodeBatchOK
(
pMsgPArr
,
arrSize
))
{
syncNodeErrorLog
(
pSyncNode
,
"sync propose batch error"
);
terrno
=
TSDB_CODE_SYN_BATCH_ERROR
;
return
-
1
;
}
if
(
arrSize
>
SYNC_MAX_BATCH_SIZE
)
{
syncNodeErrorLog
(
pSyncNode
,
"sync propose batch error"
);
terrno
=
TSDB_CODE_SYN_BATCH_ERROR
;
return
-
1
;
}
if
(
pSyncNode
->
state
!=
TAOS_SYNC_STATE_LEADER
)
{
syncNodeErrorLog
(
pSyncNode
,
"sync propose not leader"
);
terrno
=
TSDB_CODE_SYN_NOT_LEADER
;
return
-
1
;
}
if
(
pSyncNode
->
changing
)
{
syncNodeErrorLog
(
pSyncNode
,
"sync propose not ready"
);
terrno
=
TSDB_CODE_SYN_PROPOSE_NOT_READY
;
return
-
1
;
}
SRaftMeta
raftArr
[
SYNC_MAX_BATCH_SIZE
];
for
(
int
i
=
0
;
i
<
arrSize
;
++
i
)
{
do
{
char
eventLog
[
128
];
snprintf
(
eventLog
,
sizeof
(
eventLog
),
"propose message, type:%s batch:%d"
,
TMSG_INFO
(
pMsgPArr
[
i
]
->
msgType
),
arrSize
);
syncNodeEventLog
(
pSyncNode
,
eventLog
);
}
while
(
0
);
SRespStub
stub
;
stub
.
createTime
=
taosGetTimestampMs
();
stub
.
rpcMsg
=
*
(
pMsgPArr
[
i
]);
uint64_t
seqNum
=
syncRespMgrAdd
(
pSyncNode
->
pSyncRespMgr
,
&
stub
);
raftArr
[
i
].
isWeak
=
pIsWeakArr
[
i
];
raftArr
[
i
].
seqNum
=
seqNum
;
}
SyncClientRequestBatch
*
pSyncMsg
=
syncClientRequestBatchBuild
(
pMsgPArr
,
raftArr
,
arrSize
,
pSyncNode
->
vgId
);
ASSERT
(
pSyncMsg
!=
NULL
);
SRpcMsg
rpcMsg
;
syncClientRequestBatch2RpcMsg
(
pSyncMsg
,
&
rpcMsg
);
taosMemoryFree
(
pSyncMsg
);
// only free msg body, do not free rpc msg content
if
(
pSyncNode
->
replicaNum
==
1
&&
pSyncNode
->
vgId
!=
1
)
{
int32_t
code
=
syncNodeOnClientRequestBatchCb
(
pSyncNode
,
pSyncMsg
);
if
(
code
==
0
)
{
// update rpc msg applyIndex
SRpcMsg
*
msgArr
=
syncClientRequestBatchRpcMsgArr
(
pSyncMsg
);
ASSERT
(
arrSize
==
pSyncMsg
->
dataCount
);
for
(
int
i
=
0
;
i
<
arrSize
;
++
i
)
{
pMsgPArr
[
i
]
->
info
.
conn
.
applyIndex
=
msgArr
[
i
].
info
.
conn
.
applyIndex
;
syncRespMgrDel
(
pSyncNode
->
pSyncRespMgr
,
raftArr
[
i
].
seqNum
);
}
rpcFreeCont
(
rpcMsg
.
pCont
);
terrno
=
0
;
return
1
;
}
else
{
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
return
-
1
;
}
}
else
{
if
(
pSyncNode
->
FpEqMsg
!=
NULL
&&
(
*
pSyncNode
->
FpEqMsg
)(
pSyncNode
->
msgcb
,
&
rpcMsg
)
==
0
)
{
// enqueue msg ok
return
0
;
}
else
{
sError
(
"vgId:%d, enqueue msg error, FpEqMsg is NULL"
,
pSyncNode
->
vgId
);
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
return
-
1
;
}
}
return
0
;
}
int32_t
syncNodePropose
(
SSyncNode
*
pSyncNode
,
SRpcMsg
*
pMsg
,
bool
isWeak
)
{
int32_t
ret
=
0
;
...
...
@@ -867,8 +939,8 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak) {
if
(
!
pSyncNode
->
restoreFinish
&&
pSyncNode
->
vgId
!=
1
)
{
ret
=
-
1
;
terrno
=
TSDB_CODE_SYN_PROPOSE_NOT_READY
;
sError
(
"vgId:%d, failed to sync propose since not ready, type:%s, last:%
"
PRId64
", cmt:%"
PRId64
""
,
pSyncNode
->
vgId
,
TMSG_INFO
(
pMsg
->
msgType
),
syncNodeGetLastIndex
(
pSyncNode
),
pSyncNode
->
commitIndex
);
sError
(
"vgId:%d, failed to sync propose since not ready, type:%s, last:%
ld, cmt:%ld"
,
pSyncNode
->
vgId
,
TMSG_INFO
(
pMsg
->
msgType
),
syncNodeGetLastIndex
(
pSyncNode
),
pSyncNode
->
commitIndex
);
goto
_END
;
}
...
...
@@ -884,7 +956,7 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak) {
// optimized one replica
if
(
syncNodeIsOptimizedOneReplica
(
pSyncNode
,
pMsg
))
{
SyncIndex
retIndex
;
int32_t
code
=
syncNodeOnClientRequest
Cb
(
pSyncNode
,
pSyncMsg
,
&
retIndex
);
int32_t
code
=
syncNodeOnClientRequest
(
pSyncNode
,
pSyncMsg
,
&
retIndex
);
if
(
code
==
0
)
{
pMsg
->
info
.
conn
.
applyIndex
=
retIndex
;
pMsg
->
info
.
conn
.
applyTerm
=
pSyncNode
->
pRaftStore
->
currentTerm
;
...
...
@@ -925,8 +997,46 @@ _END:
return
ret
;
}
int32_t
syncHbTimerInit
(
SSyncNode
*
pSyncNode
,
SSyncTimer
*
pSyncTimer
,
SRaftId
destId
)
{
pSyncTimer
->
pTimer
=
NULL
;
pSyncTimer
->
counter
=
0
;
pSyncTimer
->
timerMS
=
pSyncNode
->
hbBaseLine
;
pSyncTimer
->
timerCb
=
syncNodeEqPeerHeartbeatTimer
;
pSyncTimer
->
destId
=
destId
;
atomic_store_64
(
&
pSyncTimer
->
logicClock
,
0
);
return
0
;
}
int32_t
syncHbTimerStart
(
SSyncNode
*
pSyncNode
,
SSyncTimer
*
pSyncTimer
)
{
int32_t
ret
=
0
;
if
(
syncEnvIsStart
())
{
SSyncHbTimerData
*
pData
=
taosMemoryMalloc
(
sizeof
(
SSyncHbTimerData
));
pData
->
pSyncNode
=
pSyncNode
;
pData
->
pTimer
=
pSyncTimer
;
pData
->
destId
=
pSyncTimer
->
destId
;
pData
->
logicClock
=
pSyncTimer
->
logicClock
;
pSyncTimer
->
pData
=
pData
;
taosTmrReset
(
pSyncTimer
->
timerCb
,
pSyncTimer
->
timerMS
,
pData
,
gSyncEnv
->
pTimerManager
,
&
pSyncTimer
->
pTimer
);
}
else
{
sError
(
"vgId:%d, start ctrl hb timer error, sync env is stop"
,
pSyncNode
->
vgId
);
}
return
ret
;
}
int32_t
syncHbTimerStop
(
SSyncNode
*
pSyncNode
,
SSyncTimer
*
pSyncTimer
)
{
int32_t
ret
=
0
;
atomic_add_fetch_64
(
&
pSyncTimer
->
logicClock
,
1
);
taosTmrStop
(
pSyncTimer
->
pTimer
);
pSyncTimer
->
pTimer
=
NULL
;
// taosMemoryFree(pSyncTimer->pData);
return
ret
;
}
// open/close --------------
SSyncNode
*
syncNodeOpen
(
SSyncInfo
*
pSyncInfo
)
{
SSyncNode
*
syncNodeOpen
(
SSyncInfo
*
pOldSyncInfo
)
{
SSyncInfo
*
pSyncInfo
=
(
SSyncInfo
*
)
pOldSyncInfo
;
SSyncNode
*
pSyncNode
=
(
SSyncNode
*
)
taosMemoryCalloc
(
1
,
sizeof
(
SSyncNode
));
if
(
pSyncNode
==
NULL
)
{
terrno
=
TSDB_CODE_OUT_OF_MEMORY
;
...
...
@@ -962,9 +1072,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
sError
(
"failed to open raft cfg file. path:%s"
,
pSyncNode
->
configPath
);
goto
_error
;
}
if
(
pSyncInfo
->
syncCfg
.
replicaNum
==
0
)
{
pSyncInfo
->
syncCfg
=
pSyncNode
->
pRaftCfg
->
cfg
;
}
pSyncInfo
->
syncCfg
=
pSyncNode
->
pRaftCfg
->
cfg
;
raftCfgClose
(
pSyncNode
->
pRaftCfg
);
pSyncNode
->
pRaftCfg
=
NULL
;
...
...
@@ -981,6 +1089,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode
->
msgcb
=
pSyncInfo
->
msgcb
;
pSyncNode
->
FpSendMsg
=
pSyncInfo
->
FpSendMsg
;
pSyncNode
->
FpEqMsg
=
pSyncInfo
->
FpEqMsg
;
pSyncNode
->
FpEqCtrlMsg
=
pSyncInfo
->
FpEqCtrlMsg
;
// init raft config
pSyncNode
->
pRaftCfg
=
raftCfgOpen
(
pSyncNode
->
configPath
);
...
...
@@ -1136,29 +1245,22 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode
->
FpHeartbeatTimerCB
=
syncNodeEqHeartbeatTimer
;
pSyncNode
->
heartbeatTimerCounter
=
0
;
// init peer heartbeat timer
for
(
int32_t
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
syncHbTimerInit
(
pSyncNode
,
&
(
pSyncNode
->
peerHeartbeatTimerArr
[
i
]),
(
pSyncNode
->
replicasId
)[
i
]);
}
// init callback
pSyncNode
->
FpOnPing
=
syncNodeOnPingCb
;
pSyncNode
->
FpOnPingReply
=
syncNodeOnPingReplyCb
;
pSyncNode
->
FpOnClientRequest
=
syncNodeOnClientRequestCb
;
pSyncNode
->
FpOnTimeout
=
syncNodeOnTimeoutCb
;
pSyncNode
->
FpOnSnapshotSend
=
syncNodeOnSnapshotSendCb
;
pSyncNode
->
FpOnSnapshotRsp
=
syncNodeOnSnapshotRspCb
;
if
(
pSyncNode
->
pRaftCfg
->
snapshotStrategy
)
{
sInfo
(
"vgId:%d, sync node use snapshot"
,
pSyncNode
->
vgId
);
pSyncNode
->
FpOnRequestVote
=
syncNodeOnRequestVoteSnapshotCb
;
pSyncNode
->
FpOnRequestVoteReply
=
syncNodeOnRequestVoteReplySnapshotCb
;
pSyncNode
->
FpOnAppendEntries
=
syncNodeOnAppendEntriesSnapshotCb
;
pSyncNode
->
FpOnAppendEntriesReply
=
syncNodeOnAppendEntriesReplySnapshotCb
;
}
else
{
sInfo
(
"vgId:%d, sync node do not use snapshot"
,
pSyncNode
->
vgId
);
pSyncNode
->
FpOnRequestVote
=
syncNodeOnRequestVoteCb
;
pSyncNode
->
FpOnRequestVoteReply
=
syncNodeOnRequestVoteReplyCb
;
pSyncNode
->
FpOnAppendEntries
=
syncNodeOnAppendEntriesCb
;
pSyncNode
->
FpOnAppendEntriesReply
=
syncNodeOnAppendEntriesReplyCb
;
}
pSyncNode
->
FpOnClientRequest
=
syncNodeOnClientRequest
;
pSyncNode
->
FpOnTimeout
=
syncNodeOnTimer
;
pSyncNode
->
FpOnSnapshot
=
syncNodeOnSnapshot
;
pSyncNode
->
FpOnSnapshotReply
=
syncNodeOnSnapshotReply
;
pSyncNode
->
FpOnRequestVote
=
syncNodeOnRequestVote
;
pSyncNode
->
FpOnRequestVoteReply
=
syncNodeOnRequestVoteReply
;
pSyncNode
->
FpOnAppendEntries
=
syncNodeOnAppendEntries
;
pSyncNode
->
FpOnAppendEntriesReply
=
syncNodeOnAppendEntriesReply
;
// tools
pSyncNode
->
pSyncRespMgr
=
syncRespMgrCreate
(
pSyncNode
,
SYNC_RESP_TTL_MS
);
...
...
@@ -1183,6 +1285,12 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
// is config changing
pSyncNode
->
changing
=
false
;
// peer state
syncNodePeerStateInit
(
pSyncNode
);
// min match index
pSyncNode
->
minMatchIndex
=
SYNC_INDEX_INVALID
;
// start in syncNodeStart
// start raft
// syncNodeBecomeFollower(pSyncNode);
...
...
@@ -1192,6 +1300,9 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode
->
leaderTime
=
timeNow
;
pSyncNode
->
lastReplicateTime
=
timeNow
;
// snapshotting
atomic_store_64
(
&
pSyncNode
->
snapshottingIndex
,
SYNC_INDEX_INVALID
);
syncNodeEventLog
(
pSyncNode
,
"sync open"
);
return
pSyncNode
;
...
...
@@ -1226,15 +1337,14 @@ void syncNodeStart(SSyncNode* pSyncNode) {
// Raft 3.6.2 Committing entries from previous terms
syncNodeAppendNoop
(
pSyncNode
);
syncMaybeAdvanceCommitIndex
(
pSyncNode
);
}
else
{
syncNodeBecomeFollower
(
pSyncNode
,
"first start"
);
}
if
(
pSyncNode
->
vgId
==
1
)
{
int32_t
ret
=
0
;
ret
=
syncNodeStartPingTimer
(
pSyncNode
);
ASSERT
(
ret
==
0
);
}
int32_t
ret
=
0
;
ret
=
syncNodeStartPingTimer
(
pSyncNode
);
ASSERT
(
ret
==
0
);
}
void
syncNodeStartStandBy
(
SSyncNode
*
pSyncNode
)
{
...
...
@@ -1247,11 +1357,9 @@ void syncNodeStartStandBy(SSyncNode* pSyncNode) {
int32_t
ret
=
syncNodeRestartElectTimer
(
pSyncNode
,
electMS
);
ASSERT
(
ret
==
0
);
if
(
pSyncNode
->
vgId
==
1
)
{
int32_t
ret
=
0
;
ret
=
syncNodeStartPingTimer
(
pSyncNode
);
ASSERT
(
ret
==
0
);
}
ret
=
0
;
ret
=
syncNodeStartPingTimer
(
pSyncNode
);
ASSERT
(
ret
==
0
);
}
void
syncNodeClose
(
SSyncNode
*
pSyncNode
)
{
...
...
@@ -1383,11 +1491,13 @@ int32_t syncNodeStartElectTimer(SSyncNode* pSyncNode, int32_t ms) {
&
pSyncNode
->
pElectTimer
);
atomic_store_64
(
&
pSyncNode
->
electTimerLogicClock
,
pSyncNode
->
electTimerLogicClockUser
);
do
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"elect timer reset, ms:%d"
,
ms
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
while
(
0
);
/*
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "elect timer reset, ms:%d", ms);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
*/
}
else
{
sError
(
"vgId:%d, start elect timer error, sync env is stop"
,
pSyncNode
->
vgId
);
...
...
@@ -1401,7 +1511,7 @@ int32_t syncNodeStopElectTimer(SSyncNode* pSyncNode) {
taosTmrStop
(
pSyncNode
->
pElectTimer
);
pSyncNode
->
pElectTimer
=
NULL
;
sTrace
(
"vgId:%d, sync %s stop elect timer"
,
pSyncNode
->
vgId
,
syncUtilState2String
(
pSyncNode
->
state
));
//
sTrace("vgId:%d, sync %s stop elect timer", pSyncNode->vgId, syncUtilState2String(pSyncNode->state));
return
ret
;
}
...
...
@@ -1453,30 +1563,34 @@ static int32_t syncNodeDoStartHeartbeatTimer(SSyncNode* pSyncNode) {
}
int32_t
syncNodeStartHeartbeatTimer
(
SSyncNode
*
pSyncNode
)
{
int32_t
ret
=
0
;
#if 0
pSyncNode->heartbeatTimerMS = pSyncNode->hbBaseLine;
int32_t
ret
=
syncNodeDoStartHeartbeatTimer
(
pSyncNode
);
return
ret
;
}
ret = syncNodeDoStartHeartbeatTimer(pSyncNode);
#endif
int32_t
syncNodeStartHeartbeatTimerMS
(
SSyncNode
*
pSyncNode
,
int32_t
ms
)
{
pSyncNode
->
heartbeatTimerMS
=
ms
;
int32_t
ret
=
syncNodeDoStartHeartbeatTimer
(
pSyncNode
);
return
ret
;
}
for
(
int
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SSyncTimer
*
pSyncTimer
=
syncNodeGetHbTimer
(
pSyncNode
,
&
(
pSyncNode
->
peersId
[
i
]));
syncHbTimerStart
(
pSyncNode
,
pSyncTimer
);
}
int32_t
syncNodeStartHeartbeatTimerNow
(
SSyncNode
*
pSyncNode
)
{
pSyncNode
->
heartbeatTimerMS
=
1
;
int32_t
ret
=
syncNodeDoStartHeartbeatTimer
(
pSyncNode
);
return
ret
;
}
int32_t
syncNodeStopHeartbeatTimer
(
SSyncNode
*
pSyncNode
)
{
int32_t
ret
=
0
;
#if 0
atomic_add_fetch_64(&pSyncNode->heartbeatTimerLogicClockUser, 1);
taosTmrStop(pSyncNode->pHeartbeatTimer);
pSyncNode->pHeartbeatTimer = NULL;
#endif
sTrace
(
"vgId:%d, sync %s stop heartbeat timer"
,
pSyncNode
->
vgId
,
syncUtilState2String
(
pSyncNode
->
state
));
for
(
int
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SSyncTimer
*
pSyncTimer
=
syncNodeGetHbTimer
(
pSyncNode
,
&
(
pSyncNode
->
peersId
[
i
]));
syncHbTimerStop
(
pSyncNode
,
pSyncTimer
);
}
return
ret
;
}
...
...
@@ -1487,18 +1601,6 @@ int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode) {
return
0
;
}
int32_t
syncNodeRestartHeartbeatTimerNow
(
SSyncNode
*
pSyncNode
)
{
syncNodeStopHeartbeatTimer
(
pSyncNode
);
syncNodeStartHeartbeatTimerNow
(
pSyncNode
);
return
0
;
}
int32_t
syncNodeRestartNowHeartbeatTimerMS
(
SSyncNode
*
pSyncNode
,
int32_t
ms
)
{
syncNodeStopHeartbeatTimer
(
pSyncNode
);
syncNodeStartHeartbeatTimerMS
(
pSyncNode
,
ms
);
return
0
;
}
// utils --------------
int32_t
syncNodeSendMsgById
(
const
SRaftId
*
destRaftId
,
SSyncNode
*
pSyncNode
,
SRpcMsg
*
pMsg
)
{
SEpSet
epSet
;
...
...
@@ -1726,18 +1828,18 @@ inline void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) {
char
logBuf
[
256
+
256
];
if
(
pSyncNode
!=
NULL
&&
pSyncNode
->
pRaftCfg
!=
NULL
&&
pSyncNode
->
pRaftStore
!=
NULL
)
{
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"vgId:%d, sync %s %s, tm:%"
PRIu64
", cmt:%"
PRId64
", fst:%"
PRId64
", lst:%"
PRId64
",
snap
:%"
PRId64
", snap-tm:%"
PRIu64
"vgId:%d, sync %s %s, tm:%"
PRIu64
", cmt:%"
PRId64
", fst:%"
PRId64
", lst:%"
PRId64
",
min
:%"
PRId64
", snap
:%"
PRId64
", snap
-tm:%"
PRIu64
", sby:%d, "
"stgy:%d, bch:%d, "
"r-num:%d, "
"lcfg:%"
PRId64
", chging:%d, rsto:%d, dquorum:%d, elt:%"
PRId64
", hb:%"
PRId64
", %s"
,
pSyncNode
->
vgId
,
syncUtilState2String
(
pSyncNode
->
state
),
str
,
pSyncNode
->
pRaftStore
->
currentTerm
,
pSyncNode
->
commitIndex
,
logBeginIndex
,
logLastIndex
,
snapshot
.
lastApplyIndex
,
snapshot
.
lastApplyTerm
,
pSyncNode
->
pRaftCfg
->
isStandBy
,
pSyncNode
->
pRaftCfg
->
snapshotStrategy
,
pSyncNode
->
pRaftCfg
->
batchSize
,
pSyncNode
->
replicaNum
,
pSyncNode
->
pRaftCfg
->
lastConfigIndex
,
pSyncNode
->
changing
,
pSyncNode
->
restoreFinish
,
syncNodeDynamicQuorum
(
pSyncNode
),
pSyncNode
->
electTimerLogicClockUser
,
pSyncNode
->
heartbeatTimerLogicClockUser
,
printStr
);
pSyncNode
->
commitIndex
,
logBeginIndex
,
logLastIndex
,
pSyncNode
->
minMatchIndex
,
snapshot
.
lastApplyIndex
,
snapshot
.
lastApplyTerm
,
pSyncNode
->
pRaftCfg
->
isStandBy
,
pSyncNode
->
pRaftCfg
->
snapshotStrategy
,
pSyncNode
->
pRaftCfg
->
batchSize
,
pSyncNode
->
replicaNum
,
pSyncNode
->
pRaftCfg
->
lastConfigIndex
,
pSyncNode
->
changing
,
pSyncNode
->
restoreFinish
,
syncNodeDynamicQuorum
(
pSyncNode
)
,
pSyncNode
->
electTimerLogicClockUser
,
pSyncNode
->
heartbeatTimerLogicClockUser
,
printStr
);
}
else
{
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"%s"
,
str
);
}
...
...
@@ -1750,18 +1852,18 @@ inline void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) {
char
*
s
=
(
char
*
)
taosMemoryMalloc
(
len
);
if
(
pSyncNode
!=
NULL
&&
pSyncNode
->
pRaftCfg
!=
NULL
&&
pSyncNode
->
pRaftStore
!=
NULL
)
{
snprintf
(
s
,
len
,
"vgId:%d, sync %s %s, tm:%"
PRIu64
", cmt:%"
PRId64
", fst:%"
PRId64
", lst:%"
PRId64
",
snap
:%"
PRId64
", snap-tm:%"
PRIu64
"vgId:%d, sync %s %s, tm:%"
PRIu64
", cmt:%"
PRId64
", fst:%"
PRId64
", lst:%"
PRId64
",
min
:%"
PRId64
", snap
:%"
PRId64
", snap
-tm:%"
PRIu64
", sby:%d, "
"stgy:%d, bch:%d, "
"r-num:%d, "
"lcfg:%"
PRId64
", chging:%d, rsto:%d, dquorum:%d, elt:%"
PRId64
", hb:%"
PRId64
", %s"
,
pSyncNode
->
vgId
,
syncUtilState2String
(
pSyncNode
->
state
),
str
,
pSyncNode
->
pRaftStore
->
currentTerm
,
pSyncNode
->
commitIndex
,
logBeginIndex
,
logLastIndex
,
snapshot
.
lastApplyIndex
,
snapshot
.
lastApplyTerm
,
pSyncNode
->
pRaftCfg
->
isStandBy
,
pSyncNode
->
pRaftCfg
->
snapshotStrategy
,
pSyncNode
->
pRaftCfg
->
batchSize
,
pSyncNode
->
replicaNum
,
pSyncNode
->
pRaftCfg
->
lastConfigIndex
,
pSyncNode
->
changing
,
pSyncNode
->
restoreFinish
,
syncNodeDynamicQuorum
(
pSyncNode
),
pSyncNode
->
electTimerLogicClockUser
,
pSyncNode
->
heartbeatTimerLogicClockUser
,
printStr
);
pSyncNode
->
commitIndex
,
logBeginIndex
,
logLastIndex
,
pSyncNode
->
minMatchIndex
,
snapshot
.
lastApplyIndex
,
snapshot
.
lastApplyTerm
,
pSyncNode
->
pRaftCfg
->
isStandBy
,
pSyncNode
->
pRaftCfg
->
snapshotStrategy
,
pSyncNode
->
pRaftCfg
->
batchSize
,
pSyncNode
->
replicaNum
,
pSyncNode
->
pRaftCfg
->
lastConfigIndex
,
pSyncNode
->
changing
,
pSyncNode
->
restoreFinish
,
syncNodeDynamicQuorum
(
pSyncNode
)
,
pSyncNode
->
electTimerLogicClockUser
,
pSyncNode
->
heartbeatTimerLogicClockUser
,
printStr
);
}
else
{
snprintf
(
s
,
len
,
"%s"
,
str
);
}
...
...
@@ -1799,17 +1901,18 @@ inline void syncNodeErrorLog(const SSyncNode* pSyncNode, char* str) {
char
logBuf
[
256
+
256
];
if
(
pSyncNode
!=
NULL
&&
pSyncNode
->
pRaftCfg
!=
NULL
&&
pSyncNode
->
pRaftStore
!=
NULL
)
{
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"vgId:%d, sync %s %s, tm:%"
PRIu64
", cmt:%"
PRId64
", fst:%"
PRId64
", lst:%"
PRId64
",
snap
:%"
PRId64
", snap-tm:%"
PRIu64
"vgId:%d, sync %s %s, tm:%"
PRIu64
", cmt:%"
PRId64
", fst:%"
PRId64
", lst:%"
PRId64
",
min
:%"
PRId64
", snap
:%"
PRId64
", snap
-tm:%"
PRIu64
", sby:%d, "
"stgy:%d, bch:%d, "
"r-num:%d, "
"lcfg:%"
PRId64
", chging:%d, rsto:%d, %s"
,
"lcfg:%"
PRId64
", chging:%d, rsto:%d,
dquorum:%d, elt:%"
PRId64
", hb:%"
PRId64
",
%s"
,
pSyncNode
->
vgId
,
syncUtilState2String
(
pSyncNode
->
state
),
str
,
pSyncNode
->
pRaftStore
->
currentTerm
,
pSyncNode
->
commitIndex
,
logBeginIndex
,
logLastIndex
,
snapshot
.
lastApplyIndex
,
snapshot
.
lastApplyTerm
,
pSyncNode
->
pRaftCfg
->
isStandBy
,
pSyncNode
->
pRaftCfg
->
snapshotStrategy
,
pSyncNode
->
pRaftCfg
->
batchSize
,
pSyncNode
->
replicaNum
,
pSyncNode
->
pRaftCfg
->
lastConfigIndex
,
pSyncNode
->
changing
,
pSyncNode
->
restoreFinish
,
printStr
);
pSyncNode
->
commitIndex
,
logBeginIndex
,
logLastIndex
,
pSyncNode
->
minMatchIndex
,
snapshot
.
lastApplyIndex
,
snapshot
.
lastApplyTerm
,
pSyncNode
->
pRaftCfg
->
isStandBy
,
pSyncNode
->
pRaftCfg
->
snapshotStrategy
,
pSyncNode
->
pRaftCfg
->
batchSize
,
pSyncNode
->
replicaNum
,
pSyncNode
->
pRaftCfg
->
lastConfigIndex
,
pSyncNode
->
changing
,
pSyncNode
->
restoreFinish
,
syncNodeDynamicQuorum
(
pSyncNode
),
pSyncNode
->
electTimerLogicClockUser
,
pSyncNode
->
heartbeatTimerLogicClockUser
,
printStr
);
}
else
{
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"%s"
,
str
);
}
...
...
@@ -1820,17 +1923,18 @@ inline void syncNodeErrorLog(const SSyncNode* pSyncNode, char* str) {
char
*
s
=
(
char
*
)
taosMemoryMalloc
(
len
);
if
(
pSyncNode
!=
NULL
&&
pSyncNode
->
pRaftCfg
!=
NULL
&&
pSyncNode
->
pRaftStore
!=
NULL
)
{
snprintf
(
s
,
len
,
"vgId:%d, sync %s %s, tm:%"
PRIu64
", cmt:%"
PRId64
", fst:%"
PRId64
", lst:%"
PRId64
",
snap
:%"
PRId64
", snap-tm:%"
PRIu64
"vgId:%d, sync %s %s, tm:%"
PRIu64
", cmt:%"
PRId64
", fst:%"
PRId64
", lst:%"
PRId64
",
min
:%"
PRId64
", snap
:%"
PRId64
", snap
-tm:%"
PRIu64
", sby:%d, "
"stgy:%d, bch:%d, "
"r-num:%d, "
"lcfg:%"
PRId64
", chging:%d, rsto:%d, %s"
,
"lcfg:%"
PRId64
", chging:%d, rsto:%d,
dquorum:%d, elt:%"
PRId64
", hb:%"
PRId64
",
%s"
,
pSyncNode
->
vgId
,
syncUtilState2String
(
pSyncNode
->
state
),
str
,
pSyncNode
->
pRaftStore
->
currentTerm
,
pSyncNode
->
commitIndex
,
logBeginIndex
,
logLastIndex
,
snapshot
.
lastApplyIndex
,
snapshot
.
lastApplyTerm
,
pSyncNode
->
pRaftCfg
->
isStandBy
,
pSyncNode
->
pRaftCfg
->
snapshotStrategy
,
pSyncNode
->
pRaftCfg
->
batchSize
,
pSyncNode
->
replicaNum
,
pSyncNode
->
pRaftCfg
->
lastConfigIndex
,
pSyncNode
->
changing
,
pSyncNode
->
restoreFinish
,
printStr
);
pSyncNode
->
commitIndex
,
logBeginIndex
,
logLastIndex
,
pSyncNode
->
minMatchIndex
,
snapshot
.
lastApplyIndex
,
snapshot
.
lastApplyTerm
,
pSyncNode
->
pRaftCfg
->
isStandBy
,
pSyncNode
->
pRaftCfg
->
snapshotStrategy
,
pSyncNode
->
pRaftCfg
->
batchSize
,
pSyncNode
->
replicaNum
,
pSyncNode
->
pRaftCfg
->
lastConfigIndex
,
pSyncNode
->
changing
,
pSyncNode
->
restoreFinish
,
syncNodeDynamicQuorum
(
pSyncNode
),
pSyncNode
->
electTimerLogicClockUser
,
pSyncNode
->
heartbeatTimerLogicClockUser
,
printStr
);
}
else
{
snprintf
(
s
,
len
,
"%s"
,
str
);
}
...
...
@@ -1891,26 +1995,8 @@ inline bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config) {
return
b1
;
}
static
bool
syncIsConfigChanged
(
const
SSyncCfg
*
pOldCfg
,
const
SSyncCfg
*
pNewCfg
)
{
if
(
pOldCfg
->
replicaNum
!=
pNewCfg
->
replicaNum
)
return
true
;
if
(
pOldCfg
->
myIndex
!=
pNewCfg
->
myIndex
)
return
true
;
for
(
int32_t
i
=
0
;
i
<
pOldCfg
->
replicaNum
;
++
i
)
{
const
SNodeInfo
*
pOldInfo
=
&
pOldCfg
->
nodeInfo
[
i
];
const
SNodeInfo
*
pNewInfo
=
&
pNewCfg
->
nodeInfo
[
i
];
if
(
strcmp
(
pOldInfo
->
nodeFqdn
,
pNewInfo
->
nodeFqdn
)
!=
0
)
return
true
;
if
(
pOldInfo
->
nodePort
!=
pNewInfo
->
nodePort
)
return
true
;
}
return
false
;
}
void
syncNodeDoConfigChange
(
SSyncNode
*
pSyncNode
,
SSyncCfg
*
pNewConfig
,
SyncIndex
lastConfigChangeIndex
)
{
SSyncCfg
oldConfig
=
pSyncNode
->
pRaftCfg
->
cfg
;
if
(
!
syncIsConfigChanged
(
&
oldConfig
,
pNewConfig
))
{
sInfo
(
"vgId:1, sync not reconfig since not changed"
);
return
;
}
pSyncNode
->
pRaftCfg
->
cfg
=
*
pNewConfig
;
pSyncNode
->
pRaftCfg
->
lastConfigIndex
=
lastConfigChangeIndex
;
...
...
@@ -1993,13 +2079,14 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
syncUtilnodeInfo2raftId
(
&
pSyncNode
->
pRaftCfg
->
cfg
.
nodeInfo
[
i
],
pSyncNode
->
vgId
,
&
pSyncNode
->
replicasId
[
i
]);
}
// update quorum first
pSyncNode
->
quorum
=
syncUtilQuorum
(
pSyncNode
->
pRaftCfg
->
cfg
.
replicaNum
);
syncIndexMgrUpdate
(
pSyncNode
->
pNextIndex
,
pSyncNode
);
syncIndexMgrUpdate
(
pSyncNode
->
pMatchIndex
,
pSyncNode
);
voteGrantedUpdate
(
pSyncNode
->
pVotesGranted
,
pSyncNode
);
votesRespondUpdate
(
pSyncNode
->
pVotesRespond
,
pSyncNode
);
pSyncNode
->
quorum
=
syncUtilQuorum
(
pSyncNode
->
pRaftCfg
->
cfg
.
replicaNum
);
// reset snapshot senders
// clear new
...
...
@@ -2148,6 +2235,30 @@ void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term) {
}
}
void
syncNodeStepDown
(
SSyncNode
*
pSyncNode
,
SyncTerm
newTerm
)
{
ASSERT
(
pSyncNode
->
pRaftStore
->
currentTerm
<=
newTerm
);
do
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"step down, new-term:%lu, current-term:%lu"
,
newTerm
,
pSyncNode
->
pRaftStore
->
currentTerm
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
while
(
0
);
if
(
pSyncNode
->
pRaftStore
->
currentTerm
<
newTerm
)
{
raftStoreSetTerm
(
pSyncNode
->
pRaftStore
,
newTerm
);
char
tmpBuf
[
64
];
snprintf
(
tmpBuf
,
sizeof
(
tmpBuf
),
"step down, update term to %"
PRIu64
,
newTerm
);
syncNodeBecomeFollower
(
pSyncNode
,
tmpBuf
);
raftStoreClearVote
(
pSyncNode
->
pRaftStore
);
}
else
{
if
(
pSyncNode
->
state
!=
TAOS_SYNC_STATE_FOLLOWER
)
{
syncNodeBecomeFollower
(
pSyncNode
,
"step down"
);
}
}
}
void
syncNodeLeaderChangeRsp
(
SSyncNode
*
pSyncNode
)
{
syncRespCleanRsp
(
pSyncNode
->
pSyncRespMgr
);
}
void
syncNodeBecomeFollower
(
SSyncNode
*
pSyncNode
,
const
char
*
debugStr
)
{
...
...
@@ -2171,6 +2282,9 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) {
pSyncNode
->
pFsm
->
FpBecomeFollowerCb
(
pSyncNode
->
pFsm
);
}
// min match index
pSyncNode
->
minMatchIndex
=
SYNC_INDEX_INVALID
;
// trace log
do
{
int32_t
debugStrLen
=
strlen
(
debugStr
);
...
...
@@ -2237,6 +2351,9 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
pSyncNode
->
pMatchIndex
->
index
[
i
]
=
SYNC_INDEX_INVALID
;
}
// init peer mgr
syncNodePeerStateInit
(
pSyncNode
);
// update sender private term
SSyncSnapshotSender
*
pMySender
=
syncNodeGetSnapshotSender
(
pSyncNode
,
&
(
pSyncNode
->
myRaftId
));
if
(
pMySender
!=
NULL
)
{
...
...
@@ -2259,11 +2376,17 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
// start heartbeat timer
syncNodeStartHeartbeatTimer
(
pSyncNode
);
// send heartbeat right now
syncNodeHeartbeatPeers
(
pSyncNode
);
// call back
if
(
pSyncNode
->
pFsm
!=
NULL
&&
pSyncNode
->
pFsm
->
FpBecomeLeaderCb
!=
NULL
)
{
pSyncNode
->
pFsm
->
FpBecomeLeaderCb
(
pSyncNode
->
pFsm
);
}
// min match index
pSyncNode
->
minMatchIndex
=
SYNC_INDEX_INVALID
;
// trace log
do
{
int32_t
debugStrLen
=
strlen
(
debugStr
);
...
...
@@ -2282,7 +2405,7 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
void
syncNodeCandidate2Leader
(
SSyncNode
*
pSyncNode
)
{
ASSERT
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_CANDIDATE
);
//
ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted));
ASSERT
(
voteGrantedMajority
(
pSyncNode
->
pVotesGranted
));
syncNodeBecomeLeader
(
pSyncNode
,
"candidate to leader"
);
syncNodeLog2
(
"==state change syncNodeCandidate2Leader=="
,
pSyncNode
);
...
...
@@ -2290,6 +2413,21 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
// Raft 3.6.2 Committing entries from previous terms
syncNodeAppendNoop
(
pSyncNode
);
syncMaybeAdvanceCommitIndex
(
pSyncNode
);
if
(
pSyncNode
->
replicaNum
>
1
)
{
syncNodeReplicate
(
pSyncNode
);
}
}
bool
syncNodeIsMnode
(
SSyncNode
*
pSyncNode
)
{
return
(
pSyncNode
->
vgId
==
1
);
}
int32_t
syncNodePeerStateInit
(
SSyncNode
*
pSyncNode
)
{
for
(
int
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
pSyncNode
->
peerStates
[
i
].
lastSendIndex
=
SYNC_INDEX_INVALID
;
pSyncNode
->
peerStates
[
i
].
lastSendTime
=
0
;
}
return
0
;
}
void
syncNodeFollower2Candidate
(
SSyncNode
*
pSyncNode
)
{
...
...
@@ -2475,35 +2613,35 @@ int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex
// for debug --------------
void
syncNodePrint
(
SSyncNode
*
pObj
)
{
char
*
serialized
=
syncNode2Str
(
pObj
);
printf
(
"syncNodePrint | len:%
lu
| %s
\n
"
,
strlen
(
serialized
),
serialized
);
printf
(
"syncNodePrint | len:%
"
PRIu64
"
| %s
\n
"
,
strlen
(
serialized
),
serialized
);
fflush
(
NULL
);
taosMemoryFree
(
serialized
);
}
void
syncNodePrint2
(
char
*
s
,
SSyncNode
*
pObj
)
{
char
*
serialized
=
syncNode2Str
(
pObj
);
printf
(
"syncNodePrint2 | len:%
lu
| %s | %s
\n
"
,
strlen
(
serialized
),
s
,
serialized
);
printf
(
"syncNodePrint2 | len:%
"
PRIu64
"
| %s | %s
\n
"
,
strlen
(
serialized
),
s
,
serialized
);
fflush
(
NULL
);
taosMemoryFree
(
serialized
);
}
void
syncNodeLog
(
SSyncNode
*
pObj
)
{
char
*
serialized
=
syncNode2Str
(
pObj
);
sTraceLong
(
"syncNodeLog | len:%
lu
| %s"
,
strlen
(
serialized
),
serialized
);
sTraceLong
(
"syncNodeLog | len:%
"
PRIu64
"
| %s"
,
strlen
(
serialized
),
serialized
);
taosMemoryFree
(
serialized
);
}
void
syncNodeLog2
(
char
*
s
,
SSyncNode
*
pObj
)
{
if
(
gRaftDetailLog
)
{
char
*
serialized
=
syncNode2Str
(
pObj
);
sTraceLong
(
"syncNodeLog2 | len:%
lu
| %s | %s"
,
strlen
(
serialized
),
s
,
serialized
);
sTraceLong
(
"syncNodeLog2 | len:%
"
PRIu64
"
| %s | %s"
,
strlen
(
serialized
),
s
,
serialized
);
taosMemoryFree
(
serialized
);
}
}
void
syncNodeLog3
(
char
*
s
,
SSyncNode
*
pObj
)
{
char
*
serialized
=
syncNode2Str
(
pObj
);
sTraceLong
(
"syncNodeLog3 | len:%
lu
| %s | %s"
,
strlen
(
serialized
),
s
,
serialized
);
sTraceLong
(
"syncNodeLog3 | len:%
"
PRIu64
"
| %s | %s"
,
strlen
(
serialized
),
s
,
serialized
);
taosMemoryFree
(
serialized
);
}
...
...
@@ -2619,6 +2757,67 @@ static void syncNodeEqHeartbeatTimer(void* param, void* tmrId) {
}
}
static
void
syncNodeEqPeerHeartbeatTimer
(
void
*
param
,
void
*
tmrId
)
{
SSyncHbTimerData
*
pData
=
(
SSyncHbTimerData
*
)
param
;
SSyncNode
*
pSyncNode
=
pData
->
pSyncNode
;
SSyncTimer
*
pSyncTimer
=
pData
->
pTimer
;
if
(
pSyncNode
->
state
!=
TAOS_SYNC_STATE_LEADER
)
{
return
;
}
syncNodeEventLog
(
pSyncNode
,
"eq peer hb timer"
);
int64_t
timerLogicClock
=
atomic_load_64
(
&
pSyncTimer
->
logicClock
);
int64_t
msgLogicClock
=
atomic_load_64
(
&
pData
->
logicClock
);
if
(
pSyncNode
->
replicaNum
>
1
)
{
if
(
timerLogicClock
==
msgLogicClock
)
{
SyncHeartbeat
*
pSyncMsg
=
syncHeartbeatBuild
(
pSyncNode
->
vgId
);
pSyncMsg
->
srcId
=
pSyncNode
->
myRaftId
;
pSyncMsg
->
destId
=
pData
->
destId
;
pSyncMsg
->
term
=
pSyncNode
->
pRaftStore
->
currentTerm
;
pSyncMsg
->
commitIndex
=
pSyncNode
->
commitIndex
;
pSyncMsg
->
minMatchIndex
=
syncMinMatchIndex
(
pSyncNode
);
pSyncMsg
->
privateTerm
=
0
;
SRpcMsg
rpcMsg
;
syncHeartbeat2RpcMsg
(
pSyncMsg
,
&
rpcMsg
);
// eq msg
#if 0
if (pSyncNode->FpEqCtrlMsg != NULL) {
int32_t code = pSyncNode->FpEqCtrlMsg(pSyncNode->msgcb, &rpcMsg);
if (code != 0) {
sError("vgId:%d, sync ctrl enqueue timer msg error, code:%d", pSyncNode->vgId, code);
rpcFreeCont(rpcMsg.pCont);
syncHeartbeatDestroy(pSyncMsg);
return;
}
} else {
sError("vgId:%d, enqueue ctrl msg cb ptr (i.e. FpEqMsg) not set.", pSyncNode->vgId);
}
#endif
// send msg
syncNodeSendHeartbeat
(
pSyncNode
,
&
(
pSyncMsg
->
destId
),
pSyncMsg
);
syncHeartbeatDestroy
(
pSyncMsg
);
if
(
syncEnvIsStart
())
{
taosTmrReset
(
syncNodeEqPeerHeartbeatTimer
,
pSyncTimer
->
timerMS
,
pData
,
gSyncEnv
->
pTimerManager
,
&
pSyncTimer
->
pTimer
);
}
else
{
sError
(
"sync env is stop, syncNodeEqHeartbeatTimer"
);
}
}
else
{
sTrace
(
"==syncNodeEqPeerHeartbeatTimer== timerLogicClock:%"
PRIu64
", msgLogicClock:%"
PRIu64
""
,
timerLogicClock
,
msgLogicClock
);
}
}
}
static
int32_t
syncNodeEqNoop
(
SSyncNode
*
ths
)
{
int32_t
ret
=
0
;
ASSERT
(
ths
->
state
==
TAOS_SYNC_STATE_LEADER
);
...
...
@@ -2676,10 +2875,9 @@ static int32_t syncNodeAppendNoop(SSyncNode* ths) {
if
(
ths
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
int32_t
code
=
ths
->
pLogStore
->
syncLogAppendEntry
(
ths
->
pLogStore
,
pEntry
);
if
(
code
!=
0
)
{
s
Error
(
"vgId:%d, failed to append log entry since %s"
,
ths
->
vgId
,
tstrerror
(
terrno
)
);
s
yncNodeErrorLog
(
ths
,
"append noop error"
);
return
-
1
;
}
syncNodeReplicate
(
ths
,
false
);
}
if
(
h
)
{
...
...
@@ -2738,13 +2936,15 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, SyncHeartbeat* pMsg) {
SRpcMsg
rpcMsg
;
syncHeartbeatReply2RpcMsg
(
pMsgReply
,
&
rpcMsg
);
#if 0
if (pMsg->term >= ths->pRaftStore->currentTerm && ths->state != TAOS_SYNC_STATE_FOLLOWER) {
syncNode
BecomeFollower
(
ths
,
"become follower by hb"
);
syncNode
StepDown(ths, pMsg->term
);
}
#endif
if
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
{
// sInfo("vgId:%d, heartbeat reset timer", ths->vgId);
if
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
&&
ths
->
state
!=
TAOS_SYNC_STATE_LEADER
)
{
syncNodeResetElectTimer
(
ths
);
ths
->
minMatchIndex
=
pMsg
->
minMatchIndex
;
#if 0
if (ths->state == TAOS_SYNC_STATE_FOLLOWER) {
...
...
@@ -2785,10 +2985,12 @@ int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, SyncHeartbeatReply* pMsg) {
// /\ UNCHANGED <<messages, serverVars, candidateVars,
// leaderVars, commitIndex>>
//
int32_t
syncNodeOnClientRequestCb
(
SSyncNode
*
ths
,
SyncClientRequest
*
pMsg
,
SyncIndex
*
pRetIndex
)
{
int32_t
syncNodeOnClientRequest
(
SSyncNode
*
ths
,
SyncClientRequest
*
pMsg
,
SyncIndex
*
pRetIndex
)
{
syncNodeEventLog
(
ths
,
"on client request"
);
int32_t
ret
=
0
;
int32_t
code
=
0
;
syncClientRequestLog2
(
"==syncNodeOnClientRequestCb=="
,
pMsg
);
SyncIndex
index
=
ths
->
pLogStore
->
syncLogWriteIndex
(
ths
->
pLogStore
);
SyncTerm
term
=
ths
->
pRaftStore
->
currentTerm
;
...
...
@@ -2803,16 +3005,13 @@ int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncI
code
=
ths
->
pLogStore
->
syncLogAppendEntry
(
ths
->
pLogStore
,
pEntry
);
if
(
code
!=
0
)
{
// del resp mgr, call FpCommitCb
sError
(
"vgId:%d, failed to append log entry since %s"
,
ths
->
vgId
,
tstrerror
(
terrno
)
);
ASSERT
(
0
);
return
-
1
;
}
// if mulit replica, start replicate right now
if
(
ths
->
replicaNum
>
1
)
{
syncNodeReplicate
(
ths
,
false
);
// pre commit
syncNodePreCommit
(
ths
,
pEntry
,
0
);
syncNodeReplicate
(
ths
);
}
// if only myself, maybe commit right now
...
...
@@ -2838,61 +3037,6 @@ int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncI
return
ret
;
}
int32_t
syncNodeOnClientRequestBatchCb
(
SSyncNode
*
ths
,
SyncClientRequestBatch
*
pMsg
)
{
int32_t
code
=
0
;
if
(
ths
->
state
!=
TAOS_SYNC_STATE_LEADER
)
{
// call FpCommitCb, delete resp mgr
return
-
1
;
}
SyncIndex
index
=
ths
->
pLogStore
->
syncLogWriteIndex
(
ths
->
pLogStore
);
SyncTerm
term
=
ths
->
pRaftStore
->
currentTerm
;
int32_t
raftMetaArrayLen
=
sizeof
(
SRaftMeta
)
*
pMsg
->
dataCount
;
int32_t
rpcArrayLen
=
sizeof
(
SRpcMsg
)
*
pMsg
->
dataCount
;
SRaftMeta
*
raftMetaArr
=
(
SRaftMeta
*
)(
pMsg
->
data
);
SRpcMsg
*
msgArr
=
(
SRpcMsg
*
)((
char
*
)(
pMsg
->
data
)
+
raftMetaArrayLen
);
for
(
int32_t
i
=
0
;
i
<
pMsg
->
dataCount
;
++
i
)
{
SSyncRaftEntry
*
pEntry
=
syncEntryBuild
(
msgArr
[
i
].
contLen
);
ASSERT
(
pEntry
!=
NULL
);
pEntry
->
originalRpcType
=
msgArr
[
i
].
msgType
;
pEntry
->
seqNum
=
raftMetaArr
[
i
].
seqNum
;
pEntry
->
isWeak
=
raftMetaArr
[
i
].
isWeak
;
pEntry
->
term
=
term
;
pEntry
->
index
=
index
;
memcpy
(
pEntry
->
data
,
msgArr
[
i
].
pCont
,
msgArr
[
i
].
contLen
);
ASSERT
(
msgArr
[
i
].
contLen
==
pEntry
->
dataLen
);
code
=
ths
->
pLogStore
->
syncLogAppendEntry
(
ths
->
pLogStore
,
pEntry
);
if
(
code
!=
0
)
{
sError
(
"vgId:%d, failed to append log entry since %s"
,
ths
->
vgId
,
tstrerror
(
terrno
));
// del resp mgr, call FpCommitCb
return
-
1
;
}
// update rpc msg conn apply.index
msgArr
[
i
].
info
.
conn
.
applyIndex
=
pEntry
->
index
;
}
// fsync once
SSyncLogStoreData
*
pData
=
ths
->
pLogStore
->
data
;
SWal
*
pWal
=
pData
->
pWal
;
walFsync
(
pWal
,
false
);
if
(
ths
->
replicaNum
>
1
)
{
// if multi replica, start replicate right now
syncNodeReplicate
(
ths
,
false
);
}
else
if
(
ths
->
replicaNum
==
1
)
{
// one replica
syncMaybeAdvanceCommitIndex
(
ths
);
}
return
0
;
}
const
char
*
syncStr
(
ESyncState
state
)
{
switch
(
state
)
{
case
TAOS_SYNC_STATE_FOLLOWER
:
...
...
@@ -2919,14 +3063,14 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p
if
(
pEntry
->
term
<
ths
->
pRaftStore
->
currentTerm
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"little term:%
"
PRIu64
"
, can not do leader transfer"
,
pEntry
->
term
);
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"little term:%
lu
, can not do leader transfer"
,
pEntry
->
term
);
syncNodeEventLog
(
ths
,
logBuf
);
return
0
;
}
if
(
pEntry
->
index
<
syncNodeGetLastIndex
(
ths
))
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"little index:%
"
PRId64
"
, can not do leader transfer"
,
pEntry
->
index
);
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"little index:%
ld
, can not do leader transfer"
,
pEntry
->
index
);
syncNodeEventLog
(
ths
,
logBuf
);
return
0
;
}
...
...
@@ -2942,7 +3086,7 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p
do
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"do leader transfer, index:%
"
PRId64
"
"
,
pEntry
->
index
);
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"do leader transfer, index:%
ld
"
,
pEntry
->
index
);
syncNodeEventLog
(
ths
,
logBuf
);
}
while
(
0
);
...
...
@@ -3081,11 +3225,10 @@ static int32_t syncNodeProposeConfigChangeFinish(SSyncNode* ths, SyncReconfigFin
}
bool
syncNodeIsOptimizedOneReplica
(
SSyncNode
*
ths
,
SRpcMsg
*
pMsg
)
{
return
(
ths
->
replicaNum
==
1
&&
syncUtilUserCommit
(
pMsg
->
msgType
));
// return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1);
return
(
ths
->
replicaNum
==
1
&&
syncUtilUserCommit
(
pMsg
->
msgType
)
&&
ths
->
vgId
!=
1
);
}
int32_t
syncNodeCommit
(
SSyncNode
*
ths
,
SyncIndex
beginIndex
,
SyncIndex
endIndex
,
uint64_t
flag
)
{
int32_t
syncNode
Do
Commit
(
SSyncNode
*
ths
,
SyncIndex
beginIndex
,
SyncIndex
endIndex
,
uint64_t
flag
)
{
if
(
beginIndex
>
endIndex
)
{
return
0
;
}
...
...
@@ -3121,10 +3264,7 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex,
pEntry
=
(
SSyncRaftEntry
*
)
taosLRUCacheValue
(
pCache
,
h
);
}
else
{
code
=
ths
->
pLogStore
->
syncLogGetEntry
(
ths
->
pLogStore
,
i
,
&
pEntry
);
if
(
code
!=
0
)
{
sError
(
"vgId:%d, failed to get log entry since %s. index:%"
PRId64
""
,
ths
->
vgId
,
tstrerror
(
terrno
),
i
);
return
-
1
;
}
ASSERT
(
code
==
0
);
ASSERT
(
pEntry
!=
NULL
);
}
...
...
@@ -3134,8 +3274,7 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex,
// user commit
if
((
ths
->
pFsm
->
FpCommitCb
!=
NULL
)
&&
syncUtilUserCommit
(
pEntry
->
originalRpcType
))
{
bool
internalExecute
=
true
;
if
((
ths
->
replicaNum
==
1
)
&&
ths
->
restoreFinish
)
{
// if ((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1) {
if
((
ths
->
replicaNum
==
1
)
&&
ths
->
restoreFinish
&&
ths
->
vgId
!=
1
)
{
internalExecute
=
false
;
}
...
...
@@ -3203,8 +3342,8 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex,
int64_t
restoreDelay
=
taosGetTimestampMs
()
-
ths
->
leaderTime
;
char
eventLog
[
128
];
snprintf
(
eventLog
,
sizeof
(
eventLog
),
"restore finish, index:%
"
PRId64
", elapsed:%"
PRId64
" ms, "
,
pEntry
->
index
,
restoreDelay
);
snprintf
(
eventLog
,
sizeof
(
eventLog
),
"restore finish, index:%
ld, elapsed:%ld ms, "
,
pEntry
->
index
,
restoreDelay
);
syncNodeEventLog
(
ths
,
eventLog
);
}
}
...
...
@@ -3240,6 +3379,40 @@ SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId)
return
pSender
;
}
SSyncTimer
*
syncNodeGetHbTimer
(
SSyncNode
*
ths
,
SRaftId
*
pDestId
)
{
SSyncTimer
*
pTimer
=
NULL
;
for
(
int
i
=
0
;
i
<
ths
->
replicaNum
;
++
i
)
{
if
(
syncUtilSameId
(
pDestId
,
&
((
ths
->
replicasId
)[
i
])))
{
pTimer
=
&
((
ths
->
peerHeartbeatTimerArr
)[
i
]);
}
}
return
pTimer
;
}
SPeerState
*
syncNodeGetPeerState
(
SSyncNode
*
ths
,
const
SRaftId
*
pDestId
)
{
SPeerState
*
pState
=
NULL
;
for
(
int
i
=
0
;
i
<
ths
->
replicaNum
;
++
i
)
{
if
(
syncUtilSameId
(
pDestId
,
&
((
ths
->
replicasId
)[
i
])))
{
pState
=
&
((
ths
->
peerStates
)[
i
]);
}
}
return
pState
;
}
bool
syncNodeNeedSendAppendEntries
(
SSyncNode
*
ths
,
const
SRaftId
*
pDestId
,
const
SyncAppendEntries
*
pMsg
)
{
SPeerState
*
pState
=
syncNodeGetPeerState
(
ths
,
pDestId
);
ASSERT
(
pState
!=
NULL
);
SyncIndex
sendIndex
=
pMsg
->
prevLogIndex
+
1
;
int64_t
tsNow
=
taosGetTimestampMs
();
if
(
pState
->
lastSendIndex
==
sendIndex
&&
tsNow
-
pState
->
lastSendTime
<
SYNC_APPEND_ENTRIES_TIMEOUT_MS
)
{
return
false
;
}
return
true
;
}
bool
syncNodeCanChange
(
SSyncNode
*
pSyncNode
)
{
if
(
pSyncNode
->
changing
)
{
sError
(
"sync cannot change"
);
...
...
@@ -3265,6 +3438,25 @@ bool syncNodeCanChange(SSyncNode* pSyncNode) {
return
true
;
}
const
char
*
syncTimerTypeStr
(
enum
ESyncTimeoutType
timerType
)
{
if
(
timerType
==
SYNC_TIMEOUT_PING
)
{
return
"ping"
;
}
else
if
(
timerType
==
SYNC_TIMEOUT_ELECTION
)
{
return
"elect"
;
}
else
if
(
timerType
==
SYNC_TIMEOUT_HEARTBEAT
)
{
return
"heartbeat"
;
}
else
{
return
"unknown"
;
}
}
void
syncLogRecvTimer
(
SSyncNode
*
pSyncNode
,
const
SyncTimeout
*
pMsg
,
const
char
*
s
)
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"recv sync-timer {type:%s, lc:%lu, ms:%d, data:%p}, %s"
,
syncTimerTypeStr
(
pMsg
->
timeoutType
),
pMsg
->
logicClock
,
pMsg
->
timerMS
,
pMsg
->
data
,
s
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
void
syncLogSendRequestVote
(
SSyncNode
*
pSyncNode
,
const
SyncRequestVote
*
pMsg
,
const
char
*
s
)
{
char
host
[
64
];
uint16_t
port
;
...
...
@@ -3393,8 +3585,9 @@ void syncLogSendHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, const
syncUtilU642Addr
(
pMsg
->
destId
.
addr
,
host
,
sizeof
(
host
),
&
port
);
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"send sync-heartbeat from %s:%d {term:%"
PRIu64
", cmt:%"
PRIu64
", pterm:%"
PRIu64
"}, %s"
,
host
,
port
,
pMsg
->
term
,
pMsg
->
commitIndex
,
pMsg
->
privateTerm
,
s
);
"send sync-heartbeat from %s:%d {term:%"
PRIu64
", cmt:%"
PRId64
", min-match:%"
PRId64
", pterm:%"
PRIu64
"}, %s"
,
host
,
port
,
pMsg
->
term
,
pMsg
->
commitIndex
,
pMsg
->
minMatchIndex
,
pMsg
->
privateTerm
,
s
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
...
...
@@ -3404,8 +3597,9 @@ void syncLogRecvHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, const
syncUtilU642Addr
(
pMsg
->
srcId
.
addr
,
host
,
sizeof
(
host
),
&
port
);
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"recv sync-heartbeat from %s:%d {term:%"
PRIu64
", cmt:%"
PRIu64
", pterm:%"
PRIu64
"}, %s"
,
host
,
port
,
pMsg
->
term
,
pMsg
->
commitIndex
,
pMsg
->
privateTerm
,
s
);
"recv sync-heartbeat from %s:%d {term:%"
PRIu64
", cmt:%"
PRId64
", min-match:%"
PRId64
", pterm:%"
PRIu64
"}, %s"
,
host
,
port
,
pMsg
->
term
,
pMsg
->
commitIndex
,
pMsg
->
minMatchIndex
,
pMsg
->
privateTerm
,
s
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
...
...
@@ -3427,4 +3621,4 @@ void syncLogRecvHeartbeatReply(SSyncNode* pSyncNode, const SyncHeartbeatReply* p
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"recv sync-heartbeat-reply from %s:%d {term:%"
PRIu64
", pterm:%"
PRIu64
"}, %s"
,
host
,
port
,
pMsg
->
term
,
pMsg
->
privateTerm
,
s
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
}
\ No newline at end of file
source/libs/sync/src/syncRaftCfg.c
浏览文件 @
5965629b
...
...
@@ -29,7 +29,7 @@ SRaftCfgIndex *raftCfgIndexOpen(const char *path) {
taosLSeekFile
(
pRaftCfgIndex
->
pFile
,
0
,
SEEK_SET
);
int32_t
bufLen
=
MAX_CONFIG_INDEX_COUNT
*
16
;
char
*
pBuf
=
taosMemoryMalloc
(
bufLen
);
char
*
pBuf
=
taosMemoryMalloc
(
bufLen
);
memset
(
pBuf
,
0
,
bufLen
);
int64_t
len
=
taosReadFile
(
pRaftCfgIndex
->
pFile
,
pBuf
,
bufLen
);
ASSERT
(
len
>
0
);
...
...
@@ -93,7 +93,7 @@ cJSON *raftCfgIndex2Json(SRaftCfgIndex *pRaftCfgIndex) {
char
*
raftCfgIndex2Str
(
SRaftCfgIndex
*
pRaftCfgIndex
)
{
cJSON
*
pJson
=
raftCfgIndex2Json
(
pRaftCfgIndex
);
char
*
serialized
=
cJSON_Print
(
pJson
);
char
*
serialized
=
cJSON_Print
(
pJson
);
cJSON_Delete
(
pJson
);
return
serialized
;
}
...
...
@@ -151,7 +151,7 @@ int32_t raftCfgIndexCreateFile(const char *path) {
raftCfgIndex
.
configIndexCount
=
1
;
raftCfgIndex
.
configIndexArr
[
0
]
=
-
1
;
char
*
s
=
raftCfgIndex2Str
(
&
raftCfgIndex
);
char
*
s
=
raftCfgIndex2Str
(
&
raftCfgIndex
);
int64_t
ret
=
taosWriteFile
(
pFile
,
s
,
strlen
(
s
)
+
1
);
ASSERT
(
ret
==
strlen
(
s
)
+
1
);
...
...
@@ -244,7 +244,7 @@ cJSON *syncCfg2Json(SSyncCfg *pSyncCfg) {
char
*
syncCfg2Str
(
SSyncCfg
*
pSyncCfg
)
{
cJSON
*
pJson
=
syncCfg2Json
(
pSyncCfg
);
char
*
serialized
=
cJSON_Print
(
pJson
);
char
*
serialized
=
cJSON_Print
(
pJson
);
cJSON_Delete
(
pJson
);
return
serialized
;
}
...
...
@@ -252,7 +252,7 @@ char *syncCfg2Str(SSyncCfg *pSyncCfg) {
char
*
syncCfg2SimpleStr
(
SSyncCfg
*
pSyncCfg
)
{
if
(
pSyncCfg
!=
NULL
)
{
int32_t
len
=
512
;
char
*
s
=
taosMemoryMalloc
(
len
);
char
*
s
=
taosMemoryMalloc
(
len
);
memset
(
s
,
0
,
len
);
snprintf
(
s
,
len
,
"{r-num:%d, my:%d, "
,
pSyncCfg
->
replicaNum
,
pSyncCfg
->
myIndex
);
...
...
@@ -349,7 +349,7 @@ cJSON *raftCfg2Json(SRaftCfg *pRaftCfg) {
char
*
raftCfg2Str
(
SRaftCfg
*
pRaftCfg
)
{
cJSON
*
pJson
=
raftCfg2Json
(
pRaftCfg
);
char
*
serialized
=
cJSON_Print
(
pJson
);
char
*
serialized
=
cJSON_Print
(
pJson
);
cJSON_Delete
(
pJson
);
return
serialized
;
}
...
...
@@ -426,7 +426,7 @@ int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg) {
(
pRaftCfg
->
configIndexArr
)[
i
]
=
atoll
(
pIndex
->
valuestring
);
}
cJSON
*
pJsonSyncCfg
=
cJSON_GetObjectItem
(
pJson
,
"SSyncCfg"
);
cJSON
*
pJsonSyncCfg
=
cJSON_GetObjectItem
(
pJson
,
"SSyncCfg"
);
int32_t
code
=
syncCfgFromJson
(
pJsonSyncCfg
,
&
(
pRaftCfg
->
cfg
));
ASSERT
(
code
==
0
);
...
...
source/libs/sync/src/syncRaftLog.c
浏览文件 @
5965629b
...
...
@@ -33,21 +33,11 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEn
static
int32_t
raftLogGetEntry
(
struct
SSyncLogStore
*
pLogStore
,
SyncIndex
index
,
SSyncRaftEntry
**
ppEntry
);
static
int32_t
raftLogTruncate
(
struct
SSyncLogStore
*
pLogStore
,
SyncIndex
fromIndex
);
static
bool
raftLogExist
(
struct
SSyncLogStore
*
pLogStore
,
SyncIndex
index
);
static
int32_t
raftLogUpdateCommitIndex
(
SSyncLogStore
*
pLogStore
,
SyncIndex
index
);
static
SyncIndex
raftlogCommitIndex
(
SSyncLogStore
*
pLogStore
);
// private function
static
int32_t
raftLogGetLastEntry
(
SSyncLogStore
*
pLogStore
,
SSyncRaftEntry
**
ppLastEntry
);
//-------------------------------
// log[0 .. n]
static
SSyncRaftEntry
*
logStoreGetLastEntry
(
SSyncLogStore
*
pLogStore
);
static
SyncIndex
logStoreLastIndex
(
SSyncLogStore
*
pLogStore
);
static
SyncTerm
logStoreLastTerm
(
SSyncLogStore
*
pLogStore
);
static
SSyncRaftEntry
*
logStoreGetEntry
(
SSyncLogStore
*
pLogStore
,
SyncIndex
index
);
static
int32_t
logStoreAppendEntry
(
SSyncLogStore
*
pLogStore
,
SSyncRaftEntry
*
pEntry
);
static
int32_t
logStoreTruncate
(
SSyncLogStore
*
pLogStore
,
SyncIndex
fromIndex
);
static
int32_t
logStoreUpdateCommitIndex
(
SSyncLogStore
*
pLogStore
,
SyncIndex
index
);
static
SyncIndex
logStoreGetCommitIndex
(
SSyncLogStore
*
pLogStore
);
//-------------------------------
SSyncLogStore
*
logStoreCreate
(
SSyncNode
*
pSyncNode
)
{
SSyncLogStore
*
pLogStore
=
taosMemoryMalloc
(
sizeof
(
SSyncLogStore
));
...
...
@@ -74,14 +64,8 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) {
pData
->
pWalHandle
=
walOpenReader
(
pData
->
pWal
,
NULL
);
ASSERT
(
pData
->
pWalHandle
!=
NULL
);
pLogStore
->
appendEntry
=
logStoreAppendEntry
;
pLogStore
->
getEntry
=
logStoreGetEntry
;
pLogStore
->
truncate
=
logStoreTruncate
;
pLogStore
->
getLastIndex
=
logStoreLastIndex
;
pLogStore
->
getLastTerm
=
logStoreLastTerm
;
pLogStore
->
updateCommitIndex
=
logStoreUpdateCommitIndex
;
pLogStore
->
getCommitIndex
=
logStoreGetCommitIndex
;
pLogStore
->
syncLogUpdateCommitIndex
=
raftLogUpdateCommitIndex
;
pLogStore
->
syncLogCommitIndex
=
raftlogCommitIndex
;
pLogStore
->
syncLogRestoreFromSnapshot
=
raftLogRestoreFromSnapshot
;
pLogStore
->
syncLogBeginIndex
=
raftLogBeginIndex
;
pLogStore
->
syncLogEndIndex
=
raftLogEndIndex
;
...
...
@@ -234,6 +218,8 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal write error, index:%"
PRId64
", err:%d %X, msg:%s, syserr:%d, sysmsg:%s"
,
pEntry
->
index
,
err
,
err
,
errStr
,
sysErr
,
sysErrStr
);
syncNodeErrorLog
(
pData
->
pSyncNode
,
logBuf
);
ASSERT
(
0
);
return
-
1
;
}
pEntry
->
index
=
index
;
...
...
@@ -277,11 +263,15 @@ static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index,
do
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal read error, index:%"
PRId64
", err:%d %X, msg:%s, syserr:%d, sysmsg:%s"
,
index
,
err
,
err
,
errStr
,
sysErr
,
sysErrStr
);
if
(
terrno
==
TSDB_CODE_WAL_LOG_NOT_EXIST
)
{
// syncNodeEventLog(pData->pSyncNode, logBuf);
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal read not exist, index:%"
PRId64
", err:%d %X, msg:%s, syserr:%d, sysmsg:%s"
,
index
,
err
,
err
,
errStr
,
sysErr
,
sysErrStr
);
syncNodeEventLog
(
pData
->
pSyncNode
,
logBuf
);
}
else
{
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal read error, index:%"
PRId64
", err:%d %X, msg:%s, syserr:%d, sysmsg:%s"
,
index
,
err
,
err
,
errStr
,
sysErr
,
sysErrStr
);
syncNodeErrorLog
(
pData
->
pSyncNode
,
logBuf
);
}
}
while
(
0
);
...
...
@@ -372,157 +362,7 @@ static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** pp
return
-
1
;
}
//-------------------------------
// log[0 .. n]
int32_t
logStoreAppendEntry
(
SSyncLogStore
*
pLogStore
,
SSyncRaftEntry
*
pEntry
)
{
SSyncLogStoreData
*
pData
=
pLogStore
->
data
;
SWal
*
pWal
=
pData
->
pWal
;
SyncIndex
index
=
0
;
SWalSyncInfo
syncMeta
=
{
0
};
syncMeta
.
isWeek
=
pEntry
->
isWeak
;
syncMeta
.
seqNum
=
pEntry
->
seqNum
;
syncMeta
.
term
=
pEntry
->
term
;
index
=
walAppendLog
(
pWal
,
pEntry
->
originalRpcType
,
syncMeta
,
pEntry
->
data
,
pEntry
->
dataLen
);
if
(
index
<
0
)
{
int32_t
err
=
terrno
;
const
char
*
errStr
=
tstrerror
(
err
);
int32_t
sysErr
=
errno
;
const
char
*
sysErrStr
=
strerror
(
errno
);
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal write error, index:%"
PRId64
", err:%d %X, msg:%s, syserr:%d, sysmsg:%s"
,
pEntry
->
index
,
err
,
err
,
errStr
,
sysErr
,
sysErrStr
);
syncNodeErrorLog
(
pData
->
pSyncNode
,
logBuf
);
ASSERT
(
0
);
return
-
1
;
}
pEntry
->
index
=
index
;
do
{
char
eventLog
[
128
];
snprintf
(
eventLog
,
sizeof
(
eventLog
),
"write2 index:%"
PRId64
", type:%s, origin type:%s"
,
pEntry
->
index
,
TMSG_INFO
(
pEntry
->
msgType
),
TMSG_INFO
(
pEntry
->
originalRpcType
));
syncNodeEventLog
(
pData
->
pSyncNode
,
eventLog
);
}
while
(
0
);
return
0
;
}
SSyncRaftEntry
*
logStoreGetEntryWithoutLock
(
SSyncLogStore
*
pLogStore
,
SyncIndex
index
)
{
SSyncLogStoreData
*
pData
=
pLogStore
->
data
;
SWal
*
pWal
=
pData
->
pWal
;
if
(
index
>=
SYNC_INDEX_BEGIN
&&
index
<=
logStoreLastIndex
(
pLogStore
))
{
// SWalReadHandle* pWalHandle = walOpenReadHandle(pWal);
SWalReader
*
pWalHandle
=
pData
->
pWalHandle
;
ASSERT
(
pWalHandle
!=
NULL
);
int32_t
code
=
walReadVer
(
pWalHandle
,
index
);
// int32_t code = walReadVerCached(pWalHandle, index);
if
(
code
!=
0
)
{
int32_t
err
=
terrno
;
const
char
*
errStr
=
tstrerror
(
err
);
int32_t
sysErr
=
errno
;
const
char
*
sysErrStr
=
strerror
(
errno
);
do
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal read error, index:%"
PRId64
", err:%d %X, msg:%s, syserr:%d, sysmsg:%s"
,
index
,
err
,
err
,
errStr
,
sysErr
,
sysErrStr
);
if
(
terrno
==
TSDB_CODE_WAL_LOG_NOT_EXIST
)
{
// syncNodeEventLog(pData->pSyncNode, logBuf);
}
else
{
syncNodeErrorLog
(
pData
->
pSyncNode
,
logBuf
);
}
}
while
(
0
);
sError
(
"failed to read ver since %s. index:%"
PRId64
""
,
tstrerror
(
terrno
),
index
);
return
NULL
;
}
SSyncRaftEntry
*
pEntry
=
syncEntryBuild
(
pWalHandle
->
pHead
->
head
.
bodyLen
);
ASSERT
(
pEntry
!=
NULL
);
pEntry
->
msgType
=
TDMT_SYNC_CLIENT_REQUEST
;
pEntry
->
originalRpcType
=
pWalHandle
->
pHead
->
head
.
msgType
;
pEntry
->
seqNum
=
pWalHandle
->
pHead
->
head
.
syncMeta
.
seqNum
;
pEntry
->
isWeak
=
pWalHandle
->
pHead
->
head
.
syncMeta
.
isWeek
;
pEntry
->
term
=
pWalHandle
->
pHead
->
head
.
syncMeta
.
term
;
pEntry
->
index
=
index
;
ASSERT
(
pEntry
->
dataLen
==
pWalHandle
->
pHead
->
head
.
bodyLen
);
memcpy
(
pEntry
->
data
,
pWalHandle
->
pHead
->
head
.
body
,
pWalHandle
->
pHead
->
head
.
bodyLen
);
/*
int32_t saveErr = terrno;
walCloseReadHandle(pWalHandle);
terrno = saveErr;
*/
return
pEntry
;
}
else
{
return
NULL
;
}
}
SSyncRaftEntry
*
logStoreGetEntry
(
SSyncLogStore
*
pLogStore
,
SyncIndex
index
)
{
SSyncLogStoreData
*
pData
=
pLogStore
->
data
;
SSyncRaftEntry
*
pEntry
=
NULL
;
taosThreadMutexLock
(
&
pData
->
mutex
);
pEntry
=
logStoreGetEntryWithoutLock
(
pLogStore
,
index
);
taosThreadMutexUnlock
(
&
pData
->
mutex
);
return
pEntry
;
}
int32_t
logStoreTruncate
(
SSyncLogStore
*
pLogStore
,
SyncIndex
fromIndex
)
{
SSyncLogStoreData
*
pData
=
pLogStore
->
data
;
SWal
*
pWal
=
pData
->
pWal
;
// ASSERT(walRollback(pWal, fromIndex) == 0);
int32_t
code
=
walRollback
(
pWal
,
fromIndex
);
if
(
code
!=
0
)
{
int32_t
err
=
terrno
;
const
char
*
errStr
=
tstrerror
(
err
);
int32_t
sysErr
=
errno
;
const
char
*
sysErrStr
=
strerror
(
errno
);
sError
(
"vgId:%d, wal truncate error, from-index:%"
PRId64
", err:%d %X, msg:%s, syserr:%d, sysmsg:%s"
,
pData
->
pSyncNode
->
vgId
,
fromIndex
,
err
,
err
,
errStr
,
sysErr
,
sysErrStr
);
ASSERT
(
0
);
}
// event log
do
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal truncate, from-index:%"
PRId64
,
fromIndex
);
syncNodeEventLog
(
pData
->
pSyncNode
,
logBuf
);
}
while
(
0
);
return
0
;
}
SyncIndex
logStoreLastIndex
(
SSyncLogStore
*
pLogStore
)
{
SSyncLogStoreData
*
pData
=
pLogStore
->
data
;
SWal
*
pWal
=
pData
->
pWal
;
SyncIndex
lastIndex
=
walGetLastVer
(
pWal
);
return
lastIndex
;
}
SyncTerm
logStoreLastTerm
(
SSyncLogStore
*
pLogStore
)
{
SyncTerm
lastTerm
=
0
;
SSyncRaftEntry
*
pLastEntry
=
logStoreGetLastEntry
(
pLogStore
);
if
(
pLastEntry
!=
NULL
)
{
lastTerm
=
pLastEntry
->
term
;
taosMemoryFree
(
pLastEntry
);
}
return
lastTerm
;
}
int32_t
logStoreUpdateCommitIndex
(
SSyncLogStore
*
pLogStore
,
SyncIndex
index
)
{
int32_t
raftLogUpdateCommitIndex
(
SSyncLogStore
*
pLogStore
,
SyncIndex
index
)
{
SSyncLogStoreData
*
pData
=
pLogStore
->
data
;
SWal
*
pWal
=
pData
->
pWal
;
// ASSERT(walCommit(pWal, index) == 0);
...
...
@@ -540,23 +380,11 @@ int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) {
return
0
;
}
SyncIndex
logStoreGet
CommitIndex
(
SSyncLogStore
*
pLogStore
)
{
SyncIndex
raftlog
CommitIndex
(
SSyncLogStore
*
pLogStore
)
{
SSyncLogStoreData
*
pData
=
pLogStore
->
data
;
return
pData
->
pSyncNode
->
commitIndex
;
}
SSyncRaftEntry
*
logStoreGetLastEntry
(
SSyncLogStore
*
pLogStore
)
{
SSyncLogStoreData
*
pData
=
pLogStore
->
data
;
SWal
*
pWal
=
pData
->
pWal
;
SyncIndex
lastIndex
=
walGetLastVer
(
pWal
);
SSyncRaftEntry
*
pEntry
=
NULL
;
if
(
lastIndex
>
0
)
{
pEntry
=
logStoreGetEntry
(
pLogStore
,
lastIndex
);
}
return
pEntry
;
}
cJSON
*
logStore2Json
(
SSyncLogStore
*
pLogStore
)
{
char
u64buf
[
128
]
=
{
0
};
SSyncLogStoreData
*
pData
=
(
SSyncLogStoreData
*
)
pLogStore
->
data
;
...
...
@@ -595,7 +423,9 @@ cJSON* logStore2Json(SSyncLogStore* pLogStore) {
if
(
!
raftLogIsEmpty
(
pLogStore
))
{
for
(
SyncIndex
i
=
beginIndex
;
i
<=
endIndex
;
++
i
)
{
SSyncRaftEntry
*
pEntry
=
logStoreGetEntry
(
pLogStore
,
i
);
SSyncRaftEntry
*
pEntry
=
NULL
;
raftLogGetEntry
(
pLogStore
,
i
,
&
pEntry
);
cJSON_AddItemToArray
(
pEntries
,
syncEntry2Json
(
pEntry
));
syncEntryDestory
(
pEntry
);
}
...
...
@@ -675,14 +505,14 @@ SyncIndex logStoreWalCommitVer(SSyncLogStore* pLogStore) {
// for debug -----------------
void
logStorePrint
(
SSyncLogStore
*
pLogStore
)
{
char
*
serialized
=
logStore2Str
(
pLogStore
);
printf
(
"logStorePrint | len:%
lu
| %s
\n
"
,
strlen
(
serialized
),
serialized
);
printf
(
"logStorePrint | len:%
"
PRIu64
"
| %s
\n
"
,
strlen
(
serialized
),
serialized
);
fflush
(
NULL
);
taosMemoryFree
(
serialized
);
}
void
logStorePrint2
(
char
*
s
,
SSyncLogStore
*
pLogStore
)
{
char
*
serialized
=
logStore2Str
(
pLogStore
);
printf
(
"logStorePrint2 | len:%
lu
| %s | %s
\n
"
,
strlen
(
serialized
),
s
,
serialized
);
printf
(
"logStorePrint2 | len:%
"
PRIu64
"
| %s | %s
\n
"
,
strlen
(
serialized
),
s
,
serialized
);
fflush
(
NULL
);
taosMemoryFree
(
serialized
);
}
...
...
@@ -690,7 +520,7 @@ void logStorePrint2(char* s, SSyncLogStore* pLogStore) {
void
logStoreLog
(
SSyncLogStore
*
pLogStore
)
{
if
(
gRaftDetailLog
)
{
char
*
serialized
=
logStore2Str
(
pLogStore
);
sTraceLong
(
"logStoreLog | len:%
lu
| %s"
,
strlen
(
serialized
),
serialized
);
sTraceLong
(
"logStoreLog | len:%
"
PRIu64
"
| %s"
,
strlen
(
serialized
),
serialized
);
taosMemoryFree
(
serialized
);
}
}
...
...
@@ -698,7 +528,7 @@ void logStoreLog(SSyncLogStore* pLogStore) {
void
logStoreLog2
(
char
*
s
,
SSyncLogStore
*
pLogStore
)
{
if
(
gRaftDetailLog
)
{
char
*
serialized
=
logStore2Str
(
pLogStore
);
sTraceLong
(
"logStoreLog2 | len:%
lu
| %s | %s"
,
strlen
(
serialized
),
s
,
serialized
);
sTraceLong
(
"logStoreLog2 | len:%
"
PRIu64
"
| %s | %s"
,
strlen
(
serialized
),
s
,
serialized
);
taosMemoryFree
(
serialized
);
}
}
...
...
@@ -706,28 +536,28 @@ void logStoreLog2(char* s, SSyncLogStore* pLogStore) {
// for debug -----------------
void
logStoreSimplePrint
(
SSyncLogStore
*
pLogStore
)
{
char
*
serialized
=
logStoreSimple2Str
(
pLogStore
);
printf
(
"logStoreSimplePrint | len:%
lu
| %s
\n
"
,
strlen
(
serialized
),
serialized
);
printf
(
"logStoreSimplePrint | len:%
"
PRIu64
"
| %s
\n
"
,
strlen
(
serialized
),
serialized
);
fflush
(
NULL
);
taosMemoryFree
(
serialized
);
}
void
logStoreSimplePrint2
(
char
*
s
,
SSyncLogStore
*
pLogStore
)
{
char
*
serialized
=
logStoreSimple2Str
(
pLogStore
);
printf
(
"logStoreSimplePrint2 | len:%
lu
| %s | %s
\n
"
,
strlen
(
serialized
),
s
,
serialized
);
printf
(
"logStoreSimplePrint2 | len:%
"
PRIu64
"
| %s | %s
\n
"
,
strlen
(
serialized
),
s
,
serialized
);
fflush
(
NULL
);
taosMemoryFree
(
serialized
);
}
void
logStoreSimpleLog
(
SSyncLogStore
*
pLogStore
)
{
char
*
serialized
=
logStoreSimple2Str
(
pLogStore
);
sTrace
(
"logStoreSimpleLog | len:%
lu
| %s"
,
strlen
(
serialized
),
serialized
);
sTrace
(
"logStoreSimpleLog | len:%
"
PRIu64
"
| %s"
,
strlen
(
serialized
),
serialized
);
taosMemoryFree
(
serialized
);
}
void
logStoreSimpleLog2
(
char
*
s
,
SSyncLogStore
*
pLogStore
)
{
if
(
gRaftDetailLog
)
{
char
*
serialized
=
logStoreSimple2Str
(
pLogStore
);
sTrace
(
"logStoreSimpleLog2 | len:%
lu
| %s | %s"
,
strlen
(
serialized
),
s
,
serialized
);
sTrace
(
"logStoreSimpleLog2 | len:%
"
PRIu64
"
| %s | %s"
,
strlen
(
serialized
),
s
,
serialized
);
taosMemoryFree
(
serialized
);
}
}
source/libs/sync/src/syncReplication.c
浏览文件 @
5965629b
...
...
@@ -47,138 +47,75 @@
// msource |-> i,
// mdest |-> j])
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
//
int32_t
syncNodeAppendEntriesPeers
(
SSyncNode
*
pSyncNode
)
{
ASSERT
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_LEADER
);
syncIndexMgrLog2
(
"==syncNodeAppendEntriesPeers== pNextIndex"
,
pSyncNode
->
pNextIndex
);
syncIndexMgrLog2
(
"==syncNodeAppendEntriesPeers== pMatchIndex"
,
pSyncNode
->
pMatchIndex
);
logStoreSimpleLog2
(
"==syncNodeAppendEntriesPeers=="
,
pSyncNode
->
pLogStore
);
int32_t
syncNodeReplicateOne
(
SSyncNode
*
pSyncNode
,
SRaftId
*
pDestId
)
{
// next index
SyncIndex
nextIndex
=
syncIndexMgrGetIndex
(
pSyncNode
->
pNextIndex
,
pDestId
);
// maybe start snapshot
SyncIndex
logStartIndex
=
pSyncNode
->
pLogStore
->
syncLogBeginIndex
(
pSyncNode
->
pLogStore
);
SyncIndex
logEndIndex
=
pSyncNode
->
pLogStore
->
syncLogEndIndex
(
pSyncNode
->
pLogStore
);
if
(
nextIndex
<
logStartIndex
||
nextIndex
-
1
>
logEndIndex
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"start snapshot for next-index:%ld, start:%ld, end:%ld"
,
nextIndex
,
logStartIndex
,
logEndIndex
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
int32_t
ret
=
0
;
for
(
int
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SRaftId
*
pDestId
=
&
(
pSyncNode
->
peersId
[
i
]);
// start snapshot
int32_t
code
=
syncNodeStartSnapshot
(
pSyncNode
,
pDestId
);
ASSERT
(
code
==
0
);
return
0
;
}
// set prevLogIndex
SyncIndex
nextIndex
=
syncIndexMgrGetIndex
(
pSyncNode
->
pNextIndex
,
pDestId
);
// pre index, pre term
SyncIndex
preLogIndex
=
syncNodeGetPreIndex
(
pSyncNode
,
nextIndex
);
SyncTerm
preLogTerm
=
syncNodeGetPreTerm
(
pSyncNode
,
nextIndex
);
SyncIndex
preLogIndex
=
nextIndex
-
1
;
// prepare entry
SyncAppendEntries
*
pMsg
=
NULL
;
// set preLogTerm
SyncTerm
preLogTerm
=
0
;
if
(
preLogIndex
>=
SYNC_INDEX_BEGIN
)
{
SSyncRaftEntry
*
pPreEntry
=
pSyncNode
->
pLogStore
->
getEntry
(
pSyncNode
->
pLogStore
,
preLogIndex
);
ASSERT
(
pPreEntry
!=
NULL
);
SSyncRaftEntry
*
pEntry
;
int32_t
code
=
pSyncNode
->
pLogStore
->
syncLogGetEntry
(
pSyncNode
->
pLogStore
,
nextIndex
,
&
pEntry
);
preLogTerm
=
pPreEntry
->
term
;
syncEntryDestory
(
pPreEntry
);
}
if
(
code
==
0
)
{
ASSERT
(
pEntry
!=
NULL
);
// batch optimized
// SyncIndex lastIndex = syncUtilMinIndex(pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore), nextIndex);
SyncAppendEntries
*
pMsg
=
NULL
;
SSyncRaftEntry
*
pEntry
=
pSyncNode
->
pLogStore
->
getEntry
(
pSyncNode
->
pLogStore
,
nextIndex
);
if
(
pEntry
!=
NULL
)
{
pMsg
=
syncAppendEntriesBuild
(
pEntry
->
bytes
,
pSyncNode
->
vgId
);
ASSERT
(
pMsg
!=
NULL
);
pMsg
=
syncAppendEntriesBuild
(
pEntry
->
bytes
,
pSyncNode
->
vgId
);
ASSERT
(
pMsg
!=
NULL
);
// add pEntry into msg
uint32_t
len
;
char
*
serialized
=
syncEntrySerialize
(
pEntry
,
&
len
);
ASSERT
(
len
==
pEntry
->
bytes
);
memcpy
(
pMsg
->
data
,
serialized
,
len
);
// add pEntry into msg
uint32_t
len
;
char
*
serialized
=
syncEntrySerialize
(
pEntry
,
&
len
);
ASSERT
(
len
==
pEntry
->
bytes
);
memcpy
(
pMsg
->
data
,
serialized
,
len
);
taosMemoryFree
(
serialized
);
syncEntryDestory
(
pEntry
);
taosMemoryFree
(
serialized
);
syncEntryDestory
(
pEntry
);
}
else
{
// maybe overflow, send empty record
}
else
{
if
(
terrno
==
TSDB_CODE_WAL_LOG_NOT_EXIST
)
{
// no entry in log
pMsg
=
syncAppendEntriesBuild
(
0
,
pSyncNode
->
vgId
);
ASSERT
(
pMsg
!=
NULL
);
}
ASSERT
(
pMsg
!=
NULL
);
pMsg
->
srcId
=
pSyncNode
->
myRaftId
;
pMsg
->
destId
=
*
pDestId
;
pMsg
->
term
=
pSyncNode
->
pRaftStore
->
currentTerm
;
pMsg
->
prevLogIndex
=
preLogIndex
;
pMsg
->
prevLogTerm
=
preLogTerm
;
pMsg
->
commitIndex
=
pSyncNode
->
commitIndex
;
syncAppendEntriesLog2
(
"==syncNodeAppendEntriesPeers=="
,
pMsg
);
// send AppendEntries
syncNodeAppendEntries
(
pSyncNode
,
pDestId
,
pMsg
);
syncAppendEntriesDestroy
(
pMsg
);
}
return
ret
;
}
int32_t
syncNodeAppendEntriesOnePeer
(
SSyncNode
*
pSyncNode
,
SRaftId
*
pDestId
,
SyncIndex
nextIndex
)
{
int32_t
ret
=
0
;
// pre index, pre term
SyncIndex
preLogIndex
=
syncNodeGetPreIndex
(
pSyncNode
,
nextIndex
);
SyncTerm
preLogTerm
=
syncNodeGetPreTerm
(
pSyncNode
,
nextIndex
);
if
(
preLogTerm
==
SYNC_TERM_INVALID
)
{
SyncIndex
newNextIndex
=
syncNodeGetLastIndex
(
pSyncNode
)
+
1
;
// SyncIndex newNextIndex = nextIndex + 1;
syncIndexMgrSetIndex
(
pSyncNode
->
pNextIndex
,
pDestId
,
newNextIndex
);
syncIndexMgrSetIndex
(
pSyncNode
->
pMatchIndex
,
pDestId
,
SYNC_INDEX_INVALID
);
sError
(
"vgId:%d, sync get pre term error, nextIndex:%"
PRId64
", update next-index:%"
PRId64
", match-index:%d, raftid:%"
PRId64
,
pSyncNode
->
vgId
,
nextIndex
,
newNextIndex
,
SYNC_INDEX_INVALID
,
pDestId
->
addr
);
return
-
1
;
}
// entry pointer array
SSyncRaftEntry
*
entryPArr
[
SYNC_MAX_BATCH_SIZE
];
memset
(
entryPArr
,
0
,
sizeof
(
entryPArr
));
// get entry batch
int32_t
getCount
=
0
;
SyncIndex
getEntryIndex
=
nextIndex
;
for
(
int32_t
i
=
0
;
i
<
pSyncNode
->
pRaftCfg
->
batchSize
;
++
i
)
{
SSyncRaftEntry
*
pEntry
=
NULL
;
int32_t
code
=
pSyncNode
->
pLogStore
->
syncLogGetEntry
(
pSyncNode
->
pLogStore
,
getEntryIndex
,
&
pEntry
);
if
(
code
==
0
)
{
ASSERT
(
pEntry
!=
NULL
);
entryPArr
[
i
]
=
pEntry
;
getCount
++
;
getEntryIndex
++
;
}
else
{
break
;
}
}
// event log
do
{
char
logBuf
[
128
];
char
host
[
64
];
uint16_t
port
;
syncUtilU642Addr
(
pDestId
->
addr
,
host
,
sizeof
(
host
),
&
port
);
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"build batch:%d for %s:%d"
,
getCount
,
host
,
port
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
while
(
0
);
do
{
char
host
[
64
];
uint16_t
port
;
syncUtilU642Addr
(
pDestId
->
addr
,
host
,
sizeof
(
host
),
&
port
);
// build msg
SyncAppendEntriesBatch
*
pMsg
=
syncAppendEntriesBatchBuild
(
entryPArr
,
getCount
,
pSyncNode
->
vgId
);
ASSERT
(
pMsg
!=
NULL
);
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"replicate to %s:%d error, next-index:%ld"
,
host
,
port
,
nextIndex
);
syncNodeErrorLog
(
pSyncNode
,
logBuf
);
}
while
(
0
);
// free entries
for
(
int32_t
i
=
0
;
i
<
pSyncNode
->
pRaftCfg
->
batchSize
;
++
i
)
{
SSyncRaftEntry
*
pEntry
=
entryPArr
[
i
];
if
(
pEntry
!=
NULL
)
{
syncEntryDestory
(
pEntry
);
entryPArr
[
i
]
=
NULL
;
syncAppendEntriesDestroy
(
pMsg
);
return
-
1
;
}
}
// prepare msg
ASSERT
(
pMsg
!=
NULL
);
pMsg
->
srcId
=
pSyncNode
->
myRaftId
;
pMsg
->
destId
=
*
pDestId
;
pMsg
->
term
=
pSyncNode
->
pRaftStore
->
currentTerm
;
...
...
@@ -186,293 +123,67 @@ int32_t syncNodeAppendEntriesOnePeer(SSyncNode* pSyncNode, SRaftId* pDestId, Syn
pMsg
->
prevLogTerm
=
preLogTerm
;
pMsg
->
commitIndex
=
pSyncNode
->
commitIndex
;
pMsg
->
privateTerm
=
0
;
pMsg
->
dataCount
=
getCount
;
// pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId)
;
// send msg
syncNodeAppendEntriesBatch
(
pSyncNode
,
pDestId
,
pMsg
);
// speed up
if
(
pMsg
->
dataCount
>
0
&&
pSyncNode
->
commitIndex
-
pMsg
->
prevLogIndex
>
SYNC_SLOW_DOWN_RANGE
)
{
ret
=
1
;
#if 0
do {
char logBuf[128];
char host[64];
uint16_t port;
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
snprintf(logBuf, sizeof(logBuf), "maybe speed up for %s:%d, pre-index:%ld", host, port, pMsg->prevLogIndex);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
#endif
}
syncAppendEntriesBatchDestroy
(
pMsg
);
syncNodeMaybeSendAppendEntries
(
pSyncNode
,
pDestId
,
pMsg
);
syncAppendEntriesDestroy
(
pMsg
);
return
ret
;
return
0
;
}
int32_t
syncNode
AppendEntriesPeersSnapshot2
(
SSyncNode
*
pSyncNode
)
{
int32_t
syncNode
Replicate
(
SSyncNode
*
pSyncNode
)
{
if
(
pSyncNode
->
state
!=
TAOS_SYNC_STATE_LEADER
)
{
return
-
1
;
}
int32_t
ret
=
0
;
for
(
int
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SRaftId
*
pDestId
=
&
(
pSyncNode
->
peersId
[
i
]);
// next index
SyncIndex
nextIndex
=
syncIndexMgrGetIndex
(
pSyncNode
->
pNextIndex
,
pDestId
);
ret
=
syncNodeAppendEntriesOnePeer
(
pSyncNode
,
pDestId
,
nextIndex
);
}
return
ret
;
}
#if 0
int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode) {
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
return -1;
}
syncNodeEventLog
(
pSyncNode
,
"do replicate"
);
int32_t
ret
=
0
;
for
(
int
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SRaftId
*
pDestId
=
&
(
pSyncNode
->
peersId
[
i
]);
// next index
SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
// pre index, pre term
SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);
if (preLogTerm == SYNC_TERM_INVALID) {
SyncIndex newNextIndex = syncNodeGetLastIndex(pSyncNode) + 1;
// SyncIndex newNextIndex = nextIndex + 1;
syncIndexMgrSetIndex(pSyncNode->pNextIndex, pDestId, newNextIndex);
syncIndexMgrSetIndex(pSyncNode->pMatchIndex, pDestId, SYNC_INDEX_INVALID);
sError("vgId:%d, sync get pre term error, nextIndex:%" PRId64 ", update next-index:%" PRId64
", match-index:%d, raftid:%" PRId64,
pSyncNode->vgId, nextIndex, newNextIndex, SYNC_INDEX_INVALID, pDestId->addr);
return -1;
}
// entry pointer array
SSyncRaftEntry* entryPArr[SYNC_MAX_BATCH_SIZE];
memset(entryPArr, 0, sizeof(entryPArr));
// get entry batch
int32_t getCount = 0;
SyncIndex getEntryIndex = nextIndex;
for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) {
SSyncRaftEntry* pEntry = NULL;
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, getEntryIndex, &pEntry);
if (code == 0) {
ASSERT(pEntry != NULL);
entryPArr[i] = pEntry;
getCount++;
getEntryIndex++;
} else {
break;
}
}
// event log
do {
char logBuf[128];
char host[64];
uint16_t port;
ret
=
syncNodeReplicateOne
(
pSyncNode
,
pDestId
);
if
(
ret
!=
0
)
{
char
host
[
64
];
int16_t
port
;
syncUtilU642Addr
(
pDestId
->
addr
,
host
,
sizeof
(
host
),
&
port
);
snprintf(logBuf, sizeof(logBuf), "build batch:%d for %s:%d", getCount, host, port);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
// build msg
SyncAppendEntriesBatch* pMsg = syncAppendEntriesBatchBuild(entryPArr, getCount, pSyncNode->vgId);
ASSERT(pMsg != NULL);
// free entries
for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) {
SSyncRaftEntry* pEntry = entryPArr[i];
if (pEntry != NULL) {
syncEntryDestory(pEntry);
entryPArr[i] = NULL;
}
}
// prepare msg
pMsg->srcId = pSyncNode->myRaftId;
pMsg->destId = *pDestId;
pMsg->term = pSyncNode->pRaftStore->currentTerm;
pMsg->prevLogIndex = preLogIndex;
pMsg->prevLogTerm = preLogTerm;
pMsg->commitIndex = pSyncNode->commitIndex;
pMsg->privateTerm = 0;
pMsg->dataCount = getCount;
// send msg
syncNodeAppendEntriesBatch(pSyncNode, pDestId, pMsg);
// speed up
if (pMsg->dataCount > 0 && pSyncNode->commitIndex - pMsg->prevLogIndex > SYNC_SLOW_DOWN_RANGE) {
ret = 1;
#if 0
do {
char logBuf[128];
char host[64];
uint16_t port;
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
snprintf(logBuf, sizeof(logBuf), "maybe speed up for %s:%d, pre-index:%ld", host, port, pMsg->prevLogIndex);
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
#endif
sError
(
"vgId:%d, do append entries error for %s:%d"
,
pSyncNode
->
vgId
,
host
,
port
);
}
syncAppendEntriesBatchDestroy(pMsg);
}
return
ret
;
return
0
;
}
#endif
int32_t
syncNodeAppendEntriesPeersSnapshot
(
SSyncNode
*
pSyncNode
)
{
ASSERT
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_LEADER
);
syncIndexMgrLog2
(
"begin append entries peers pNextIndex:"
,
pSyncNode
->
pNextIndex
);
syncIndexMgrLog2
(
"begin append entries peers pMatchIndex:"
,
pSyncNode
->
pMatchIndex
);
logStoreSimpleLog2
(
"begin append entries peers LogStore:"
,
pSyncNode
->
pLogStore
);
int32_t
syncNodeSendAppendEntries
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
const
SyncAppendEntries
*
pMsg
)
{
int32_t
ret
=
0
;
for
(
int
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SRaftId
*
pDestId
=
&
(
pSyncNode
->
peersId
[
i
]);
// next index
SyncIndex
nextIndex
=
syncIndexMgrGetIndex
(
pSyncNode
->
pNextIndex
,
pDestId
);
// pre index, pre term
SyncIndex
preLogIndex
=
syncNodeGetPreIndex
(
pSyncNode
,
nextIndex
);
SyncTerm
preLogTerm
=
syncNodeGetPreTerm
(
pSyncNode
,
nextIndex
);
if
(
preLogTerm
==
SYNC_TERM_INVALID
)
{
SyncIndex
newNextIndex
=
syncNodeGetLastIndex
(
pSyncNode
)
+
1
;
// SyncIndex newNextIndex = nextIndex + 1;
syncIndexMgrSetIndex
(
pSyncNode
->
pNextIndex
,
pDestId
,
newNextIndex
);
syncIndexMgrSetIndex
(
pSyncNode
->
pMatchIndex
,
pDestId
,
SYNC_INDEX_INVALID
);
sError
(
"vgId:%d, sync get pre term error, nextIndex:%"
PRId64
", update next-index:%"
PRId64
", match-index:%d, raftid:%"
PRId64
,
pSyncNode
->
vgId
,
nextIndex
,
newNextIndex
,
SYNC_INDEX_INVALID
,
pDestId
->
addr
);
return
-
1
;
}
// prepare entry
SyncAppendEntries
*
pMsg
=
NULL
;
SSyncRaftEntry
*
pEntry
;
int32_t
code
=
pSyncNode
->
pLogStore
->
syncLogGetEntry
(
pSyncNode
->
pLogStore
,
nextIndex
,
&
pEntry
);
if
(
code
==
0
)
{
ASSERT
(
pEntry
!=
NULL
);
pMsg
=
syncAppendEntriesBuild
(
pEntry
->
bytes
,
pSyncNode
->
vgId
);
ASSERT
(
pMsg
!=
NULL
);
// add pEntry into msg
uint32_t
len
;
char
*
serialized
=
syncEntrySerialize
(
pEntry
,
&
len
);
ASSERT
(
len
==
pEntry
->
bytes
);
memcpy
(
pMsg
->
data
,
serialized
,
len
);
syncLogSendAppendEntries
(
pSyncNode
,
pMsg
,
""
);
taosMemoryFree
(
serialized
);
syncEntryDestory
(
pEntry
);
SRpcMsg
rpcMsg
;
syncAppendEntries2RpcMsg
(
pMsg
,
&
rpcMsg
);
syncNodeSendMsgById
(
destRaftId
,
pSyncNode
,
&
rpcMsg
);
}
else
{
if
(
terrno
==
TSDB_CODE_WAL_LOG_NOT_EXIST
)
{
// no entry in log
pMsg
=
syncAppendEntriesBuild
(
0
,
pSyncNode
->
vgId
);
ASSERT
(
pMsg
!=
NULL
);
}
else
{
syncNodeLog3
(
""
,
pSyncNode
);
ASSERT
(
0
);
}
}
SPeerState
*
pState
=
syncNodeGetPeerState
(
pSyncNode
,
destRaftId
);
ASSERT
(
pState
!=
NULL
);
// prepare msg
ASSERT
(
pMsg
!=
NULL
);
pMsg
->
srcId
=
pSyncNode
->
myRaftId
;
pMsg
->
destId
=
*
pDestId
;
pMsg
->
term
=
pSyncNode
->
pRaftStore
->
currentTerm
;
pMsg
->
prevLogIndex
=
preLogIndex
;
pMsg
->
prevLogTerm
=
preLogTerm
;
pMsg
->
commitIndex
=
pSyncNode
->
commitIndex
;
pMsg
->
privateTerm
=
0
;
// pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId);
// send msg
syncNodeAppendEntries
(
pSyncNode
,
pDestId
,
pMsg
);
syncAppendEntriesDestroy
(
pMsg
);
}
pState
->
lastSendIndex
=
pMsg
->
prevLogIndex
+
1
;
pState
->
lastSendTime
=
taosGetTimestampMs
();
return
ret
;
}
int32_t
syncNodeReplicate
(
SSyncNode
*
pSyncNode
,
bool
isTimer
)
{
// start replicate
int32_t
syncNodeMaybeSendAppendEntries
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
const
SyncAppendEntries
*
pMsg
)
{
int32_t
ret
=
0
;
switch
(
pSyncNode
->
pRaftCfg
->
snapshotStrategy
)
{
case
SYNC_STRATEGY_NO_SNAPSHOT
:
ret
=
syncNodeAppendEntriesPeers
(
pSyncNode
);
break
;
case
SYNC_STRATEGY_STANDARD_SNAPSHOT
:
ret
=
syncNodeAppendEntriesPeersSnapshot
(
pSyncNode
);
break
;
case
SYNC_STRATEGY_WAL_FIRST
:
ret
=
syncNodeAppendEntriesPeersSnapshot2
(
pSyncNode
);
break
;
default:
ret
=
syncNodeAppendEntriesPeers
(
pSyncNode
);
break
;
}
// start delay
int64_t
timeNow
=
taosGetTimestampMs
();
int64_t
startDelay
=
timeNow
-
pSyncNode
->
startTime
;
// replicate delay
int64_t
replicateDelay
=
timeNow
-
pSyncNode
->
lastReplicateTime
;
pSyncNode
->
lastReplicateTime
=
timeNow
;
if
(
ret
>
0
&&
isTimer
&&
startDelay
>
SYNC_SPEED_UP_AFTER_MS
)
{
// speed up replicate
int32_t
ms
=
pSyncNode
->
heartbeatTimerMS
<
SYNC_SPEED_UP_HB_TIMER
?
pSyncNode
->
heartbeatTimerMS
:
SYNC_SPEED_UP_HB_TIMER
;
syncNodeRestartNowHeartbeatTimerMS
(
pSyncNode
,
ms
);
#if 0
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "replicate speed up");
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
#endif
if
(
syncNodeNeedSendAppendEntries
(
pSyncNode
,
destRaftId
,
pMsg
))
{
ret
=
syncNodeSendAppendEntries
(
pSyncNode
,
destRaftId
,
pMsg
);
}
else
{
syncNodeRestartHeartbeatTimer
(
pSyncNode
);
#if 0
do {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "replicate slow down");
syncNodeEventLog(pSyncNode, logBuf);
} while (0);
#endif
char
logBuf
[
128
];
char
host
[
64
];
int16_t
port
;
syncUtilU642Addr
(
destRaftId
->
addr
,
host
,
sizeof
(
host
),
&
port
);
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"do not repcate to %s:%d for index:%ld"
,
host
,
port
,
pMsg
->
prevLogIndex
+
1
);
syncNodeEventLog
(
pSyncNode
,
logBuf
);
}
return
ret
;
...
...
@@ -488,12 +199,34 @@ int32_t syncNodeAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, c
return
ret
;
}
int32_t
syncNode
AppendEntriesBatch
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
const
SyncAppendEntriesBatch
*
pMsg
)
{
syncLogSend
AppendEntriesBatch
(
pSyncNode
,
pMsg
,
""
);
int32_t
syncNode
SendHeartbeat
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
const
SyncHeartbeat
*
pMsg
)
{
int32_t
ret
=
0
;
syncLogSend
Heartbeat
(
pSyncNode
,
pMsg
,
""
);
SRpcMsg
rpcMsg
;
syncAppendEntriesBatch2RpcMsg
(
pMsg
,
&
rpcMsg
);
syncNodeSendMsgById
(
destRaftId
,
pSyncNode
,
&
rpcMsg
);
syncHeartbeat2RpcMsg
(
pMsg
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
(
pMsg
->
destId
),
pSyncNode
,
&
rpcMsg
);
return
ret
;
}
int32_t
syncNodeHeartbeatPeers
(
SSyncNode
*
pSyncNode
)
{
for
(
int32_t
i
=
0
;
i
<
pSyncNode
->
peersNum
;
++
i
)
{
SyncHeartbeat
*
pSyncMsg
=
syncHeartbeatBuild
(
pSyncNode
->
vgId
);
pSyncMsg
->
srcId
=
pSyncNode
->
myRaftId
;
pSyncMsg
->
destId
=
pSyncNode
->
peersId
[
i
];
pSyncMsg
->
term
=
pSyncNode
->
pRaftStore
->
currentTerm
;
pSyncMsg
->
commitIndex
=
pSyncNode
->
commitIndex
;
pSyncMsg
->
minMatchIndex
=
syncMinMatchIndex
(
pSyncNode
);
pSyncMsg
->
privateTerm
=
0
;
SRpcMsg
rpcMsg
;
syncHeartbeat2RpcMsg
(
pSyncMsg
,
&
rpcMsg
);
// send msg
syncNodeSendHeartbeat
(
pSyncNode
,
&
(
pSyncMsg
->
destId
),
pSyncMsg
);
syncHeartbeatDestroy
(
pSyncMsg
);
}
return
0
;
}
\ No newline at end of file
source/libs/sync/src/syncRequestVote.c
浏览文件 @
5965629b
...
...
@@ -42,65 +42,6 @@
// m)
// /\ UNCHANGED <<state, currentTerm, candidateVars, leaderVars, logVars>>
//
int32_t
syncNodeOnRequestVoteCb
(
SSyncNode
*
ths
,
SyncRequestVote
*
pMsg
)
{
int32_t
ret
=
0
;
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
ths
,
&
(
pMsg
->
srcId
))
&&
!
ths
->
pRaftCfg
->
isStandBy
)
{
syncLogRecvRequestVote
(
ths
,
pMsg
,
"maybe replica already dropped"
);
return
-
1
;
}
bool
logOK
=
(
pMsg
->
lastLogTerm
>
ths
->
pLogStore
->
getLastTerm
(
ths
->
pLogStore
))
||
((
pMsg
->
lastLogTerm
==
ths
->
pLogStore
->
getLastTerm
(
ths
->
pLogStore
))
&&
(
pMsg
->
lastLogIndex
>=
ths
->
pLogStore
->
getLastIndex
(
ths
->
pLogStore
)));
// maybe update term
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncNodeUpdateTerm
(
ths
,
pMsg
->
term
);
#if 0
if (logOK) {
syncNodeUpdateTerm(ths, pMsg->term);
} else {
syncNodeUpdateTermWithoutStepDown(ths, pMsg->term);
}
#endif
}
ASSERT
(
pMsg
->
term
<=
ths
->
pRaftStore
->
currentTerm
);
bool
grant
=
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
)
&&
logOK
&&
((
!
raftStoreHasVoted
(
ths
->
pRaftStore
))
||
(
syncUtilSameId
(
&
(
ths
->
pRaftStore
->
voteFor
),
&
(
pMsg
->
srcId
))));
if
(
grant
)
{
// maybe has already voted for pMsg->srcId
// vote again, no harm
raftStoreVote
(
ths
->
pRaftStore
,
&
(
pMsg
->
srcId
));
// forbid elect for this round
syncNodeResetElectTimer
(
ths
);
}
// send msg
SyncRequestVoteReply
*
pReply
=
syncRequestVoteReplyBuild
(
ths
->
vgId
);
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
pRaftStore
->
currentTerm
;
pReply
->
voteGranted
=
grant
;
// trace log
do
{
char
logBuf
[
32
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"grant:%d"
,
pReply
->
voteGranted
);
syncLogRecvRequestVote
(
ths
,
pMsg
,
logBuf
);
syncLogSendRequestVoteReply
(
ths
,
pReply
,
""
);
}
while
(
0
);
SRpcMsg
rpcMsg
;
syncRequestVoteReply2RpcMsg
(
pReply
,
&
rpcMsg
);
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcMsg
);
syncRequestVoteReplyDestroy
(
pReply
);
return
ret
;
}
static
bool
syncNodeOnRequestVoteLogOK
(
SSyncNode
*
pSyncNode
,
SyncRequestVote
*
pMsg
)
{
SyncTerm
myLastTerm
=
syncNodeGetLastTerm
(
pSyncNode
);
...
...
@@ -157,7 +98,7 @@ static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pM
return
false
;
}
int32_t
syncNodeOnRequestVote
SnapshotCb
(
SSyncNode
*
ths
,
SyncRequestVote
*
pMsg
)
{
int32_t
syncNodeOnRequestVote
(
SSyncNode
*
ths
,
SyncRequestVote
*
pMsg
)
{
int32_t
ret
=
0
;
// if already drop replica, do not process
...
...
@@ -170,14 +111,8 @@ int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) {
// maybe update term
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncNodeUpdateTerm
(
ths
,
pMsg
->
term
);
#if 0
if (logOK) {
syncNodeUpdateTerm(ths, pMsg->term);
} else {
syncNodeUpdateTermWithoutStepDown(ths, pMsg->term);
}
#endif
syncNodeStepDown
(
ths
,
pMsg
->
term
);
// syncNodeUpdateTerm(ths, pMsg->term);
}
ASSERT
(
pMsg
->
term
<=
ths
->
pRaftStore
->
currentTerm
);
...
...
@@ -188,6 +123,9 @@ int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) {
// vote again, no harm
raftStoreVote
(
ths
->
pRaftStore
,
&
(
pMsg
->
srcId
));
// candidate ?
syncNodeStepDown
(
ths
,
ths
->
pRaftStore
->
currentTerm
);
// forbid elect for this round
syncNodeResetElectTimer
(
ths
);
}
...
...
source/libs/sync/src/syncRequestVoteReply.c
浏览文件 @
5965629b
...
...
@@ -37,63 +37,7 @@
// /\ Discard(m)
// /\ UNCHANGED <<serverVars, votedFor, leaderVars, logVars>>
//
int32_t
syncNodeOnRequestVoteReplyCb
(
SSyncNode
*
ths
,
SyncRequestVoteReply
*
pMsg
)
{
int32_t
ret
=
0
;
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
ths
,
&
(
pMsg
->
srcId
))
&&
!
ths
->
pRaftCfg
->
isStandBy
)
{
syncLogRecvRequestVoteReply
(
ths
,
pMsg
,
"maybe replica already dropped"
);
return
-
1
;
}
// drop stale response
if
(
pMsg
->
term
<
ths
->
pRaftStore
->
currentTerm
)
{
syncLogRecvRequestVoteReply
(
ths
,
pMsg
,
"drop stale response"
);
return
-
1
;
}
// ASSERT(!(pMsg->term > ths->pRaftStore->currentTerm));
// no need this code, because if I receive reply.term, then I must have sent for that term.
// if (pMsg->term > ths->pRaftStore->currentTerm) {
// syncNodeUpdateTerm(ths, pMsg->term);
// }
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncLogRecvRequestVoteReply
(
ths
,
pMsg
,
"error term"
);
return
-
1
;
}
syncLogRecvRequestVoteReply
(
ths
,
pMsg
,
""
);
ASSERT
(
pMsg
->
term
==
ths
->
pRaftStore
->
currentTerm
);
// This tallies votes even when the current state is not Candidate,
// but they won't be looked at, so it doesn't matter.
if
(
ths
->
state
==
TAOS_SYNC_STATE_CANDIDATE
)
{
votesRespondAdd
(
ths
->
pVotesRespond
,
pMsg
);
if
(
pMsg
->
voteGranted
)
{
// add vote
voteGrantedVote
(
ths
->
pVotesGranted
,
pMsg
);
// maybe to leader
if
(
voteGrantedMajority
(
ths
->
pVotesGranted
))
{
if
(
!
ths
->
pVotesGranted
->
toLeader
)
{
syncNodeCandidate2Leader
(
ths
);
// prevent to leader again!
ths
->
pVotesGranted
->
toLeader
=
true
;
}
}
}
else
{
;
// do nothing
// UNCHANGED <<votesGranted, voterLog>>
}
}
return
0
;
}
int32_t
syncNodeOnRequestVoteReplySnapshotCb
(
SSyncNode
*
ths
,
SyncRequestVoteReply
*
pMsg
)
{
int32_t
syncNodeOnRequestVoteReply
(
SSyncNode
*
ths
,
SyncRequestVoteReply
*
pMsg
)
{
int32_t
ret
=
0
;
// if already drop replica, do not process
...
...
@@ -116,6 +60,7 @@ int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteRepl
if
(
pMsg
->
term
>
ths
->
pRaftStore
->
currentTerm
)
{
syncLogRecvRequestVoteReply
(
ths
,
pMsg
,
"error term"
);
syncNodeStepDown
(
ths
,
pMsg
->
term
);
return
-
1
;
}
...
...
source/libs/sync/src/syncRespMgr.c
浏览文件 @
5965629b
...
...
@@ -136,7 +136,7 @@ void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) {
while
(
pStub
)
{
size_t
len
;
void
*
key
=
taosHashGetKey
(
pStub
,
&
len
);
void
*
key
=
taosHashGetKey
(
pStub
,
&
len
);
uint64_t
*
pSeqNum
=
(
uint64_t
*
)
key
;
sum
++
;
...
...
source/libs/sync/src/syncSnapshot.c
浏览文件 @
5965629b
...
...
@@ -41,6 +41,8 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI
}
memset
(
pSender
,
0
,
sizeof
(
*
pSender
));
int64_t
timeNow
=
taosGetTimestampMs
();
pSender
->
start
=
false
;
pSender
->
seq
=
SYNC_SNAPSHOT_SEQ_INVALID
;
pSender
->
ack
=
SYNC_SNAPSHOT_SEQ_INVALID
;
...
...
@@ -51,7 +53,8 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI
pSender
->
pSyncNode
=
pSyncNode
;
pSender
->
replicaIndex
=
replicaIndex
;
pSender
->
term
=
pSyncNode
->
pRaftStore
->
currentTerm
;
pSender
->
privateTerm
=
taosGetTimestampMs
()
+
100
;
pSender
->
privateTerm
=
timeNow
+
100
;
pSender
->
startTime
=
timeNow
;
pSender
->
pSyncNode
->
pFsm
->
FpGetSnapshotInfo
(
pSender
->
pSyncNode
->
pFsm
,
&
(
pSender
->
snapshot
));
pSender
->
finish
=
false
;
}
else
{
...
...
@@ -402,6 +405,24 @@ char *snapshotSender2SimpleStr(SSyncSnapshotSender *pSender, char *event) {
return
s
;
}
int32_t
syncNodeStartSnapshot
(
SSyncNode
*
pSyncNode
,
SRaftId
*
pDestId
)
{
// calculate <start, end> index
syncNodeEventLog
(
pSyncNode
,
"start snapshot ..."
);
SSyncSnapshotSender
*
pSender
=
syncNodeGetSnapshotSender
(
pSyncNode
,
pDestId
);
if
(
pSender
==
NULL
)
{
// create sender
}
else
{
// if <start, end> is same
// return 0;
}
// send begin msg
return
0
;
}
// -------------------------------------
SSyncSnapshotReceiver
*
snapshotReceiverCreate
(
SSyncNode
*
pSyncNode
,
SRaftId
fromId
)
{
bool
condition
=
(
pSyncNode
->
pFsm
->
FpSnapshotStartWrite
!=
NULL
)
&&
(
pSyncNode
->
pFsm
->
FpSnapshotStopWrite
!=
NULL
)
&&
...
...
@@ -721,7 +742,7 @@ char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event)
// condition 3, recv SYNC_SNAPSHOT_SEQ_FORCE_CLOSE, force close
// condition 4, got data, update ack
//
int32_t
syncNodeOnSnapshot
SendCb
(
SSyncNode
*
pSyncNode
,
SyncSnapshotSend
*
pMsg
)
{
int32_t
syncNodeOnSnapshot
(
SSyncNode
*
pSyncNode
,
SyncSnapshotSend
*
pMsg
)
{
// get receiver
SSyncSnapshotReceiver
*
pReceiver
=
pSyncNode
->
pNewNodeReceiver
;
bool
needRsp
=
false
;
...
...
@@ -834,7 +855,7 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) {
// condition 2 sender receives ack, set seq = ack + 1, send msg from seq
// condition 3 sender receives error msg, just print error log
//
int32_t
syncNodeOnSnapshotR
spCb
(
SSyncNode
*
pSyncNode
,
SyncSnapshotRsp
*
pMsg
)
{
int32_t
syncNodeOnSnapshotR
eply
(
SSyncNode
*
pSyncNode
,
SyncSnapshotRsp
*
pMsg
)
{
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
pSyncNode
,
&
(
pMsg
->
srcId
))
&&
pSyncNode
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
sError
(
"vgId:%d, recv sync-snapshot-rsp, maybe replica already dropped"
,
pSyncNode
->
vgId
);
...
...
source/libs/sync/src/syncTimeout.c
浏览文件 @
5965629b
...
...
@@ -16,6 +16,7 @@
#include "syncTimeout.h"
#include "syncElection.h"
#include "syncRaftCfg.h"
#include "syncRaftLog.h"
#include "syncReplication.h"
#include "syncRespMgr.h"
...
...
@@ -60,12 +61,36 @@ static void syncNodeCleanConfigIndex(SSyncNode* ths) {
int32_t
syncNodeTimerRoutine
(
SSyncNode
*
ths
)
{
syncNodeEventLog
(
ths
,
"timer routines"
);
if
(
ths
->
vgId
==
1
)
{
// timer replicate
syncNodeReplicate
(
ths
);
// clean mnode index
if
(
syncNodeIsMnode
(
ths
))
{
syncNodeCleanConfigIndex
(
ths
);
}
// end timeout wal snapshot
int64_t
timeNow
=
taosGetTimestampMs
();
if
(
timeNow
-
ths
->
snapshottingIndex
>
SYNC_DEL_WAL_MS
&&
atomic_load_64
(
&
ths
->
snapshottingIndex
)
!=
SYNC_INDEX_INVALID
)
{
SSyncLogStoreData
*
pData
=
ths
->
pLogStore
->
data
;
int32_t
code
=
walEndSnapshot
(
pData
->
pWal
);
if
(
code
!=
0
)
{
sError
(
"vgId:%d, wal snapshot end error since:%s"
,
ths
->
vgId
,
terrstr
(
terrno
));
return
-
1
;
}
else
{
do
{
char
logBuf
[
256
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"wal snapshot end, index:%ld"
,
atomic_load_64
(
&
ths
->
snapshottingIndex
));
syncNodeEventLog
(
ths
,
logBuf
);
}
while
(
0
);
atomic_store_64
(
&
ths
->
snapshottingIndex
,
SYNC_INDEX_INVALID
);
}
}
#if 0
if (
ths->vgId != 1
) {
if (
!syncNodeIsMnode(ths)
) {
syncRespClean(ths->pSyncRespMgr);
}
#endif
...
...
@@ -73,9 +98,9 @@ int32_t syncNodeTimerRoutine(SSyncNode* ths) {
return
0
;
}
int32_t
syncNodeOnTime
outCb
(
SSyncNode
*
ths
,
SyncTimeout
*
pMsg
)
{
int32_t
syncNodeOnTime
r
(
SSyncNode
*
ths
,
SyncTimeout
*
pMsg
)
{
int32_t
ret
=
0
;
sync
TimeoutLog2
(
"==syncNodeOnTimeoutCb=="
,
pMsg
);
sync
LogRecvTimer
(
ths
,
pMsg
,
""
);
if
(
pMsg
->
timeoutType
==
SYNC_TIMEOUT_PING
)
{
if
(
atomic_load_64
(
&
ths
->
pingTimerLogicClockUser
)
<=
pMsg
->
logicClock
)
{
...
...
@@ -84,28 +109,30 @@ int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg) {
// syncNodePingAll(ths);
// syncNodePingPeers(ths);
// sTrace("vgId:%d, sync timeout, type:ping count:%d", ths->vgId, ths->pingTimerCounter);
syncNodeTimerRoutine
(
ths
);
}
}
else
if
(
pMsg
->
timeoutType
==
SYNC_TIMEOUT_ELECTION
)
{
if
(
atomic_load_64
(
&
ths
->
electTimerLogicClockUser
)
<=
pMsg
->
logicClock
)
{
++
(
ths
->
electTimerCounter
);
sTrace
(
"vgId:%d, sync timer, type:election count:%"
PRId64
", electTimerLogicClockUser:%"
PRId64
""
,
ths
->
vgId
,
ths
->
electTimerCounter
,
ths
->
electTimerLogicClockUser
);
sTrace
(
"vgId:%d, sync timer, type:election count:%lu, lc-user:%ld"
,
ths
->
vgId
,
ths
->
electTimerCounter
,
ths
->
electTimerLogicClockUser
);
syncNodeElect
(
ths
);
}
}
else
if
(
pMsg
->
timeoutType
==
SYNC_TIMEOUT_HEARTBEAT
)
{
if
(
atomic_load_64
(
&
ths
->
heartbeatTimerLogicClockUser
)
<=
pMsg
->
logicClock
)
{
++
(
ths
->
heartbeatTimerCounter
);
sTrace
(
"vgId:%d, sync timer, type:replicate count:%"
PRId64
", heartbeatTimerLogicClockUser:%"
PRId64
""
,
ths
->
vgId
,
ths
->
heartbeatTimerCounter
,
ths
->
heartbeatTimerLogicClockUser
);
syncNodeReplicate
(
ths
,
true
);
sTrace
(
"vgId:%d, sync timer, type:replicate count:%lu, lc-user:%ld"
,
ths
->
vgId
,
ths
->
heartbeatTimerCounter
,
ths
->
heartbeatTimerLogicClockUser
);
// syncNodeReplicate(ths, true);
}
}
else
{
sError
(
"vgId:%d,
unknown timeout
-type:%d"
,
ths
->
vgId
,
pMsg
->
timeoutType
);
sError
(
"vgId:%d,
recv unknown timer
-type:%d"
,
ths
->
vgId
,
pMsg
->
timeoutType
);
}
return
ret
;
}
}
\ No newline at end of file
source/libs/sync/test/syncConfigChangeSnapshotTest.cpp
浏览文件 @
5965629b
...
...
@@ -237,8 +237,8 @@ int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal*
gSyncIO
->
FpOnSyncAppendEntries
=
pSyncNode
->
FpOnAppendEntries
;
gSyncIO
->
FpOnSyncAppendEntriesReply
=
pSyncNode
->
FpOnAppendEntriesReply
;
gSyncIO
->
FpOnSyncSnapshot
Send
=
pSyncNode
->
FpOnSnapshotSend
;
gSyncIO
->
FpOnSyncSnapshotR
sp
=
pSyncNode
->
FpOnSnapshotRsp
;
gSyncIO
->
FpOnSyncSnapshot
=
pSyncNode
->
FpOnSnapshot
;
gSyncIO
->
FpOnSyncSnapshotR
eply
=
pSyncNode
->
FpOnSnapshotReply
;
gSyncIO
->
pSyncNode
=
pSyncNode
;
syncNodeRelease
(
pSyncNode
);
...
...
source/libs/sync/test/syncEncodeTest.cpp
浏览文件 @
5965629b
...
...
@@ -181,8 +181,11 @@ int main(int argc, char **argv) {
SSyncNode
*
pSyncNode
=
syncNodeInit
();
assert
(
pSyncNode
!=
NULL
);
SSyncRaftEntry
*
pEntry
=
pMsg4
;
pSyncNode
->
pLogStore
->
appendEntry
(
pSyncNode
->
pLogStore
,
pEntry
);
SSyncRaftEntry
*
pEntry2
=
pSyncNode
->
pLogStore
->
getEntry
(
pSyncNode
->
pLogStore
,
pEntry
->
index
);
pSyncNode
->
pLogStore
->
syncLogAppendEntry
(
pSyncNode
->
pLogStore
,
pEntry
);
int32_t
code
=
pSyncNode
->
pLogStore
->
syncLogGetEntry
(
pSyncNode
->
pLogStore
,
pEntry
->
index
,
&
pEntry
);
ASSERT
(
code
==
0
);
syncEntryLog2
((
char
*
)
"==pEntry2=="
,
pEntry2
);
// step5
...
...
source/libs/sync/test/syncHeartbeatReplyTest.cpp
浏览文件 @
5965629b
...
...
@@ -36,7 +36,7 @@ void test1() {
void
test2
()
{
SyncHeartbeatReply
*
pMsg
=
createMsg
();
uint32_t
len
=
pMsg
->
bytes
;
char
*
serialized
=
(
char
*
)
taosMemoryMalloc
(
len
);
char
*
serialized
=
(
char
*
)
taosMemoryMalloc
(
len
);
syncHeartbeatReplySerialize
(
pMsg
,
serialized
,
len
);
SyncHeartbeatReply
*
pMsg2
=
syncHeartbeatReplyBuild
(
1000
);
syncHeartbeatReplyDeserialize
(
serialized
,
len
,
pMsg2
);
...
...
@@ -50,7 +50,7 @@ void test2() {
void
test3
()
{
SyncHeartbeatReply
*
pMsg
=
createMsg
();
uint32_t
len
;
char
*
serialized
=
syncHeartbeatReplySerialize2
(
pMsg
,
&
len
);
char
*
serialized
=
syncHeartbeatReplySerialize2
(
pMsg
,
&
len
);
SyncHeartbeatReply
*
pMsg2
=
syncHeartbeatReplyDeserialize2
(
serialized
,
len
);
syncHeartbeatReplyLog2
((
char
*
)
"test3: syncHeartbeatReplySerialize3 -> syncHeartbeatReplyDeserialize2 "
,
pMsg2
);
...
...
source/libs/sync/test/syncHeartbeatTest.cpp
浏览文件 @
5965629b
...
...
@@ -35,7 +35,7 @@ void test1() {
void
test2
()
{
SyncHeartbeat
*
pMsg
=
createMsg
();
uint32_t
len
=
pMsg
->
bytes
;
char
*
serialized
=
(
char
*
)
taosMemoryMalloc
(
len
);
char
*
serialized
=
(
char
*
)
taosMemoryMalloc
(
len
);
syncHeartbeatSerialize
(
pMsg
,
serialized
,
len
);
SyncHeartbeat
*
pMsg2
=
syncHeartbeatBuild
(
789
);
syncHeartbeatDeserialize
(
serialized
,
len
,
pMsg2
);
...
...
@@ -49,7 +49,7 @@ void test2() {
void
test3
()
{
SyncHeartbeat
*
pMsg
=
createMsg
();
uint32_t
len
;
char
*
serialized
=
syncHeartbeatSerialize2
(
pMsg
,
&
len
);
char
*
serialized
=
syncHeartbeatSerialize2
(
pMsg
,
&
len
);
SyncHeartbeat
*
pMsg2
=
syncHeartbeatDeserialize2
(
serialized
,
len
);
syncHeartbeatLog2
((
char
*
)
"test3: syncHeartbeatSerialize2 -> syncHeartbeatDeserialize2 "
,
pMsg2
);
...
...
source/libs/sync/test/syncLogStoreTest.cpp
浏览文件 @
5965629b
...
...
@@ -52,7 +52,7 @@ void cleanup() {
void
logStoreTest
()
{
pLogStore
=
logStoreCreate
(
pSyncNode
);
assert
(
pLogStore
);
assert
(
pLogStore
->
get
LastIndex
(
pLogStore
)
==
SYNC_INDEX_INVALID
);
assert
(
pLogStore
->
syncLog
LastIndex
(
pLogStore
)
==
SYNC_INDEX_INVALID
);
logStoreLog2
((
char
*
)
"logStoreTest"
,
pLogStore
);
...
...
@@ -65,22 +65,20 @@ void logStoreTest() {
pEntry
->
seqNum
=
3
;
pEntry
->
isWeak
=
true
;
pEntry
->
term
=
100
+
i
;
pEntry
->
index
=
pLogStore
->
get
LastIndex
(
pLogStore
)
+
1
;
pEntry
->
index
=
pLogStore
->
syncLog
LastIndex
(
pLogStore
)
+
1
;
snprintf
(
pEntry
->
data
,
dataLen
,
"value%d"
,
i
);
syncEntryLog2
((
char
*
)
"==write entry== :"
,
pEntry
);
pLogStore
->
a
ppendEntry
(
pLogStore
,
pEntry
);
pLogStore
->
syncLogA
ppendEntry
(
pLogStore
,
pEntry
);
syncEntryDestory
(
pEntry
);
if
(
i
==
0
)
{
assert
(
pLogStore
->
get
LastIndex
(
pLogStore
)
==
SYNC_INDEX_BEGIN
);
assert
(
pLogStore
->
syncLog
LastIndex
(
pLogStore
)
==
SYNC_INDEX_BEGIN
);
}
}
logStoreLog2
((
char
*
)
"after appendEntry"
,
pLogStore
);
pLogStore
->
truncate
(
pLogStore
,
3
);
pLogStore
->
syncLogTruncate
(
pLogStore
,
3
);
logStoreLog2
((
char
*
)
"after truncate 3"
,
pLogStore
);
logStoreDestory
(
pLogStore
);
}
...
...
source/libs/sync/test/syncTestTool.cpp
浏览文件 @
5965629b
...
...
@@ -266,14 +266,12 @@ int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal*
gSyncIO
->
FpOnSyncPingReply
=
pSyncNode
->
FpOnPingReply
;
gSyncIO
->
FpOnSyncTimeout
=
pSyncNode
->
FpOnTimeout
;
gSyncIO
->
FpOnSyncClientRequest
=
pSyncNode
->
FpOnClientRequest
;
gSyncIO
->
FpOnSyncRequestVote
=
pSyncNode
->
FpOnRequestVote
;
gSyncIO
->
FpOnSyncRequestVoteReply
=
pSyncNode
->
FpOnRequestVoteReply
;
gSyncIO
->
FpOnSyncAppendEntries
=
pSyncNode
->
FpOnAppendEntries
;
gSyncIO
->
FpOnSyncAppendEntriesReply
=
pSyncNode
->
FpOnAppendEntriesReply
;
gSyncIO
->
FpOnSyncSnapshotSend
=
pSyncNode
->
FpOnSnapshotSend
;
gSyncIO
->
FpOnSyncSnapshotRsp
=
pSyncNode
->
FpOnSnapshotRsp
;
gSyncIO
->
FpOnSyncSnapshot
=
pSyncNode
->
FpOnSnapshot
;
gSyncIO
->
FpOnSyncSnapshotReply
=
pSyncNode
->
FpOnSnapshotReply
;
gSyncIO
->
pSyncNode
=
pSyncNode
;
syncNodeRelease
(
pSyncNode
);
...
...
tests/script/tsim/sync/sync2-test.sim
0 → 100644
浏览文件 @
5965629b
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/deploy.sh -n dnode2 -i 2
system sh/deploy.sh -n dnode3 -i 3
system sh/deploy.sh -n dnode4 -i 4
system sh/cfg.sh -n dnode1 -c supportVnodes -v 0
system sh/exec.sh -n dnode1 -s start
system sh/exec.sh -n dnode2 -s start
system sh/exec.sh -n dnode3 -s start
system sh/exec.sh -n dnode4 -s start
sql connect
sql create dnode $hostname port 7200
sql create dnode $hostname port 7300
sql create dnode $hostname port 7400
$x = 0
step1:
$x = $x + 1
sleep 1000
if $x == 10 then
print ====> dnode not ready!
return -1
endi
sql select * from information_schema.ins_dnodes
print ===> $data00 $data01 $data02 $data03 $data04 $data05
print ===> $data10 $data11 $data12 $data13 $data14 $data15
print ===> $data20 $data21 $data22 $data23 $data24 $data25
print ===> $data30 $data31 $data32 $data33 $data34 $data35
if $rows != 4 then
return -1
endi
if $data(1)[4] != ready then
goto step1
endi
if $data(2)[4] != ready then
goto step1
endi
if $data(3)[4] != ready then
goto step1
endi
if $data(4)[4] != ready then
goto step1
endi
$replica = 3
$vgroups = 1
print ============= create database
sql create database db replica $replica vgroups $vgroups
$loop_cnt = 0
check_db_ready:
$loop_cnt = $loop_cnt + 1
sleep 200
if $loop_cnt == 100 then
print ====> db not ready!
return -1
endi
sql select * from information_schema.ins_databases
print ===> rows: $rows
print $data[2][0] $data[2][1] $data[2][2] $data[2][3] $data[2][4] $data[2][5] $data[2][6] $data[2][7] $data[2][8] $data[2][9] $data[2][6] $data[2][11] $data[2][12] $data[2][13] $data[2][14] $data[2][15] $data[2][16] $data[2][17] $data[2][18] $data[2][19]
if $rows != 3 then
return -1
endi
if $data[2][15] != ready then
goto check_db_ready
endi
sql use db
$loop_cnt = 0
check_vg_ready:
$loop_cnt = $loop_cnt + 1
sleep 200
if $loop_cnt == 300 then
print ====> vgroups not ready!
return -1
endi
sql show vgroups
print ===> rows: $rows
print $data[0][0] $data[0][1] $data[0][2] $data[0][3] $data[0][4] $data[0][5] $data[0][6] $data[0][7] $data[0][8] $data[0][9] $data[0][10] $data[0][11]
if $rows != $vgroups then
return -1
endi
if $data[0][4] == leader then
if $data[0][6] == follower then
if $data[0][8] == follower then
print ---- vgroup $data[0][0] leader locate on dnode $data[0][3]
endi
endi
elif $data[0][6] == leader then
if $data[0][4] == follower then
if $data[0][8] == follower then
print ---- vgroup $data[0][0] leader locate on dnode $data[0][5]
endi
endi
elif $data[0][8] == leader then
if $data[0][4] == follower then
if $data[0][6] == follower then
print ---- vgroup $data[0][0] leader locate on dnode $data[0][7]
endi
endi
else
goto check_vg_ready
endi
#return 0
vg_ready:
print ====> create stable/child table
sql create table stb (ts timestamp, c1 int, c2 float, c3 double) tags (t1 int)
#return 0
sql show stables
if $rows != 1 then
return -1
endi
sql create table ct1 using stb tags(1000)
print ====> step1 insert 1000 records
$N = 1000
$count = 0
while $count < $N
$ms = 1591200000000 + $count
sql insert into ct1 values( $ms , $count , 2.1, 3.1)
$count = $count + 1
endw
print ====> step2 sleep 20s, checking data
sleep 20000
print ====> step3 sleep 30s, kill leader
sleep 30000
print ====> step4 insert 1000 records
$N = 1000
$count = 0
while $count < $N
$ms = 1591201000000 + $count
sql insert into ct1 values( $ms , $count , 2.1, 3.1)
$count = $count + 1
endw
print ====> step5 sleep 20s, checking data
sleep 20000
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录