Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
48b6bd43
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
48b6bd43
编写于
12月 22, 2022
作者:
S
Shengliang Guan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix: restart snapshot sender on receiver is restart
上级
053f48e3
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
89 additions
and
65 deletions
+89
-65
include/util/taoserror.h
include/util/taoserror.h
+1
-0
source/dnode/vnode/src/vnd/vnodeCommit.c
source/dnode/vnode/src/vnd/vnodeCommit.c
+4
-3
source/dnode/vnode/src/vnd/vnodeSnapshot.c
source/dnode/vnode/src/vnd/vnodeSnapshot.c
+1
-1
source/dnode/vnode/src/vnd/vnodeSync.c
source/dnode/vnode/src/vnd/vnodeSync.c
+2
-2
source/libs/sync/src/syncSnapshot.c
source/libs/sync/src/syncSnapshot.c
+78
-57
source/libs/sync/test/syncRaftLogTest2.cpp
source/libs/sync/test/syncRaftLogTest2.cpp
+1
-1
source/libs/sync/test/syncRaftLogTest3.cpp
source/libs/sync/test/syncRaftLogTest3.cpp
+1
-1
source/util/src/terror.c
source/util/src/terror.c
+1
-0
未找到文件。
include/util/taoserror.h
浏览文件 @
48b6bd43
...
...
@@ -517,6 +517,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_SYN_STANDBY_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x0912)
#define TSDB_CODE_SYN_BATCH_ERROR TAOS_DEF_ERROR_CODE(0, 0x0913)
#define TSDB_CODE_SYN_RESTORING TAOS_DEF_ERROR_CODE(0, 0x0914)
#define TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG TAOS_DEF_ERROR_CODE(0, 0x0915) // internal
#define TSDB_CODE_SYN_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x09FF)
// tq
...
...
source/dnode/vnode/src/vnd/vnodeCommit.c
浏览文件 @
48b6bd43
...
...
@@ -234,10 +234,11 @@ int vnodeAsyncCommit(SVnode *pVnode) {
_exit:
if
(
code
)
{
vError
(
"vgId:%d,
%s failed since %s, commit id:%"
PRId64
,
TD_VID
(
pVnode
),
__func__
,
tstrerror
(
code
),
vError
(
"vgId:%d,
vnode async commit failed since %s, commitId:%"
PRId64
,
TD_VID
(
pVnode
)
,
tstrerror
(
code
),
pVnode
->
state
.
commitID
);
}
else
{
vDebug
(
"vgId:%d, %s done"
,
TD_VID
(
pVnode
),
__func__
);
vInfo
(
"vgId:%d, vnode async commit done, commitId:%"
PRId64
" term:%"
PRId64
" applied:%"
PRId64
,
TD_VID
(
pVnode
),
pVnode
->
state
.
commitID
,
pVnode
->
state
.
applyTerm
,
pVnode
->
state
.
applied
);
}
return
code
;
}
...
...
@@ -256,7 +257,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) {
char
dir
[
TSDB_FILENAME_LEN
]
=
{
0
};
SVnode
*
pVnode
=
pInfo
->
pVnode
;
vInfo
(
"vgId:%d, start to commit, commit
ID
:%"
PRId64
" version:%"
PRId64
" term: %"
PRId64
,
TD_VID
(
pVnode
),
vInfo
(
"vgId:%d, start to commit, commit
Id
:%"
PRId64
" version:%"
PRId64
" term: %"
PRId64
,
TD_VID
(
pVnode
),
pVnode
->
state
.
commitID
,
pVnode
->
state
.
applied
,
pVnode
->
state
.
applyTerm
);
// persist wal before starting
...
...
source/dnode/vnode/src/vnd/vnodeSnapshot.c
浏览文件 @
48b6bd43
...
...
@@ -423,7 +423,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) {
ASSERT
(
pHdr
->
index
==
pWriter
->
index
+
1
);
pWriter
->
index
=
pHdr
->
index
;
vInfo
(
"vgId:%d, vnode snapshot write data, index:%"
PRId64
" type:%d
nData
:%d"
,
TD_VID
(
pVnode
),
pHdr
->
index
,
vInfo
(
"vgId:%d, vnode snapshot write data, index:%"
PRId64
" type:%d
blockLen
:%d"
,
TD_VID
(
pVnode
),
pHdr
->
index
,
pHdr
->
type
,
nData
);
switch
(
pHdr
->
type
)
{
...
...
source/dnode/vnode/src/vnd/vnodeSync.c
浏览文件 @
48b6bd43
...
...
@@ -465,9 +465,9 @@ static int32_t vnodeSnapshotStopWrite(const SSyncFSM *pFsm, void *pWriter, bool
static
int32_t
vnodeSnapshotDoWrite
(
const
SSyncFSM
*
pFsm
,
void
*
pWriter
,
void
*
pBuf
,
int32_t
len
)
{
SVnode
*
pVnode
=
pFsm
->
data
;
vDebug
(
"vgId:%d, continue write vnode snapshot,
l
en:%d"
,
pVnode
->
config
.
vgId
,
len
);
vDebug
(
"vgId:%d, continue write vnode snapshot,
blockL
en:%d"
,
pVnode
->
config
.
vgId
,
len
);
int32_t
code
=
vnodeSnapWrite
(
pWriter
,
pBuf
,
len
);
vDebug
(
"vgId:%d, continue write vnode snapshot finished,
l
en:%d"
,
pVnode
->
config
.
vgId
,
len
);
vDebug
(
"vgId:%d, continue write vnode snapshot finished,
blockL
en:%d"
,
pVnode
->
config
.
vgId
,
len
);
return
code
;
}
...
...
source/libs/sync/src/syncSnapshot.c
浏览文件 @
48b6bd43
...
...
@@ -294,7 +294,7 @@ int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) {
}
if
(
snapshotSenderIsStart
(
pSender
))
{
sS
Error
(
pSender
,
"snapshot sender already start, ignore"
);
sS
Info
(
pSender
,
"snapshot sender already start, ignore"
);
return
0
;
}
...
...
@@ -523,7 +523,7 @@ static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnap
static
int32_t
snapshotReceiverGotData
(
SSyncSnapshotReceiver
*
pReceiver
,
SyncSnapshotSend
*
pMsg
)
{
if
(
pMsg
->
seq
!=
pReceiver
->
ack
+
1
)
{
sRError
(
pReceiver
,
"snapshot receiver invalid seq, ack:%d seq:%d"
,
pReceiver
->
ack
,
pMsg
->
seq
);
terrno
=
TSDB_CODE_SYN_IN
TERNAL_ERROR
;
terrno
=
TSDB_CODE_SYN_IN
VALID_SNAPSHOT_MSG
;
return
-
1
;
}
...
...
@@ -721,8 +721,12 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS
timeNow
=
taosGetTimestampMs
();
}
int32_t
code
=
0
;
if
(
snapshotReceiverGotData
(
pReceiver
,
pMsg
)
!=
0
)
{
return
-
1
;
code
=
terrno
;
if
(
code
>=
SYNC_SNAPSHOT_SEQ_INVALID
)
{
code
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
}
}
// build msg
...
...
@@ -740,7 +744,7 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS
pRspMsg
->
lastTerm
=
pMsg
->
lastTerm
;
pRspMsg
->
startTime
=
pReceiver
->
startTime
;
pRspMsg
->
ack
=
pReceiver
->
ack
;
// receiver maybe already closed
pRspMsg
->
code
=
0
;
pRspMsg
->
code
=
code
;
pRspMsg
->
snapBeginIndex
=
pReceiver
->
snapshotParam
.
start
;
// send msg
...
...
@@ -861,7 +865,7 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
syncLogRecvSyncSnapshotSend
(
pSyncNode
,
pMsg
,
"process force stop"
);
snapshotReceiverForceStop
(
pReceiver
);
}
else
if
(
pMsg
->
seq
>
SYNC_SNAPSHOT_SEQ_BEGIN
&&
pMsg
->
seq
<
SYNC_SNAPSHOT_SEQ_END
)
{
syncLogRecvSyncSnapshotSend
(
pSyncNode
,
pMsg
,
"process seq"
);
syncLogRecvSyncSnapshotSend
(
pSyncNode
,
pMsg
,
"process seq
data
"
);
syncNodeOnSnapshotTransfering
(
pSyncNode
,
pMsg
);
}
else
{
// error log
...
...
@@ -982,68 +986,85 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
}
// state, term, seq/ack
if
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
if
(
pMsg
->
term
==
pSyncNode
->
pRaftStore
->
currentTerm
)
{
// prepare <begin, end>, send begin msg
if
(
pMsg
->
ack
==
SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"process seq pre-snapshot"
);
syncNodeOnSnapshotReplyPre
(
pSyncNode
,
pMsg
);
return
0
;
}
if
(
pSyncNode
->
state
!=
TAOS_SYNC_STATE_LEADER
)
{
sSError
(
pSender
,
"snapshot sender not leader"
);
return
-
1
;
}
if
(
pMsg
->
ack
==
SYNC_SNAPSHOT_SEQ_BEGIN
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"process seq begin"
);
if
(
snapshotSenderUpdateProgress
(
pSender
,
pMsg
)
!=
0
)
{
return
-
1
;
}
if
(
pMsg
->
term
!=
pSyncNode
->
pRaftStore
->
currentTerm
)
{
sSError
(
pSender
,
"snapshot sender term not equal"
);
return
-
1
;
}
if
(
snapshotSend
(
pSender
)
!=
0
)
{
return
-
1
;
}
return
0
;
}
if
(
pMsg
->
code
!=
0
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"receive error code"
);
sSError
(
pSender
,
"snapshot sender receive error code:0x%x and stop sender"
,
pMsg
->
code
);
snapshotSenderStop
(
pSender
,
true
);
SSyncLogReplMgr
*
pMgr
=
syncNodeGetLogReplMgr
(
pSyncNode
,
&
pMsg
->
srcId
);
if
(
pMgr
)
{
syncLogReplMgrReset
(
pMgr
);
}
// receive ack is finish, close sender
if
(
pMsg
->
ack
==
SYNC_SNAPSHOT_SEQ_END
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"process seq end"
);
snapshotSenderStop
(
pSender
,
true
);
SSyncLogReplMgr
*
pMgr
=
syncNodeGetLogReplMgr
(
pSyncNode
,
&
pMsg
->
srcId
);
if
(
pMgr
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"reset repl mgr"
);
syncLogReplMgrReset
(
pMgr
);
}
return
0
;
}
return
-
1
;
}
// send next msg
if
(
pMsg
->
ack
==
pSender
->
seq
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"process seq"
);
// update sender ack
if
(
snapshotSenderUpdateProgress
(
pSender
,
pMsg
)
!=
0
)
{
return
-
1
;
}
if
(
snapshotSend
(
pSender
)
!=
0
)
{
return
-
1
;
}
// prepare <begin, end>, send begin msg
if
(
pMsg
->
ack
==
SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"process seq pre-snapshot"
);
syncNodeOnSnapshotReplyPre
(
pSyncNode
,
pMsg
);
return
0
;
}
}
else
if
(
pMsg
->
ack
==
pSender
->
seq
-
1
)
{
// maybe resend
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"process seq and resend"
);
snapshotReSend
(
pSender
);
if
(
pMsg
->
ack
==
SYNC_SNAPSHOT_SEQ_BEGIN
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"process seq begin"
);
if
(
snapshotSenderUpdateProgress
(
pSender
,
pMsg
)
!=
0
)
{
return
-
1
;
}
}
else
{
// error log
sSError
(
pSender
,
"snapshot sender recv error ack:%d, my seq:%d"
,
pMsg
->
ack
,
pSender
->
seq
);
return
-
1
;
}
}
else
{
// error log
sSError
(
pSender
,
"snapshot sender term not equal"
);
if
(
snapshotSend
(
pSender
)
!=
0
)
{
return
-
1
;
}
return
0
;
}
// receive ack is finish, close sender
if
(
pMsg
->
ack
==
SYNC_SNAPSHOT_SEQ_END
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"process seq end"
);
snapshotSenderStop
(
pSender
,
true
);
SSyncLogReplMgr
*
pMgr
=
syncNodeGetLogReplMgr
(
pSyncNode
,
&
pMsg
->
srcId
);
if
(
pMgr
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"reset repl mgr"
);
syncLogReplMgrReset
(
pMgr
);
}
return
0
;
}
// send next msg
if
(
pMsg
->
ack
==
pSender
->
seq
)
{
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"process seq data"
);
// update sender ack
if
(
snapshotSenderUpdateProgress
(
pSender
,
pMsg
)
!=
0
)
{
return
-
1
;
}
if
(
snapshotSend
(
pSender
)
!=
0
)
{
return
-
1
;
}
}
else
if
(
pMsg
->
ack
==
pSender
->
seq
-
1
)
{
// maybe resend
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"process seq and resend"
);
snapshotReSend
(
pSender
);
}
else
{
// error log
sSError
(
pSender
,
"snapshot sender not leader"
);
syncLogRecvSyncSnapshotRsp
(
pSyncNode
,
pMsg
,
"receive error ack"
);
sSError
(
pSender
,
"snapshot sender receive error ack:%d, my seq:%d"
,
pMsg
->
ack
,
pSender
->
seq
);
snapshotSenderStop
(
pSender
,
true
);
SSyncLogReplMgr
*
pMgr
=
syncNodeGetLogReplMgr
(
pSyncNode
,
&
pMsg
->
srcId
);
if
(
pMgr
)
{
syncLogReplMgrReset
(
pMgr
);
}
return
-
1
;
}
...
...
source/libs/sync/test/syncRaftLogTest2.cpp
浏览文件 @
48b6bd43
...
...
@@ -47,7 +47,7 @@ void init() {
pSyncNode
->
pWal
=
pWal
;
pSyncNode
->
pFsm
=
(
SSyncFSM
*
)
taosMemoryMalloc
(
sizeof
(
SSyncFSM
));
pSyncNode
->
pFsm
->
FpGetSnapshotInfo
=
GetSnapshotCb
;
//
pSyncNode->pFsm->FpGetSnapshotInfo = GetSnapshotCb;
}
void
cleanup
()
{
...
...
source/libs/sync/test/syncRaftLogTest3.cpp
浏览文件 @
48b6bd43
...
...
@@ -47,7 +47,7 @@ void init() {
pSyncNode
->
pWal
=
pWal
;
pSyncNode
->
pFsm
=
(
SSyncFSM
*
)
taosMemoryMalloc
(
sizeof
(
SSyncFSM
));
pSyncNode
->
pFsm
->
FpGetSnapshotInfo
=
GetSnapshotCb
;
//
pSyncNode->pFsm->FpGetSnapshotInfo = GetSnapshotCb;
}
void
cleanup
()
{
...
...
source/util/src/terror.c
浏览文件 @
48b6bd43
...
...
@@ -405,6 +405,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_PROPOSE_NOT_READY, "Sync not ready for pr
TAOS_DEFINE_ERROR
(
TSDB_CODE_SYN_STANDBY_NOT_READY
,
"Sync not ready for standby"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_SYN_BATCH_ERROR
,
"Sync batch error"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_SYN_RESTORING
,
"Sync is restoring"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG
,
"Sync invalid snapshot msg"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_SYN_INTERNAL_ERROR
,
"Sync internal error"
)
//tq
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录