Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
83112644
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
83112644
编写于
12月 26, 2022
作者:
S
Shengliang Guan
提交者:
GitHub
12月 26, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #19149 from taosdata/enh/TD-21492
enh: handle error while transfer snapshot
上级
233322db
b381c42c
变更
8
展开全部
隐藏空白更改
内联
并排
Showing
8 changed file
with
133 addition
and
157 deletion
+133
-157
source/dnode/vnode/src/meta/metaSnapshot.c
source/dnode/vnode/src/meta/metaSnapshot.c
+2
-2
source/dnode/vnode/src/vnd/vnodeSnapshot.c
source/dnode/vnode/src/vnd/vnodeSnapshot.c
+2
-2
source/libs/sync/inc/syncInt.h
source/libs/sync/inc/syncInt.h
+1
-1
source/libs/sync/inc/syncPipeline.h
source/libs/sync/inc/syncPipeline.h
+1
-1
source/libs/sync/inc/syncSnapshot.h
source/libs/sync/inc/syncSnapshot.h
+3
-3
source/libs/sync/src/syncMain.c
source/libs/sync/src/syncMain.c
+48
-40
source/libs/sync/src/syncPipeline.c
source/libs/sync/src/syncPipeline.c
+3
-2
source/libs/sync/src/syncSnapshot.c
source/libs/sync/src/syncSnapshot.c
+73
-106
未找到文件。
source/dnode/vnode/src/meta/metaSnapshot.c
浏览文件 @
83112644
...
...
@@ -113,8 +113,8 @@ int32_t metaSnapRead(SMetaSnapReader* pReader, uint8_t** ppData) {
pHdr
->
size
=
nData
;
memcpy
(
pHdr
->
data
,
pData
,
nData
);
meta
Info
(
"vgId:%d, vnode snapshot meta read data, version:%"
PRId64
" uid:%"
PRId64
" nData
:%d"
,
TD_VID
(
pReader
->
pMeta
->
pVnode
),
key
.
version
,
key
.
uid
,
nData
);
meta
Debug
(
"vgId:%d, vnode snapshot meta read data, version:%"
PRId64
" uid:%"
PRId64
" blockLen
:%d"
,
TD_VID
(
pReader
->
pMeta
->
pVnode
),
key
.
version
,
key
.
uid
,
nData
);
_exit:
return
code
;
...
...
source/dnode/vnode/src/vnd/vnodeSnapshot.c
浏览文件 @
83112644
...
...
@@ -257,8 +257,8 @@ _exit:
pReader
->
index
++
;
*
nData
=
sizeof
(
SSnapDataHdr
)
+
pHdr
->
size
;
pHdr
->
index
=
pReader
->
index
;
v
Info
(
"vgId:%d, vnode snapshot read data,index:%"
PRId64
" type:%d nData
:%d "
,
TD_VID
(
pReader
->
pVnode
),
pReader
->
index
,
pHdr
->
type
,
*
nData
);
v
Debug
(
"vgId:%d, vnode snapshot read data, index:%"
PRId64
" type:%d blockLen
:%d "
,
TD_VID
(
pReader
->
pVnode
),
pReader
->
index
,
pHdr
->
type
,
*
nData
);
}
else
{
vInfo
(
"vgId:%d, vnode snapshot read data end, index:%"
PRId64
,
TD_VID
(
pReader
->
pVnode
),
pReader
->
index
);
}
...
...
source/libs/sync/inc/syncInt.h
浏览文件 @
83112644
...
...
@@ -238,7 +238,7 @@ int32_t syncNodeStopPingTimer(SSyncNode* pSyncNode);
int32_t
syncNodeStartElectTimer
(
SSyncNode
*
pSyncNode
,
int32_t
ms
);
int32_t
syncNodeStopElectTimer
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeRestartElectTimer
(
SSyncNode
*
pSyncNode
,
int32_t
ms
);
int32_t
syncNodeResetElectTimer
(
SSyncNode
*
pSyncNode
);
void
syncNodeResetElectTimer
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeStartHeartbeatTimer
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeStopHeartbeatTimer
(
SSyncNode
*
pSyncNode
);
int32_t
syncNodeRestartHeartbeatTimer
(
SSyncNode
*
pSyncNode
);
...
...
source/libs/sync/inc/syncPipeline.h
浏览文件 @
83112644
...
...
@@ -61,7 +61,7 @@ typedef struct SSyncLogBuffer {
// SSyncLogRepMgr
SSyncLogReplMgr
*
syncLogReplMgrCreate
();
void
syncLogReplMgrDestroy
(
SSyncLogReplMgr
*
pMgr
);
int32_t
syncLogReplMgrReset
(
SSyncLogReplMgr
*
pMgr
);
void
syncLogReplMgrReset
(
SSyncLogReplMgr
*
pMgr
);
int32_t
syncNodeLogReplMgrInit
(
SSyncNode
*
pNode
);
void
syncNodeLogReplMgrDestroy
(
SSyncNode
*
pNode
);
...
...
source/libs/sync/inc/syncSnapshot.h
浏览文件 @
83112644
...
...
@@ -56,7 +56,7 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI
void
snapshotSenderDestroy
(
SSyncSnapshotSender
*
pSender
);
bool
snapshotSenderIsStart
(
SSyncSnapshotSender
*
pSender
);
int32_t
snapshotSenderStart
(
SSyncSnapshotSender
*
pSender
);
int32_t
snapshotSenderStop
(
SSyncSnapshotSender
*
pSender
,
bool
finish
);
void
snapshotSenderStop
(
SSyncSnapshotSender
*
pSender
,
bool
finish
);
int32_t
snapshotSend
(
SSyncSnapshotSender
*
pSender
);
int32_t
snapshotReSend
(
SSyncSnapshotSender
*
pSender
);
...
...
@@ -79,8 +79,8 @@ typedef struct SSyncSnapshotReceiver {
SSyncSnapshotReceiver
*
snapshotReceiverCreate
(
SSyncNode
*
pSyncNode
,
SRaftId
fromId
);
void
snapshotReceiverDestroy
(
SSyncSnapshotReceiver
*
pReceiver
);
int32_t
snapshotReceiverStart
(
SSyncSnapshotReceiver
*
pReceiver
,
SyncSnapshotSend
*
pBeginMsg
);
int32_t
snapshotReceiverStop
(
SSyncSnapshotReceiver
*
pReceiver
);
void
snapshotReceiverStart
(
SSyncSnapshotReceiver
*
pReceiver
,
SyncSnapshotSend
*
pBeginMsg
);
void
snapshotReceiverStop
(
SSyncSnapshotReceiver
*
pReceiver
);
bool
snapshotReceiverIsStart
(
SSyncSnapshotReceiver
*
pReceiver
);
void
snapshotReceiverForceStop
(
SSyncSnapshotReceiver
*
pReceiver
);
...
...
source/libs/sync/src/syncMain.c
浏览文件 @
83112644
...
...
@@ -200,12 +200,15 @@ int32_t syncProcessMsg(int64_t rid, SRpcMsg* pMsg) {
code
=
syncNodeOnLocalCmd
(
pSyncNode
,
pMsg
);
break
;
default:
sError
(
"vgId:%d, failed to process msg:%p since invalid type:%s"
,
pSyncNode
->
vgId
,
pMsg
,
TMSG_INFO
(
pMsg
->
msgType
));
terrno
=
TSDB_CODE_MSG_NOT_PROCESSED
;
code
=
-
1
;
}
syncNodeRelease
(
pSyncNode
);
if
(
code
!=
0
)
{
sDebug
(
"vgId:%d, failed to process sync msg:%p type:%s since 0x%x"
,
pSyncNode
->
vgId
,
pMsg
,
TMSG_INFO
(
pMsg
->
msgType
),
terrno
);
}
return
code
;
}
...
...
@@ -228,8 +231,7 @@ int32_t syncSendTimeoutRsp(int64_t rid, int64_t seq) {
syncNodeRelease
(
pNode
);
if
(
ret
==
1
)
{
sInfo
(
"send timeout response, seq:%"
PRId64
" handle:%p ahandle:%p"
,
seq
,
rpcMsg
.
info
.
handle
,
rpcMsg
.
info
.
ahandle
);
sInfo
(
"send timeout response, seq:%"
PRId64
" handle:%p ahandle:%p"
,
seq
,
rpcMsg
.
info
.
handle
,
rpcMsg
.
info
.
ahandle
);
rpcSendResponse
(
&
rpcMsg
);
return
0
;
}
else
{
...
...
@@ -1084,13 +1086,17 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
// snapshot senders
for
(
int32_t
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
SSyncSnapshotSender
*
pSender
=
snapshotSenderCreate
(
pSyncNode
,
i
);
// ASSERT(pSender != NULL);
(
pSyncNode
->
senders
)[
i
]
=
pSender
;
sSDebug
(
pSender
,
"snapshot sender create new while open, data:%p"
,
pSender
);
if
(
pSender
==
NULL
)
return
NULL
;
pSyncNode
->
senders
[
i
]
=
pSender
;
sSDebug
(
pSender
,
"snapshot sender create while open sync node, data:%p"
,
pSender
);
}
// snapshot receivers
pSyncNode
->
pNewNodeReceiver
=
snapshotReceiverCreate
(
pSyncNode
,
EMPTY_RAFT_ID
);
if
(
pSyncNode
->
pNewNodeReceiver
==
NULL
)
return
NULL
;
sRDebug
(
pSyncNode
->
pNewNodeReceiver
,
"snapshot receiver create while open sync node, data:%p"
,
pSyncNode
->
pNewNodeReceiver
);
// is config changing
pSyncNode
->
changing
=
false
;
...
...
@@ -1131,10 +1137,8 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode
->
hbrSlowNum
=
0
;
pSyncNode
->
tmrRoutineNum
=
0
;
sNInfo
(
pSyncNode
,
"sync open, node:%p"
,
pSyncNode
);
sTrace
(
"vgId:%d, tsElectInterval:%d, tsHeartbeatInterval:%d, tsHeartbeatTimeout:%d"
,
pSyncNode
->
vgId
,
tsElectInterval
,
tsHeartbeatInterval
,
tsHeartbeatTimeout
);
sNInfo
(
pSyncNode
,
"sync open, node:%p electInterval:%d heartbeatInterval:%d heartbeatTimeout:%d"
,
pSyncNode
,
tsElectInterval
,
tsHeartbeatInterval
,
tsHeartbeatTimeout
);
return
pSyncNode
;
_error:
...
...
@@ -1251,6 +1255,8 @@ void syncNodePreClose(SSyncNode* pSyncNode) {
snapshotReceiverForceStop
(
pSyncNode
->
pNewNodeReceiver
);
}
sDebug
(
"vgId:%d, snapshot receiver destroy while preclose sync node, data:%p"
,
pSyncNode
->
vgId
,
pSyncNode
->
pNewNodeReceiver
);
snapshotReceiverDestroy
(
pSyncNode
->
pNewNodeReceiver
);
pSyncNode
->
pNewNodeReceiver
=
NULL
;
}
...
...
@@ -1295,15 +1301,15 @@ void syncNodeClose(SSyncNode* pSyncNode) {
syncNodeStopHeartbeatTimer
(
pSyncNode
);
for
(
int32_t
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
if
(
(
pSyncNode
->
senders
)
[
i
]
!=
NULL
)
{
s
STrace
((
pSyncNode
->
senders
)[
i
],
"snapshot sender destroy while close, data:%p"
,
(
pSyncNode
->
senders
)
[
i
]);
if
(
pSyncNode
->
senders
[
i
]
!=
NULL
)
{
s
Debug
(
"vgId:%d, snapshot sender destroy while close, data:%p"
,
pSyncNode
->
vgId
,
pSyncNode
->
senders
[
i
]);
if
(
snapshotSenderIsStart
(
(
pSyncNode
->
senders
)
[
i
]))
{
snapshotSenderStop
(
(
pSyncNode
->
senders
)
[
i
],
false
);
if
(
snapshotSenderIsStart
(
pSyncNode
->
senders
[
i
]))
{
snapshotSenderStop
(
pSyncNode
->
senders
[
i
],
false
);
}
snapshotSenderDestroy
(
(
pSyncNode
->
senders
)
[
i
]);
(
pSyncNode
->
senders
)
[
i
]
=
NULL
;
snapshotSenderDestroy
(
pSyncNode
->
senders
[
i
]);
pSyncNode
->
senders
[
i
]
=
NULL
;
}
}
...
...
@@ -1312,6 +1318,7 @@ void syncNodeClose(SSyncNode* pSyncNode) {
snapshotReceiverForceStop
(
pSyncNode
->
pNewNodeReceiver
);
}
sDebug
(
"vgId:%d, snapshot receiver destroy while close, data:%p"
,
pSyncNode
->
vgId
,
pSyncNode
->
pNewNodeReceiver
);
snapshotReceiverDestroy
(
pSyncNode
->
pNewNodeReceiver
);
pSyncNode
->
pNewNodeReceiver
=
NULL
;
}
...
...
@@ -1382,8 +1389,7 @@ int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms) {
return
ret
;
}
int32_t
syncNodeResetElectTimer
(
SSyncNode
*
pSyncNode
)
{
int32_t
ret
=
0
;
void
syncNodeResetElectTimer
(
SSyncNode
*
pSyncNode
)
{
int32_t
electMS
;
if
(
pSyncNode
->
pRaftCfg
->
isStandBy
)
{
...
...
@@ -1391,11 +1397,11 @@ int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode) {
}
else
{
electMS
=
syncUtilElectRandomMS
(
pSyncNode
->
electBaseLine
,
2
*
pSyncNode
->
electBaseLine
);
}
ret
=
syncNodeRestartElectTimer
(
pSyncNode
,
electMS
);
(
void
)
syncNodeRestartElectTimer
(
pSyncNode
,
electMS
);
sNTrace
(
pSyncNode
,
"reset elect timer, min:%d, max:%d, ms:%d"
,
pSyncNode
->
electBaseLine
,
2
*
pSyncNode
->
electBaseLine
,
electMS
);
return
ret
;
}
static
int32_t
syncNodeDoStartHeartbeatTimer
(
SSyncNode
*
pSyncNode
)
{
...
...
@@ -1455,23 +1461,20 @@ int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode) {
return
0
;
}
// utils --------------
int32_t
syncNodeSendMsgById
(
const
SRaftId
*
destRaftId
,
SSyncNode
*
pSyncNode
,
SRpcMsg
*
pMsg
)
{
SEpSet
epSet
;
syncUtilRaftId2EpSet
(
destRaftId
,
&
epSet
);
if
(
pSyncNode
->
syncSendMSg
!=
NULL
)
{
// htonl
syncUtilMsgHtoN
(
pMsg
->
pCont
);
pMsg
->
info
.
noResp
=
1
;
pSyncNode
->
syncSendMSg
(
&
epSet
,
pMsg
);
return
pSyncNode
->
syncSendMSg
(
&
epSet
,
pMsg
);
}
else
{
sError
(
"vgId:%d, sync send msg by id error, fp-send-msg is null"
,
pSyncNode
->
vgId
);
rpcFreeCont
(
pMsg
->
pCont
);
terrno
=
TSDB_CODE_SYN_INTERNAL_ERROR
;
return
-
1
;
}
return
0
;
}
int32_t
syncNodeSendMsgByInfo
(
const
SNodeInfo
*
nodeInfo
,
SSyncNode
*
pSyncNode
,
SRpcMsg
*
pMsg
)
{
...
...
@@ -1586,7 +1589,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
memcpy
(
oldReplicasId
,
pSyncNode
->
replicasId
,
sizeof
(
oldReplicasId
));
SSyncSnapshotSender
*
oldSenders
[
TSDB_MAX_REPLICA
];
for
(
int32_t
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
oldSenders
[
i
]
=
(
pSyncNode
->
senders
)
[
i
];
oldSenders
[
i
]
=
pSyncNode
->
senders
[
i
];
sSTrace
(
oldSenders
[
i
],
"snapshot sender save old"
);
}
...
...
@@ -1625,7 +1628,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
// clear new
for
(
int32_t
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
(
pSyncNode
->
senders
)
[
i
]
=
NULL
;
pSyncNode
->
senders
[
i
]
=
NULL
;
}
// reset new
...
...
@@ -1640,16 +1643,16 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
sNTrace
(
pSyncNode
,
"snapshot sender reset for: %"
PRId64
", newIndex:%d, %s:%d, %p"
,
(
pSyncNode
->
replicasId
)[
i
].
addr
,
i
,
host
,
port
,
oldSenders
[
j
]);
(
pSyncNode
->
senders
)
[
i
]
=
oldSenders
[
j
];
pSyncNode
->
senders
[
i
]
=
oldSenders
[
j
];
oldSenders
[
j
]
=
NULL
;
reset
=
true
;
// reset replicaIndex
int32_t
oldreplicaIndex
=
(
pSyncNode
->
senders
)
[
i
]
->
replicaIndex
;
(
pSyncNode
->
senders
)
[
i
]
->
replicaIndex
=
i
;
int32_t
oldreplicaIndex
=
pSyncNode
->
senders
[
i
]
->
replicaIndex
;
pSyncNode
->
senders
[
i
]
->
replicaIndex
=
i
;
sNTrace
(
pSyncNode
,
"snapshot sender udpate replicaIndex from %d to %d, %s:%d, %p, reset:%d"
,
oldreplicaIndex
,
i
,
host
,
port
,
(
pSyncNode
->
senders
)
[
i
],
reset
);
i
,
host
,
port
,
pSyncNode
->
senders
[
i
],
reset
);
break
;
}
...
...
@@ -1658,18 +1661,23 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
// create new
for
(
int32_t
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
if
((
pSyncNode
->
senders
)[
i
]
==
NULL
)
{
(
pSyncNode
->
senders
)[
i
]
=
snapshotSenderCreate
(
pSyncNode
,
i
);
sSTrace
((
pSyncNode
->
senders
)[
i
],
"snapshot sender create new while reconfig, data:%p"
,
(
pSyncNode
->
senders
)[
i
]);
if
(
pSyncNode
->
senders
[
i
]
==
NULL
)
{
pSyncNode
->
senders
[
i
]
=
snapshotSenderCreate
(
pSyncNode
,
i
);
if
(
pSyncNode
->
senders
[
i
]
==
NULL
)
{
// will be created later while send snapshot
sSError
(
pSyncNode
->
senders
[
i
],
"snapshot sender create failed while reconfig"
);
}
else
{
sSDebug
(
pSyncNode
->
senders
[
i
],
"snapshot sender create while reconfig, data:%p"
,
pSyncNode
->
senders
[
i
]);
}
}
else
{
sS
Trace
((
pSyncNode
->
senders
)[
i
],
"snapshot sender already exist, data:%p"
,
(
pSyncNode
->
senders
)
[
i
]);
sS
Debug
(
pSyncNode
->
senders
[
i
],
"snapshot sender already exist, data:%p"
,
pSyncNode
->
senders
[
i
]);
}
}
// free old
for
(
int32_t
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
if
(
oldSenders
[
i
]
!=
NULL
)
{
s
NTrace
(
pSyncNode
,
"snapshot sender destroy old, data:%p replica-index:%d"
,
oldSenders
[
i
],
i
);
s
SDebug
(
oldSenders
[
i
]
,
"snapshot sender destroy old, data:%p replica-index:%d"
,
oldSenders
[
i
],
i
);
snapshotSenderDestroy
(
oldSenders
[
i
]);
oldSenders
[
i
]
=
NULL
;
}
...
...
@@ -1844,8 +1852,8 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
SSyncSnapshotSender* pMySender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->myRaftId));
if (pMySender != NULL) {
for (int32_t i = 0; i < pSyncNode->pMatchIndex->replicaNum; ++i) {
if (
(pSyncNode->senders)
[i]->privateTerm > pMySender->privateTerm) {
pMySender->privateTerm =
(pSyncNode->senders)
[i]->privateTerm;
if (
pSyncNode->senders
[i]->privateTerm > pMySender->privateTerm) {
pMySender->privateTerm =
pSyncNode->senders
[i]->privateTerm;
}
}
(pMySender->privateTerm) += 100;
...
...
source/libs/sync/src/syncPipeline.c
浏览文件 @
83112644
...
...
@@ -566,7 +566,9 @@ _out:
return
ret
;
}
int32_t
syncLogReplMgrReset
(
SSyncLogReplMgr
*
pMgr
)
{
void
syncLogReplMgrReset
(
SSyncLogReplMgr
*
pMgr
)
{
if
(
pMgr
==
NULL
)
return
;
ASSERT
(
pMgr
->
startIndex
>=
0
);
for
(
SyncIndex
index
=
pMgr
->
startIndex
;
index
<
pMgr
->
endIndex
;
index
++
)
{
memset
(
&
pMgr
->
states
[
index
%
pMgr
->
size
],
0
,
sizeof
(
pMgr
->
states
[
0
]));
...
...
@@ -576,7 +578,6 @@ int32_t syncLogReplMgrReset(SSyncLogReplMgr* pMgr) {
pMgr
->
endIndex
=
0
;
pMgr
->
restored
=
false
;
pMgr
->
retryBackoff
=
0
;
return
0
;
}
int32_t
syncLogReplMgrRetryOnNeed
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
)
{
...
...
source/libs/sync/src/syncSnapshot.c
浏览文件 @
83112644
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录