Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
082428ac
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22017
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
082428ac
编写于
11月 26, 2022
作者:
B
Benguang Zhao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
enh: reduce the number of probing msgs under stress in recovery mode
上级
736a1cc0
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
28 addition
and
26 deletion
+28
-26
source/libs/sync/inc/syncPipeline.h
source/libs/sync/inc/syncPipeline.h
+1
-1
source/libs/sync/src/syncPipeline.c
source/libs/sync/src/syncPipeline.c
+27
-25
未找到文件。
source/libs/sync/inc/syncPipeline.h
浏览文件 @
082428ac
...
@@ -87,7 +87,7 @@ int32_t syncLogBufferReplicateOneTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, Sy
...
@@ -87,7 +87,7 @@ int32_t syncLogBufferReplicateOneTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, Sy
SRaftId
*
pDestId
,
bool
*
pBarrier
);
SRaftId
*
pDestId
,
bool
*
pBarrier
);
int32_t
syncLogReplMgrProcessReply
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncLogReplMgrProcessReply
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncLogReplMgrReplicateAttemptedOnce
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
);
int32_t
syncLogReplMgrReplicateAttemptedOnce
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
);
int32_t
syncLogReplMgrReplicateProbeOnce
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
);
int32_t
syncLogReplMgrReplicateProbeOnce
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncIndex
index
);
int32_t
syncLogReplMgrProcessReplyInRecoveryMode
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncLogReplMgrProcessReplyInRecoveryMode
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncLogReplMgrProcessReplyInNormalMode
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncLogReplMgrProcessReplyInNormalMode
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncAppendEntriesReply
*
pMsg
);
int32_t
syncLogReplMgrProcessHeartbeatReply
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncHeartbeatReply
*
pMsg
);
int32_t
syncLogReplMgrProcessHeartbeatReply
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncHeartbeatReply
*
pMsg
);
...
...
source/libs/sync/src/syncPipeline.c
浏览文件 @
082428ac
...
@@ -612,6 +612,7 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
...
@@ -612,6 +612,7 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
pMgr
->
states
[
pos
].
timeMs
=
nowMs
;
pMgr
->
states
[
pos
].
timeMs
=
nowMs
;
pMgr
->
states
[
pos
].
term
=
term
;
pMgr
->
states
[
pos
].
term
=
term
;
pMgr
->
states
[
pos
].
acked
=
false
;
pMgr
->
states
[
pos
].
acked
=
false
;
retried
=
true
;
retried
=
true
;
if
(
firstIndex
==
-
1
)
firstIndex
=
index
;
if
(
firstIndex
==
-
1
)
firstIndex
=
index
;
count
++
;
count
++
;
...
@@ -658,6 +659,7 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod
...
@@ -658,6 +659,7 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod
pMgr
->
states
[
pMsg
->
lastSendIndex
%
pMgr
->
size
].
acked
=
true
;
pMgr
->
states
[
pMsg
->
lastSendIndex
%
pMgr
->
size
].
acked
=
true
;
if
(
pMsg
->
success
&&
pMsg
->
matchIndex
==
pMsg
->
lastSendIndex
)
{
if
(
pMsg
->
success
&&
pMsg
->
matchIndex
==
pMsg
->
lastSendIndex
)
{
pMgr
->
matchIndex
=
pMsg
->
matchIndex
;
pMgr
->
restored
=
true
;
pMgr
->
restored
=
true
;
sInfo
(
"vgId:%d, sync log repl mgr restored. peer: %s:%d (%"
PRIx64
"), mgr: rs(%d) [%"
PRId64
" %"
PRId64
sInfo
(
"vgId:%d, sync log repl mgr restored. peer: %s:%d (%"
PRIx64
"), mgr: rs(%d) [%"
PRId64
" %"
PRId64
", %"
PRId64
"), buffer: [%"
PRId64
" %"
PRId64
" %"
PRId64
", %"
PRId64
")"
,
", %"
PRId64
"), buffer: [%"
PRId64
" %"
PRId64
" %"
PRId64
", %"
PRId64
")"
,
...
@@ -667,8 +669,8 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod
...
@@ -667,8 +669,8 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod
}
}
if
(
pMsg
->
success
==
false
&&
pMsg
->
matchIndex
>=
pMsg
->
lastSendIndex
)
{
if
(
pMsg
->
success
==
false
&&
pMsg
->
matchIndex
>=
pMsg
->
lastSendIndex
)
{
s
Error
(
"vgId:%d, failed to rollback match index. peer: %s:%d, match index: %"
PRId64
", last sent: %"
PRId64
,
s
Warn
(
"vgId:%d, failed to rollback match index. peer: %s:%d, match index: %"
PRId64
", last sent: %"
PRId64
,
pNode
->
vgId
,
host
,
port
,
pMsg
->
matchIndex
,
pMsg
->
lastSendIndex
);
pNode
->
vgId
,
host
,
port
,
pMsg
->
matchIndex
,
pMsg
->
lastSendIndex
);
if
(
syncNodeStartSnapshot
(
pNode
,
&
destId
)
<
0
)
{
if
(
syncNodeStartSnapshot
(
pNode
,
&
destId
)
<
0
)
{
sError
(
"vgId:%d, failed to start snapshot for peer %s:%d"
,
pNode
->
vgId
,
host
,
port
);
sError
(
"vgId:%d, failed to start snapshot for peer %s:%d"
,
pNode
->
vgId
,
host
,
port
);
return
-
1
;
return
-
1
;
...
@@ -676,8 +678,6 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod
...
@@ -676,8 +678,6 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod
sInfo
(
"vgId:%d, snapshot replication to peer %s:%d"
,
pNode
->
vgId
,
host
,
port
);
sInfo
(
"vgId:%d, snapshot replication to peer %s:%d"
,
pNode
->
vgId
,
host
,
port
);
return
0
;
return
0
;
}
}
(
void
)
syncLogReplMgrReset
(
pMgr
);
}
}
// check last match term
// check last match term
...
@@ -709,24 +709,8 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod
...
@@ -709,24 +709,8 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod
}
}
// attempt to replicate the raft log at index
// attempt to replicate the raft log at index
bool
barrier
=
false
;
(
void
)
syncLogReplMgrReset
(
pMgr
);
ASSERT
(
index
>=
0
);
return
syncLogReplMgrReplicateProbeOnce
(
pMgr
,
pNode
,
index
);
if
(
syncLogBufferReplicateOneTo
(
pMgr
,
pNode
,
index
,
&
term
,
&
destId
,
&
barrier
)
<
0
)
{
sError
(
"vgId:%d, failed to replicate log entry since %s. index: %"
PRId64
", peer %s:%d"
,
pNode
->
vgId
,
terrstr
(),
index
,
host
,
port
);
return
-
1
;
}
int64_t
nowMs
=
taosGetMonoTimestampMs
();
pMgr
->
states
[
index
%
pMgr
->
size
].
barrier
=
barrier
;
pMgr
->
states
[
index
%
pMgr
->
size
].
timeMs
=
nowMs
;
pMgr
->
states
[
index
%
pMgr
->
size
].
term
=
term
;
pMgr
->
states
[
index
%
pMgr
->
size
].
acked
=
false
;
pMgr
->
matchIndex
=
index
;
pMgr
->
startIndex
=
index
;
pMgr
->
endIndex
=
index
+
1
;
return
0
;
}
}
int32_t
syncLogReplMgrProcessHeartbeatReply
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncHeartbeatReply
*
pMsg
)
{
int32_t
syncLogReplMgrProcessHeartbeatReply
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncHeartbeatReply
*
pMsg
)
{
...
@@ -766,14 +750,23 @@ int32_t syncLogReplMgrReplicateOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
...
@@ -766,14 +750,23 @@ int32_t syncLogReplMgrReplicateOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
if
(
pMgr
->
restored
)
{
if
(
pMgr
->
restored
)
{
(
void
)
syncLogReplMgrReplicateAttemptedOnce
(
pMgr
,
pNode
);
(
void
)
syncLogReplMgrReplicateAttemptedOnce
(
pMgr
,
pNode
);
}
else
{
}
else
{
(
void
)
syncLogReplMgrReplicateProbeOnce
(
pMgr
,
pNode
);
(
void
)
syncLogReplMgrReplicateProbeOnce
(
pMgr
,
pNode
,
pNode
->
pLogBuf
->
matchIndex
);
}
}
return
0
;
return
0
;
}
}
int32_t
syncLogReplMgrReplicateProbeOnce
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
)
{
int32_t
syncLogReplMgrReplicateProbeOnce
(
SSyncLogReplMgr
*
pMgr
,
SSyncNode
*
pNode
,
SyncIndex
index
)
{
ASSERT
(
!
pMgr
->
restored
);
ASSERT
(
!
pMgr
->
restored
);
SyncIndex
index
=
pNode
->
pLogBuf
->
matchIndex
;
ASSERT
(
pMgr
->
startIndex
>=
0
);
int64_t
retryMaxWaitMs
=
SYNC_LOG_REPL_RETRY_WAIT_MS
*
(
1
<<
SYNC_MAX_RETRY_BACKOFF
);
int64_t
nowMs
=
taosGetMonoTimestampMs
();
if
(
pMgr
->
endIndex
>
pMgr
->
startIndex
&&
nowMs
<
pMgr
->
states
[
pMgr
->
startIndex
%
pMgr
->
size
].
timeMs
+
retryMaxWaitMs
)
{
return
0
;
}
(
void
)
syncLogReplMgrReset
(
pMgr
);
SRaftId
*
pDestId
=
&
pNode
->
replicasId
[
pMgr
->
peerId
];
SRaftId
*
pDestId
=
&
pNode
->
replicasId
[
pMgr
->
peerId
];
bool
barrier
=
false
;
bool
barrier
=
false
;
SyncTerm
term
=
-
1
;
SyncTerm
term
=
-
1
;
...
@@ -783,6 +776,15 @@ int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode
...
@@ -783,6 +776,15 @@ int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode
return
-
1
;
return
-
1
;
}
}
ASSERT
(
index
>=
0
);
pMgr
->
states
[
index
%
pMgr
->
size
].
barrier
=
barrier
;
pMgr
->
states
[
index
%
pMgr
->
size
].
timeMs
=
nowMs
;
pMgr
->
states
[
index
%
pMgr
->
size
].
term
=
term
;
pMgr
->
states
[
index
%
pMgr
->
size
].
acked
=
false
;
pMgr
->
startIndex
=
index
;
pMgr
->
endIndex
=
index
+
1
;
SSyncLogBuffer
*
pBuf
=
pNode
->
pLogBuf
;
SSyncLogBuffer
*
pBuf
=
pNode
->
pLogBuf
;
sTrace
(
"vgId:%d, attempted to probe the %d'th peer with msg of index:%"
PRId64
" term: %"
PRId64
sTrace
(
"vgId:%d, attempted to probe the %d'th peer with msg of index:%"
PRId64
" term: %"
PRId64
". pMgr(rs:%d): [%"
PRId64
" %"
PRId64
", %"
PRId64
"), pBuf: [%"
PRId64
" %"
PRId64
" %"
PRId64
", %"
PRId64
". pMgr(rs:%d): [%"
PRId64
" %"
PRId64
", %"
PRId64
"), pBuf: [%"
PRId64
" %"
PRId64
" %"
PRId64
", %"
PRId64
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录