Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
e17f573e
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e17f573e
编写于
11月 09, 2021
作者:
L
lichuang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[TD-10645][raft]<feature>add raft progress tracker
上级
ccf8f14f
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
498 addition
and
131 deletion
+498
-131
source/libs/sync/inc/raft.h
source/libs/sync/inc/raft.h
+4
-3
source/libs/sync/inc/sync_raft_inflights.h
source/libs/sync/inc/sync_raft_inflights.h
+77
-0
source/libs/sync/inc/sync_raft_progress.h
source/libs/sync/inc/sync_raft_progress.h
+235
-0
source/libs/sync/inc/sync_raft_progress_tracker.h
source/libs/sync/inc/sync_raft_progress_tracker.h
+2
-2
source/libs/sync/src/raft.c
source/libs/sync/src/raft.c
+4
-2
source/libs/sync/src/raft_replication.c
source/libs/sync/src/raft_replication.c
+1
-1
source/libs/sync/src/sync_raft_inflights.c
source/libs/sync/src/sync_raft_inflights.c
+104
-0
source/libs/sync/src/sync_raft_progress.c
source/libs/sync/src/sync_raft_progress.c
+70
-118
source/libs/sync/src/sync_raft_progress_tracker.c
source/libs/sync/src/sync_raft_progress_tracker.c
+1
-5
未找到文件。
source/libs/sync/inc/raft.h
浏览文件 @
e17f573e
...
...
@@ -44,6 +44,7 @@ struct SSyncRaft {
SSyncCluster
cluster
;
int
selfIndex
;
SyncNodeId
selfId
;
SyncGroupId
selfGroupId
;
...
...
@@ -113,9 +114,6 @@ struct SSyncRaft {
**/
uint16_t
heartbeatElapsed
;
// current tick count since start up
uint32_t
currentTick
;
bool
preVote
;
bool
checkQuorum
;
...
...
@@ -130,6 +128,9 @@ struct SSyncRaft {
int
randomizedElectionTimeout
;
bool
disableProposalForwarding
;
// current tick count since start up
uint32_t
currentTick
;
SyncRaftStepFp
stepFp
;
SyncRaftTickFp
tickFp
;
...
...
source/libs/sync/inc/sync_raft_inflights.h
0 → 100644
浏览文件 @
e17f573e
/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TD_SYNC_RAFT_INFLIGHTS_H
#define TD_SYNC_RAFT_INFLIGHTS_H
#include "sync.h"
/**
* SSyncRaftInflights limits the number of MsgApp (represented by the largest index
* contained within) sent to followers but not yet acknowledged by them. Callers
* use syncRaftInflightFull() to check whether more messages can be sent,
* call syncRaftInflightAdd() whenever they are sending a new append,
* and release "quota" via syncRaftInflightFreeLE() whenever an ack is received.
**/
typedef struct SSyncRaftInflights {
  /* position in buffer of the oldest in-flight entry; freeing scans forward from here */
  int start;

  /* number of entries currently held in buffer */
  int count;

  /* capacity of buffer in entries; the window is full once count == size */
  int size;

  /**
   * heap-allocated array used circularly: each occupied slot holds the
   * index of the last log entry carried by one in-flight message.
   **/
  SyncIndex* buffer;
} SSyncRaftInflights;
/**
 * syncRaftOpenInflights allocates an inflights object together with an index
 * buffer sized for `size` entries. It returns NULL if either allocation fails.
 * The result is released with syncRaftCloseInflights().
 **/
SSyncRaftInflights* syncRaftOpenInflights(int size);

/**
 * syncRaftCloseInflights frees the index buffer and then the inflights
 * object itself.
 **/
void syncRaftCloseInflights(SSyncRaftInflights*);
/**
 * syncRaftInflightReset empties the window: nothing remains in flight and the
 * start position goes back to the beginning of the buffer. The buffer itself
 * and its capacity are left untouched.
 **/
static FORCE_INLINE void syncRaftInflightReset(SSyncRaftInflights* inflights) {
  inflights->start = 0;
  inflights->count = 0;
}
/**
 * syncRaftInflightFull reports whether the number of in-flight entries has
 * reached the capacity of the window, i.e. no further message may be added.
 **/
static FORCE_INLINE bool syncRaftInflightFull(SSyncRaftInflights* inflights) {
  if (inflights->count == inflights->size) {
    return true;
  }
  return false;
}
/**
* syncRaftInflightAdd notifies the Inflights that a new message with the given index is being
* dispatched. syncRaftInflightFull() must be called prior to syncRaftInflightAdd()
* to verify that there is room for one more message,
* and consecutive calls to syncRaftInflightAdd() must provide a
* monotonic sequence of indexes.
**/
void
syncRaftInflightAdd
(
SSyncRaftInflights
*
inflights
,
SyncIndex
inflightIndex
);
/**
* syncRaftInflightFreeLE frees every inflight whose index is less than or equal to toIndex.
**/
void
syncRaftInflightFreeLE
(
SSyncRaftInflights
*
inflights
,
SyncIndex
toIndex
);
/**
* syncRaftInflightFreeFirstOne releases the first inflight.
* This is a no-op if nothing is inflight.
**/
void
syncRaftInflightFreeFirstOne
(
SSyncRaftInflights
*
inflights
);
#endif
/* TD_SYNC_RAFT_INFLIGHTS_H */
\ No newline at end of file
source/libs/sync/inc/raft_progress.h
→
source/libs/sync/inc/
sync_
raft_progress.h
浏览文件 @
e17f573e
...
...
@@ -10,62 +10,52 @@
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http:
//
www.gnu.org/licenses/>.
* along with this program. If not, see <http:
*
www.gnu.org/licenses/>.
*/
#ifndef TD_SYNC_RAFT_PROGRESS_H
#define TD_SYNC_RAFT_PROGRESS_H
#include "sync_type.h"
/**
* SSyncRaftInflights is a sliding window for the inflight messages.
* Thus inflight effectively limits both the number of inflight messages
* and the bandwidth each Progress can use.
* When inflights is full, no more message should be sent.
* When a leader sends out a message, the index of the last
* entry should be added to inflights. The index MUST be added
* into inflights in order.
* When a leader receives a reply, the previous inflights should
* be freed by calling syncRaftInflightFreeTo with the index of the last
* received entry.
**/
typedef
struct
SSyncRaftInflights
{
/* the starting index in the buffer */
int
start
;
/* number of inflights in the buffer */
int
count
;
/* the size of the buffer */
int
size
;
/**
* buffer contains the index of the last entry
* inside one message.
**/
SyncIndex
*
buffer
;
}
SSyncRaftInflights
;
#include "sync_raft_inflights.h"
/**
* State defines how the leader should interact with the follower.
*
* When in PROGRESS_PROBE, leader sends at most one replication message
* When in PROGRESS_
STATE_
PROBE, leader sends at most one replication message
* per heartbeat interval. It also probes actual progress of the follower.
*
* When in PROGRESS_REPLICATE, leader optimistically increases next
* When in PROGRESS_
STATE_
REPLICATE, leader optimistically increases next
* to the latest entry sent after sending replication message. This is
* an optimized state for fast replicating log entries to the follower.
*
* When in PROGRESS_SNAPSHOT, leader should have sent out snapshot
* When in PROGRESS_S
TATE_S
NAPSHOT, leader should have sent out snapshot
* before and stops sending any replication message.
*
* PROGRESS_PROBE is the initial state.
* PROGRESS_
STATE_
PROBE is the initial state.
**/
typedef
enum
RaftProgressState
{
PROGRESS_PROBE
=
0
,
PROGRESS_REPLICATE
,
PROGRESS_SNAPSHOT
,
/**
* StateProbe indicates a follower whose last index isn't known. Such a
* follower is "probed" (i.e. an append sent periodically) to narrow down
* its last index. In the ideal (and common) case, only one round of probing
* is necessary as the follower will react with a hint. Followers that are
* probed over extended periods of time are often offline.
**/
PROGRESS_STATE_PROBE
=
0
,
/**
* StateReplicate is the state steady in which a follower eagerly receives
* log entries to append to its log.
**/
PROGRESS_STATE_REPLICATE
,
/**
* StateSnapshot indicates a follower that needs log entries not available
* from the leader's Raft log. Such a follower needs a full snapshot to
* return to StateReplicate.
**/
PROGRESS_STATE_SNAPSHOT
,
}
RaftProgressState
;
/**
...
...
@@ -73,25 +63,27 @@ typedef enum RaftProgressState {
* progresses of all followers, and sends entries to the follower based on its progress.
**/
struct
SSyncRaftProgress
{
SyncNodeId
id
;
SyncIndex
nextIndex
;
SyncIndex
matchIndex
;
/**
* State defines how the leader should interact with the follower.
*
* When in StateProbe, leader sends at most one replication message
* per heartbeat interval. It also probes actual progress of the follower.
*
* When in StateReplicate, leader optimistically increases next
* to the latest entry sent after sending replication message. This is
* an optimized state for fast replicating log entries to the follower.
*
* When in StateSnapshot, leader should have sent out snapshot
* before and stops sending any replication message.
**/
RaftProgressState
state
;
/**
* paused is used in PROGRESS_PROBE.
* When paused is true, raft should pause sending replication message to this peer.
**/
bool
paused
;
// last send append message tick
uint32_t
lastSendTick
;
/**
* pendingSnapshotIndex is used in PROGRESS_SNAPSHOT.
* pendingSnapshotIndex is used in PROGRESS_S
TATE_S
NAPSHOT.
* If there is a pending snapshot, the pendingSnapshotIndex will be set to the
* index of the snapshot. If pendingSnapshotIndex is set, the replication process of
* this Progress will be paused. raft will not resend snapshot until the pending one
...
...
@@ -107,90 +99,116 @@ struct SSyncRaftProgress {
bool
recentActive
;
/**
* flow control sliding window
**/
SSyncRaftInflights
inflights
;
* probeSent is used while this follower is in StateProbe. When probeSent is
* true, raft should pause sending replication message to this peer until
* probeSent is reset. See ProbeAcked() and IsPaused().
**/
bool
probeSent
;
/**
* inflights is a sliding window for the inflight messages.
* Each inflight message contains one or more log entries.
* The max number of entries per message is defined in raft config as MaxSizePerMsg.
* Thus inflight effectively limits both the number of inflight messages
* and the bandwidth each Progress can use.
* When inflights is Full, no more message should be sent.
* When a leader sends out a message, the index of the last
* entry should be added to inflights. The index MUST be added
* into inflights in order.
* When a leader receives a reply, the previous inflights should
* be freed by calling inflights.FreeLE with the index of the last
* received entry.
**/
SSyncRaftInflights
*
inflights
;
// IsLearner is true if this progress is tracked for a learner.
/**
* IsLearner is true if this progress is tracked for a learner.
**/
bool
isLearner
;
};
void
syncRaftInitProgress
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
);
void
syncRaftInitProgress
(
int
i
,
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
);
/**
* syncRaftProgressBecomeProbe transitions into StateProbe. Next is reset to Match+1 or,
* optionally and if larger, the index of the pending snapshot.
**/
void
syncRaftProgressBecomeProbe
(
SSyncRaftProgress
*
progress
);
/**
* syncRaftProgressBecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
**/
void
syncRaftProgressBecomeReplicate
(
SSyncRaftProgress
*
progress
);
/**
* syncRaftProgressMaybeUpdate returns false if the given lastIndex index comes from i-th node's log.
* Otherwise it updates the progress and returns true.
* syncRaftProgressMaybeUpdate is called when an MsgAppResp arrives from the follower, with the
* index acked by it. The method returns false if the given n index comes from
* an outdated message. Otherwise it updates the progress and returns true.
**/
bool
syncRaftProgressMaybeUpdate
(
SSyncRaftProgress
*
progress
,
SyncIndex
lastIndex
);
/**
* syncRaftProgressOptimisticNextIndex signals that appends all the way up to and including index n
* are in-flight. As a result, Next is increased to n+1.
**/
static
FORCE_INLINE
void
syncRaftProgressOptimisticNextIndex
(
SSyncRaftProgress
*
progress
,
SyncIndex
nextIndex
)
{
progress
->
nextIndex
=
nextIndex
+
1
;
}
/**
* syncRaftProgressMaybeDecrTo returns false if the given to index comes from an out of order message.
* Otherwise it decreases the progress next index to min(rejected, last) and returns true.
**/
/**
* syncRaftProgressMaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The
* arguments are the index of the append message rejected by the follower, and
* the hint that we want to decrease to.
*
* Rejections can happen spuriously as messages are sent out of order or
* duplicated. In such cases, the rejection pertains to an index that the
* Progress already knows were previously acknowledged, and false is returned
* without changing the Progress.
*
* If the rejection is genuine, Next is lowered sensibly, and the Progress is
* cleared for sending log entries.
**/
bool
syncRaftProgressMaybeDecrTo
(
SSyncRaftProgress
*
progress
,
SyncIndex
rejected
,
SyncIndex
lastIndex
);
SyncIndex
rejected
,
SyncIndex
matchHint
);
/**
* syncRaftProgressIsPaused returns whether sending log entries to this node has been
* paused. A node may be paused because it has rejected recent
* MsgApps, is currently waiting for a snapshot, or has reached the
* MaxInflightMsgs limit.
/**
* syncRaftProgressIsPaused returns whether sending log entries to this node has been throttled.
* This is done when a node has rejected recent MsgApps, is currently waiting
* for a snapshot, or has reached the MaxInflightMsgs limit. In normal
* operation, this is false. A throttled node will be contacted less frequently
* until it has reached a state in which it's able to accept a steady stream of
* log entries again.
**/
bool
syncRaftProgressIsPaused
(
SSyncRaftProgress
*
progress
);
static
FORCE_INLINE
void
syncRaftProgressPause
(
SSyncRaftProgress
*
progress
)
{
progress
->
paused
=
true
;
}
static
FORCE_INLINE
SyncIndex
syncRaftProgressNextIndex
(
SSyncRaftProgress
*
progress
)
{
return
progress
->
nextIndex
;
}
static
FORCE_INLINE
RaftProgressState
syncRaftProgressInReplicate
(
SSyncRaftProgress
*
progress
)
{
return
progress
->
state
==
PROGRESS_REPLICATE
;
return
progress
->
state
==
PROGRESS_
STATE_
REPLICATE
;
}
static
FORCE_INLINE
RaftProgressState
syncRaftProgressInSnapshot
(
SSyncRaftProgress
*
progress
)
{
return
progress
->
state
==
PROGRESS_SNAPSHOT
;
return
progress
->
state
==
PROGRESS_S
TATE_S
NAPSHOT
;
}
static
FORCE_INLINE
RaftProgressState
syncRaftProgressInProbe
(
SSyncRaftProgress
*
progress
)
{
return
progress
->
state
==
PROGRESS_PROBE
;
return
progress
->
state
==
PROGRESS_
STATE_
PROBE
;
}
static
FORCE_INLINE
bool
syncRaftProgressRecentActive
(
SSyncRaftProgress
*
progress
)
{
return
progress
->
recentActive
;
}
static
FORCE_INLINE
bool
syncRaftProgressUpdateSendTick
(
SSyncRaftProgress
*
progress
,
SyncTick
current
)
{
return
progress
->
lastSendTick
=
current
;
}
void
syncRaftProgressFailure
(
SSyncRaftProgress
*
progress
);
bool
syncRaftProgressNeedAbortSnapshot
(
SSyncRaftProgress
*
progress
);
/**
* return true if progress's log is up-todate
**/
bool
syncRaftProgressIsUptodate
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
);
void
syncRaftProgressBecomeProbe
(
SSyncRaftProgress
*
progress
);
void
syncRaftProgressBecomeReplicate
(
SSyncRaftProgress
*
progress
);
void
syncRaftProgressBecomeSnapshot
(
SSyncRaftProgress
*
progress
,
SyncIndex
snapshotIndex
);
/* inflights APIs */
int
syncRaftInflightReset
(
SSyncRaftInflights
*
inflights
);
bool
syncRaftInflightFull
(
SSyncRaftInflights
*
inflights
);
void
syncRaftInflightAdd
(
SSyncRaftInflights
*
inflights
,
SyncIndex
inflightIndex
);
void
syncRaftInflightFreeTo
(
SSyncRaftInflights
*
inflights
,
SyncIndex
toIndex
);
void
syncRaftInflightFreeFirstOne
(
SSyncRaftInflights
*
inflights
);
#if 0
...
...
source/libs/sync/inc/sync_raft_progress_tracker.h
浏览文件 @
e17f573e
...
...
@@ -18,7 +18,7 @@
#include "sync_type.h"
#include "sync_raft_quorum_joint.h"
#include "raft_progress.h"
#include "
sync_
raft_progress.h"
struct
SSyncRaftProgressTrackerConfig
{
SSyncRaftQuorumJointConfig
voters
;
...
...
@@ -94,7 +94,7 @@ SSyncRaftProgressTracker* syncRaftOpenProgressTracker();
void
syncRaftResetVotes
(
SSyncRaftProgressTracker
*
);
typedef
void
(
*
visitProgressFp
)(
SSyncRaftProgress
*
progress
,
void
*
arg
);
typedef
void
(
*
visitProgressFp
)(
int
i
,
SSyncRaftProgress
*
progress
,
void
*
arg
);
void
syncRaftProgressVisit
(
SSyncRaftProgressTracker
*
,
visitProgressFp
visit
,
void
*
arg
);
#endif
/* _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H */
source/libs/sync/src/raft.c
浏览文件 @
e17f573e
...
...
@@ -102,6 +102,8 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) {
syncRaftBecomeFollower
(
pRaft
,
pRaft
->
term
,
SYNC_NON_NODE_ID
);
pRaft
->
selfIndex
=
pRaft
->
cluster
.
selfIndex
;
syncInfo
(
"[%d:%d] restore vgid %d state: snapshot index success"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pInfo
->
vgId
);
return
0
;
...
...
@@ -443,8 +445,8 @@ static void abortLeaderTransfer(SSyncRaft* pRaft) {
pRaft
->
leadTransferee
=
SYNC_NON_NODE_ID
;
}
static
void
initProgress
(
SSyncRaftProgress
*
progress
,
void
*
arg
)
{
syncRaftInitProgress
((
SSyncRaft
*
)
arg
,
progress
);
static
void
initProgress
(
int
i
,
SSyncRaftProgress
*
progress
,
void
*
arg
)
{
syncRaftInitProgress
(
i
,
(
SSyncRaft
*
)
arg
,
progress
);
}
static
void
resetRaft
(
SSyncRaft
*
pRaft
,
SyncTerm
term
)
{
...
...
source/libs/sync/src/raft_replication.c
浏览文件 @
e17f573e
...
...
@@ -15,7 +15,7 @@
#include "raft.h"
#include "raft_log.h"
#include "raft_progress.h"
#include "
sync_
raft_progress.h"
#include "raft_replication.h"
static
int
sendSnapshot
(
SSyncRaft
*
pRaft
,
int
i
);
...
...
source/libs/sync/src/sync_raft_inflights.c
0 → 100644
浏览文件 @
e17f573e
/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "sync_raft_inflights.h"
/**
 * syncRaftOpenInflights creates an empty inflights window able to track up to
 * `size` in-flight messages.
 *
 * @param size capacity of the window in messages; negative values are rejected.
 * @return the new window, or NULL if `size` is negative or an allocation
 *         fails. Release it with syncRaftCloseInflights().
 **/
SSyncRaftInflights* syncRaftOpenInflights(int size) {
  /* a negative capacity would wrap to a huge value in the byte count below */
  if (size < 0) {
    return NULL;
  }

  SSyncRaftInflights* inflights = (SSyncRaftInflights*)malloc(sizeof(SSyncRaftInflights));
  if (inflights == NULL) {
    return NULL;
  }

  SyncIndex* buffer = (SyncIndex*)malloc(sizeof(SyncIndex) * size);
  if (buffer == NULL) {
    free(inflights);
    return NULL;
  }

  *inflights = (SSyncRaftInflights) {
    .buffer = buffer,
    .count  = 0,
    /* record the real capacity: with 0 here the window would report full at once */
    .size   = size,
    .start  = 0,
  };

  return inflights;
}
/**
 * syncRaftCloseInflights releases the index buffer and the window itself.
 * Passing NULL is a no-op, mirroring free().
 **/
void syncRaftCloseInflights(SSyncRaftInflights* inflights) {
  if (inflights == NULL) {
    return;
  }

  free(inflights->buffer);
  free(inflights);
}
/**
 * syncRaftInflightAdd records that a message whose last entry is
 * `inflightIndex` has just been dispatched. The caller must first check
 * syncRaftInflightFull() to make sure one more slot is available, and
 * successive calls must supply a monotonic sequence of indexes.
 **/
void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex) {
  assert(!syncRaftInflightFull(inflights));

  /* first free slot sits right behind the last occupied one */
  int slot = inflights->start + inflights->count;
  const int capacity = inflights->size;

  /* fold the position back into the buffer when it runs past the end */
  if (slot >= capacity) {
    slot -= capacity;
  }

  inflights->buffer[slot] = inflightIndex;
  inflights->count += 1;
}
/**
 * syncRaftInflightFreeLE releases every inflight whose index is less than or
 * equal to `toIndex`, advancing the start of the window past them.
 **/
void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex) {
  /* nothing is in flight */
  if (inflights->count == 0) {
    return;
  }
  /* toIndex lies to the left of the window */
  if (toIndex < inflights->buffer[inflights->start]) {
    return;
  }

  const int capacity = inflights->size;
  int released = 0;
  int cursor = inflights->start;

  /* walk forward until the first inflight larger than toIndex */
  while (released < inflights->count && !(toIndex < inflights->buffer[cursor])) {
    released++;

    /* step to the next slot, rotating at the end of the buffer */
    cursor++;
    if (cursor >= capacity) {
      cursor -= capacity;
    }
  }

  /* drop the released inflights and move the window start */
  inflights->count -= released;
  inflights->start = cursor;
  assert(inflights->count >= 0);

  /* an empty window restarts from the beginning of the buffer */
  if (inflights->count == 0) {
    inflights->start = 0;
  }
}
/**
 * syncRaftInflightFreeFirstOne releases the first (oldest) inflight.
 * This is a no-op if nothing is inflight.
 **/
void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights) {
  /* return before touching the buffer: with an empty window the slot at
   * `start` may never have been written */
  if (inflights->count == 0) {
    return;
  }

  syncRaftInflightFreeLE(inflights, inflights->buffer[inflights->start]);
}
source/libs/sync/src/raft_progress.c
→
source/libs/sync/src/
sync_
raft_progress.c
浏览文件 @
e17f573e
...
...
@@ -15,57 +15,50 @@
#include "raft.h"
#include "raft_log.h"
#include "raft_progress.h"
#include "sync_raft_progress.h"
#include "sync_raft_progress_tracker.h"
#include "sync.h"
#include "syncInt.h"
static
void
resetProgressState
(
SSyncRaftProgress
*
progress
,
RaftProgressState
state
);
static
void
probeAcked
(
SSyncRaftProgress
*
progress
);
static
void
resumeProgress
(
SSyncRaftProgress
*
progress
);
int
syncRaftProgressCreate
(
SSyncRaft
*
pRaft
)
{
/*
inflights->buffer = (SyncIndex*)malloc(sizeof(SyncIndex) * pRaft->maxInflightMsgs);
if (inflights->buffer == NULL) {
return RAFT_OOM;
void
syncRaftInitProgress
(
int
i
,
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
)
{
SSyncRaftInflights
*
inflights
=
syncRaftOpenInflights
(
pRaft
->
tracker
->
maxInflight
);
if
(
inflights
==
NULL
)
{
return
;
}
inflights->size = pRaft->maxInflightMsgs;
*/
}
/*
int syncRaftProgressRecreate(SSyncRaft* pRaft, const RaftConfiguration* configuration) {
}
*/
void
syncRaftInitProgress
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
)
{
*
progress
=
(
SSyncRaftProgress
)
{
.
matchIndex
=
progress
->
id
==
pRaft
->
selfId
?
syncRaftLogLastIndex
(
pRaft
->
log
)
:
0
,
.
matchIndex
=
i
==
pRaft
->
selfIndex
?
syncRaftLogLastIndex
(
pRaft
->
log
)
:
0
,
.
nextIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
)
+
1
,
//.inflights =
.
inflights
=
inflights
,
};
}
/**
* syncRaftProgressMaybeUpdate is called when an MsgAppResp arrives from the follower, with the
* index acked by it. The method returns false if the given n index comes from
* an outdated message. Otherwise it updates the progress and returns true.
**/
bool
syncRaftProgressMaybeUpdate
(
SSyncRaftProgress
*
progress
,
SyncIndex
lastIndex
)
{
bool
updated
=
false
;
if
(
progress
->
matchIndex
<
lastIndex
)
{
progress
->
matchIndex
=
lastIndex
;
updated
=
true
;
resumeProgress
(
progress
);
}
if
(
progress
->
nextIndex
<
lastIndex
+
1
)
{
progress
->
nextIndex
=
lastIndex
+
1
;
probeAcked
(
progress
);
}
progress
->
nextIndex
=
MAX
(
progress
->
nextIndex
,
lastIndex
+
1
);
return
updated
;
}
bool
syncRaftProgressMaybeDecrTo
(
SSyncRaftProgress
*
progress
,
SyncIndex
rejected
,
SyncIndex
lastIndex
)
{
if
(
progress
->
state
==
PROGRESS_REPLICATE
)
{
SyncIndex
rejected
,
SyncIndex
matchHint
)
{
if
(
progress
->
state
==
PROGRESS_
STATE_
REPLICATE
)
{
/**
* the rejection must be stale if the progress has matched and "rejected"
* is smaller than "match".
...
...
@@ -77,143 +70,102 @@ bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress,
/* directly decrease next to match + 1 */
progress
->
nextIndex
=
progress
->
matchIndex
+
1
;
//syncRaftProgressBecomeProbe(raft, i);
return
true
;
}
/**
* The rejection must be stale if "rejected" does not match next - 1. This
* is because non-replicating followers are probed one entry at a time.
**/
if
(
rejected
!=
progress
->
nextIndex
-
1
)
{
syncDebug
(
"rejected index %"
PRId64
" different from next index %"
PRId64
" -> ignore"
,
rejected
,
progress
->
nextIndex
);
return
false
;
}
progress
->
nextIndex
=
MIN
(
rejected
,
lastIndex
+
1
);
if
(
progress
->
nextIndex
<
1
)
{
progress
->
nextIndex
=
1
;
}
progress
->
nextIndex
=
MAX
(
MIN
(
rejected
,
matchHint
+
1
),
1
);
resumeProgress
(
progress
)
;
progress
->
probeSent
=
false
;
return
true
;
}
static
void
resumeProgress
(
SSyncRaftProgress
*
progress
)
{
progress
->
paused
=
false
;
}
/**
* syncRaftProgressIsPaused returns whether sending log entries to this node has been throttled.
* This is done when a node has rejected recent MsgApps, is currently waiting
* for a snapshot, or has reached the MaxInflightMsgs limit. In normal
* operation, this is false. A throttled node will be contacted less frequently
* until it has reached a state in which it's able to accept a steady stream of
* log entries again.
**/
bool
syncRaftProgressIsPaused
(
SSyncRaftProgress
*
progress
)
{
switch
(
progress
->
state
)
{
case
PROGRESS_PROBE
:
return
progress
->
p
aused
;
case
PROGRESS_REPLICATE
:
return
syncRaftInflightFull
(
&
progress
->
inflights
);
case
PROGRESS_SNAPSHOT
:
case
PROGRESS_
STATE_
PROBE
:
return
progress
->
p
robeSent
;
case
PROGRESS_
STATE_
REPLICATE
:
return
syncRaftInflightFull
(
progress
->
inflights
);
case
PROGRESS_S
TATE_S
NAPSHOT
:
return
true
;
default:
syncFatal
(
"error sync state:%d"
,
progress
->
state
);
}
}
void
syncRaftProgressFailure
(
SSyncRaftProgress
*
progress
)
{
progress
->
pendingSnapshotIndex
=
0
;
}
bool
syncRaftProgressNeedAbortSnapshot
(
SSyncRaftProgress
*
progress
)
{
return
progress
->
state
==
PROGRESS_SNAPSHOT
&&
progress
->
matchIndex
>=
progress
->
pendingSnapshotIndex
;
}
bool
syncRaftProgressIsUptodate
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
)
{
return
syncRaftLogLastIndex
(
pRaft
->
log
)
+
1
==
progress
->
nextIndex
;
}
/**
* syncRaftProgressBecomeProbe transitions into StateProbe. Next is reset to Match+1 or,
* optionally and if larger, the index of the pending snapshot.
**/
void
syncRaftProgressBecomeProbe
(
SSyncRaftProgress
*
progress
)
{
/**
* If the original state is ProgressStateSnapshot, progress knows that
* the pending snapshot has been sent to this peer successfully, then
* probes from pendingSnapshot + 1.
**/
if
(
progress
->
state
==
PROGRESS_SNAPSHOT
)
{
if
(
progress
->
state
==
PROGRESS_S
TATE_S
NAPSHOT
)
{
SyncIndex
pendingSnapshotIndex
=
progress
->
pendingSnapshotIndex
;
resetProgressState
(
progress
,
PROGRESS_PROBE
);
resetProgressState
(
progress
,
PROGRESS_
STATE_
PROBE
);
progress
->
nextIndex
=
MAX
(
progress
->
matchIndex
+
1
,
pendingSnapshotIndex
+
1
);
}
else
{
resetProgressState
(
progress
,
PROGRESS_PROBE
);
resetProgressState
(
progress
,
PROGRESS_
STATE_
PROBE
);
progress
->
nextIndex
=
progress
->
matchIndex
+
1
;
}
}
/**
* syncRaftProgressBecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
**/
void
syncRaftProgressBecomeReplicate
(
SSyncRaftProgress
*
progress
)
{
resetProgressState
(
progress
,
PROGRESS_REPLICATE
);
resetProgressState
(
progress
,
PROGRESS_
STATE_
REPLICATE
);
progress
->
nextIndex
=
progress
->
matchIndex
+
1
;
}
void
syncRaftProgressBecomeSnapshot
(
SSyncRaftProgress
*
progress
,
SyncIndex
snapshotIndex
)
{
resetProgressState
(
progress
,
PROGRESS_SNAPSHOT
);
resetProgressState
(
progress
,
PROGRESS_S
TATE_S
NAPSHOT
);
progress
->
pendingSnapshotIndex
=
snapshotIndex
;
}
int
syncRaftInflightReset
(
SSyncRaftInflights
*
inflights
)
{
inflights
->
count
=
0
;
inflights
->
start
=
0
;
return
0
;
}
bool
syncRaftInflightFull
(
SSyncRaftInflights
*
inflights
)
{
return
inflights
->
count
==
inflights
->
size
;
}
void
syncRaftInflightAdd
(
SSyncRaftInflights
*
inflights
,
SyncIndex
inflightIndex
)
{
assert
(
!
syncRaftInflightFull
(
inflights
));
int
next
=
inflights
->
start
+
inflights
->
count
;
int
size
=
inflights
->
size
;
/* is next wrapped around buffer? */
if
(
next
>=
size
)
{
next
-=
size
;
}
inflights
->
buffer
[
next
]
=
inflightIndex
;
inflights
->
count
++
;
}
void
syncRaftInflightFreeTo
(
SSyncRaftInflights
*
inflights
,
SyncIndex
toIndex
)
{
if
(
inflights
->
count
==
0
||
toIndex
<
inflights
->
buffer
[
inflights
->
start
])
{
return
;
}
int
i
,
idx
;
for
(
i
=
0
,
idx
=
inflights
->
start
;
i
<
inflights
->
count
;
i
++
)
{
if
(
toIndex
<
inflights
->
buffer
[
idx
])
{
break
;
}
int
size
=
inflights
->
size
;
idx
++
;
if
(
idx
>=
size
)
{
idx
-=
size
;
}
}
inflights
->
count
-=
i
;
inflights
->
start
=
idx
;
assert
(
inflights
->
count
>=
0
);
if
(
inflights
->
count
==
0
)
{
inflights
->
start
=
0
;
}
}
void
syncRaftInflightFreeFirstOne
(
SSyncRaftInflights
*
inflights
)
{
syncRaftInflightFreeTo
(
inflights
,
inflights
->
buffer
[
inflights
->
start
]);
}
/**
* ResetState moves the Progress into the specified State, resetting ProbeSent,
* PendingSnapshot, and Inflights.
**/
static
void
resetProgressState
(
SSyncRaftProgress
*
progress
,
RaftProgressState
state
)
{
progress
->
p
aused
=
false
;
progress
->
p
robeSent
=
false
;
progress
->
pendingSnapshotIndex
=
0
;
progress
->
state
=
state
;
syncRaftInflightReset
(
&
(
progress
->
inflights
)
);
syncRaftInflightReset
(
progress
->
inflights
);
}
/**
* probeAcked is called when this peer has accepted an append. It resets
* ProbeSent to signal that additional append messages should be sent without
* further delay.
**/
static
void
probeAcked
(
SSyncRaftProgress
*
progress
)
{
progress
->
probeSent
=
false
;
}
#if 0
...
...
@@ -250,33 +202,33 @@ bool syncRaftProgressGetRecentRecv(SSyncRaft* pRaft, int i) {
void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i) {
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]);
resetProgressState(progress, PROGRESS_SNAPSHOT);
resetProgressState(progress, PROGRESS_S
TATE_S
NAPSHOT);
progress->pendingSnapshotIndex = raftLogSnapshotIndex(pRaft->log);
}
void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i) {
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]);
if (progress->state == PROGRESS_SNAPSHOT) {
if (progress->state == PROGRESS_S
TATE_S
NAPSHOT) {
assert(progress->pendingSnapshotIndex > 0);
SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex;
resetProgressState(progress, PROGRESS_PROBE);
resetProgressState(progress, PROGRESS_
STATE_
PROBE);
progress->nextIndex = max(progress->matchIndex + 1, pendingSnapshotIndex);
} else {
resetProgressState(progress, PROGRESS_PROBE);
resetProgressState(progress, PROGRESS_
STATE_
PROBE);
progress->nextIndex = progress->matchIndex + 1;
}
}
void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i) {
resetProgressState(pRaft->leaderState.progress, PROGRESS_REPLICATE);
resetProgressState(pRaft->leaderState.progress, PROGRESS_
STATE_
REPLICATE);
pRaft->leaderState.progress->nextIndex = pRaft->leaderState.progress->matchIndex + 1;
}
void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i) {
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]);
progress->pendingSnapshotIndex = 0;
progress->state = PROGRESS_PROBE;
progress->state = PROGRESS_
STATE_
PROBE;
}
RaftProgressState syncRaftProgressState(SSyncRaft* pRaft, int i) {
...
...
source/libs/sync/src/sync_raft_progress_tracker.c
浏览文件 @
e17f573e
...
...
@@ -32,10 +32,6 @@ void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp vi
int
i
;
for
(
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
SSyncRaftProgress
*
progress
=
&
(
tracker
->
progressMap
[
i
]);
if
(
progress
->
id
==
SYNC_NON_NODE_ID
)
{
continue
;
}
visit
(
progress
,
arg
);
visit
(
i
,
progress
,
arg
);
}
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录