提交 fb01dd56 编写于 作者: L lichuang

[TD-10645][raft]<feature>add restore process

上级 8eeaa271
/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TD_SYNC_RAFT_CONFIG_CHANGE_H
#define TD_SYNC_RAFT_CONFIG_CHANGE_H
#include "sync_type.h"
#include "sync_raft_proto.h"
/**
* Changer facilitates configuration changes. It exposes methods to handle
* simple and joint consensus while performing the proper validation that allows
* refusing invalid configuration changes before they affect the active
* configuration.
**/
struct SSyncRaftChanger {
// Progress tracker the changer works against. checkAndCopy reads its config
// and progress map as the copy source, and syncRaftRestoreConfig passes them
// as copy destinations after each successful step.
SSyncRaftProgressTracker* tracker;
// NOTE(review): not referenced by any code visible alongside this header;
// presumably the last log index used when seeding progress for newly added
// nodes -- confirm against the implementation.
SyncIndex lastIndex;
};
// configChangeFp is a function-pointer type with the same signature as the two
// configuration-change entry points declared below.
typedef int (*configChangeFp)(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
// syncRaftChangerSimpleConfig applies the changes in css to a copy of the
// changer's tracker state, writing the result into config and progressMap.
// Returns 0 on success and a non-zero value when validation fails or the
// current configuration is joint.
int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
// syncRaftChangerEnterJoint is called by syncRaftRestoreConfig with the
// "incoming" change list once the outgoing voters have been established.
// NOTE(review): its definition is not visible here; presumably it switches the
// configuration into a joint state and returns 0 on success -- confirm.
int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
#endif /* TD_SYNC_RAFT_CONFIG_CHANGE_H */
...@@ -129,6 +129,10 @@ struct SSyncRaftProgress { ...@@ -129,6 +129,10 @@ struct SSyncRaftProgress {
bool isLearner; bool isLearner;
}; };
// SSyncRaftProgressMap is a fixed-size table of per-node progress records with
// one slot per possible replica. Callers treat a slot whose id equals
// SYNC_NON_NODE_ID as unused.
struct SSyncRaftProgressMap {
SSyncRaftProgress progress[TSDB_MAX_REPLICA];
};
void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress); void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress);
/** /**
...@@ -210,7 +214,9 @@ bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress); ...@@ -210,7 +214,9 @@ bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress);
void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex); void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex);
// Copies a single progress record from `from` into `to`.
// NOTE(review): the definition visible in this change has an empty body, so no
// data is copied yet -- confirm before relying on it.
void syncRaftProgressCopy(const SSyncRaftProgress* from, SSyncRaftProgress* to);
// Copies a whole progress map from `from` into `to`.
// NOTE(review): no definition is visible in this change -- confirm it exists.
void syncRaftProgressMapCopy(const SSyncRaftProgressMap* from, SSyncRaftProgressMap* to);
#if 0 #if 0
......
...@@ -17,77 +17,72 @@ ...@@ -17,77 +17,72 @@
#define _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H #define _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H
#include "sync_type.h" #include "sync_type.h"
#include "sync_raft_quorum.h"
#include "sync_raft_quorum_joint.h" #include "sync_raft_quorum_joint.h"
#include "sync_raft_progress.h" #include "sync_raft_progress.h"
struct SSyncRaftProgressTrackerConfig { struct SSyncRaftProgressTrackerConfig {
SSyncRaftQuorumJointConfig voters; SSyncRaftQuorumJointConfig voters;
/** // autoLeave is true if the configuration is joint and a transition to the
* autoLeave is true if the configuration is joint and a transition to the // incoming configuration should be carried out automatically by Raft when
* incoming configuration should be carried out automatically by Raft when // this is possible. If false, the configuration will be joint until the
* this is possible. If false, the configuration will be joint until the // application initiates the transition manually.
* application initiates the transition manually.
**/
bool autoLeave; bool autoLeave;
/** // Learners is a set of IDs corresponding to the learners active in the
* Learners is a set of IDs corresponding to the learners active in the // current configuration.
* current configuration. //
* // Invariant: Learners and Voters does not intersect, i.e. if a peer is in
* Invariant: Learners and Voters does not intersect, i.e. if a peer is in // either half of the joint config, it can't be a learner; if it is a
* either half of the joint config, it can't be a learner; if it is a // learner it can't be in either half of the joint config. This invariant
* learner it can't be in either half of the joint config. This invariant // simplifies the implementation since it allows peers to have clarity about
* simplifies the implementation since it allows peers to have clarity about // its current role without taking into account joint consensus.
* its current role without taking into account joint consensus. SSyncRaftNodeMap learners;
**/
SyncNodeId learners[TSDB_MAX_REPLICA]; // When we turn a voter into a learner during a joint consensus transition,
// we cannot add the learner directly when entering the joint state. This is
/** // because this would violate the invariant that the intersection of
* When we turn a voter into a learner during a joint consensus transition, // voters and learners is empty. For example, assume a Voter is removed and
* we cannot add the learner directly when entering the joint state. This is // immediately re-added as a learner (or in other words, it is demoted):
* because this would violate the invariant that the intersection of //
* voters and learners is empty. For example, assume a Voter is removed and // Initially, the configuration will be
* immediately re-added as a learner (or in other words, it is demoted): //
* // voters: {1 2 3}
* Initially, the configuration will be // learners: {}
* //
* voters: {1 2 3} // and we want to demote 3. Entering the joint configuration, we naively get
* learners: {} //
* // voters: {1 2} & {1 2 3}
* and we want to demote 3. Entering the joint configuration, we naively get // learners: {3}
* //
* voters: {1 2} & {1 2 3} // but this violates the invariant (3 is both voter and learner). Instead,
* learners: {3} // we get
* //
* but this violates the invariant (3 is both voter and learner). Instead, // voters: {1 2} & {1 2 3}
* we get // learners: {}
* // next_learners: {3}
* voters: {1 2} & {1 2 3} //
* learners: {} // Where 3 is now still purely a voter, but we are remembering the intention
* next_learners: {3} // to make it a learner upon transitioning into the final configuration:
* //
* Where 3 is now still purely a voter, but we are remembering the intention // voters: {1 2}
* to make it a learner upon transitioning into the final configuration: // learners: {3}
* // next_learners: {}
* voters: {1 2} //
* learners: {3} // Note that next_learners is not used while adding a learner that is not
* next_learners: {} // also a voter in the joint config. In this case, the learner is added
* // right away when entering the joint configuration, so that it is caught up
* Note that next_learners is not used while adding a learner that is not // as soon as possible.
* also a voter in the joint config. In this case, the learner is added SSyncRaftNodeMap learnersNext;
* right away when entering the joint configuration, so that it is caught up
* as soon as possible.
**/
SyncNodeId learnersNext[TSDB_MAX_REPLICA];
}; };
struct SSyncRaftProgressTracker { struct SSyncRaftProgressTracker {
SSyncRaftProgressTrackerConfig config; SSyncRaftProgressTrackerConfig config;
SSyncRaftProgress progressMap[TSDB_MAX_REPLICA]; SSyncRaftProgressMap progressMap;
ESyncRaftVoteResult votes[TSDB_MAX_REPLICA]; ESyncRaftVoteType votes[TSDB_MAX_REPLICA];
int maxInflightMsgs; int maxInflightMsgs;
}; };
...@@ -104,6 +99,10 @@ void syncRaftProgressVisit(SSyncRaftProgressTracker*, visitProgressFp visit, voi ...@@ -104,6 +99,10 @@ void syncRaftProgressVisit(SSyncRaftProgressTracker*, visitProgressFp visit, voi
**/ **/
void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, int i, bool grant); void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, int i, bool grant);
// Clones the tracker configuration `config` into `result`.
// NOTE(review): the definition visible in this change has an empty body, so
// `result` is currently left untouched -- confirm before relying on it.
void syncRaftCloneTrackerConfig(const SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressTrackerConfig* result);
// Checks a configuration against a progress map; callers treat a non-zero
// return as failure.
// NOTE(review): no definition is visible in this change; presumably it verifies
// that every node named in the config has a progress record -- confirm.
int syncRaftCheckProgress(const SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
/** /**
* syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the * syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the
* election outcome is known. * election outcome is known.
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TD_SYNC_RAFT_PROTO_H
#define TD_SYNC_RAFT_PROTO_H
#include "sync_type.h"
// ESyncRaftConfChangeType identifies the kind of a single membership change.
// Every enumerator has a distinct value so that a change can be dispatched on
// its type (for example with a switch statement).
typedef enum ESyncRaftConfChangeType {
  SYNC_RAFT_Conf_AddNode = 0,         // add a voting member
  SYNC_RAFT_Conf_RemoveNode = 1,      // remove a member
  SYNC_RAFT_Conf_UpdateNode = 2,      // update an existing member
  SYNC_RAFT_Conf_AddLearnerNode = 3,  // add a non-voting learner
} ESyncRaftConfChangeType;
// SSyncConfChangeSingle is an individual configuration change operation: one
// change type applied to one node. Several such operations can be grouped in
// an SSyncConfChangeSingleArray and applied together.
typedef struct SSyncConfChangeSingle {
// What to do with the node (add, remove, update, add as learner).
ESyncRaftConfChangeType type;
// The node the operation applies to. applyConfig skips entries whose nodeId
// is SYNC_NON_NODE_ID.
SyncNodeId nodeId;
} SSyncConfChangeSingle;
// SSyncConfChangeSingleArray is a counted view over a sequence of single
// configuration changes. The struct does not own `changes` by itself:
// syncRaftRestoreConfig uses it both for heap arrays that it frees and for
// one-element views pointing into another array.
typedef struct SSyncConfChangeSingleArray {
// Number of elements reachable through `changes`.
int n;
// First element of the sequence.
SSyncConfChangeSingle* changes;
} SSyncConfChangeSingleArray;
// SSyncConfigState is a flat description of a (possibly joint) membership
// configuration. syncRaftRestoreConfig turns it into the sequence of
// configuration changes needed to rebuild the tracker state.
typedef struct SSyncConfigState {
// The voters in the incoming config. (If the configuration is not joint,
// then the outgoing config is empty).
SSyncRaftNodeMap voters;
// The learners in the incoming config.
SSyncRaftNodeMap learners;
// The voters in the outgoing config.
SSyncRaftNodeMap votersOutgoing;
// The nodes that will become learners when the outgoing config is removed.
// These nodes are necessarily currently in votersOutgoing (or they would have
// been added to the incoming config right away).
SSyncRaftNodeMap learnersNext;
// If set, the config is joint and Raft will automatically transition into
// the final config (i.e. remove the outgoing config) when this is safe.
bool autoLeave;
} SSyncConfigState;
#endif /* TD_SYNC_RAFT_PROTO_H */
...@@ -25,14 +25,34 @@ ...@@ -25,14 +25,34 @@
* majority configurations. Decisions require the support of both majorities. * majority configurations. Decisions require the support of both majorities.
**/ **/
typedef struct SSyncRaftQuorumJointConfig { typedef struct SSyncRaftQuorumJointConfig {
SSyncCluster majorityConfig[2]; SSyncCluster outgoing;
}SSyncRaftQuorumJointConfig; SSyncCluster incoming;
} SSyncRaftQuorumJointConfig;
/** /**
* syncRaftVoteResult takes a mapping of voters to yes/no (true/false) votes and returns * syncRaftVoteResult takes a mapping of voters to yes/no (true/false) votes and returns
* a result indicating whether the vote is pending, lost, or won. A joint quorum * a result indicating whether the vote is pending, lost, or won. A joint quorum
* requires both majority quorums to vote in favor. * requires both majority quorums to vote in favor.
**/ **/
ESyncRaftVoteResult syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteResult* votes); ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteType* votes);
/**
 * syncRaftJointConfigInCluster reports whether `id` is the nodeId of one of the
 * first `cluster->replica` entries of `cluster->nodeInfo`.
 **/
static FORCE_INLINE bool syncRaftJointConfigInCluster(const SSyncCluster* cluster, SyncNodeId id) {
  int slot = 0;
  while (slot < cluster->replica) {
    if (cluster->nodeInfo[slot].nodeId == id) {
      return true;
    }
    slot += 1;
  }
  return false;
}
/**
 * syncRaftJointConfigInOutgoing reports whether `id` is a member of the
 * outgoing half of the joint configuration.
 **/
static FORCE_INLINE bool syncRaftJointConfigInOutgoing(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
  const SSyncCluster* outgoingVoters = &config->outgoing;
  return syncRaftJointConfigInCluster(outgoingVoters, id);
}
/**
 * syncRaftJointConfigInIncoming reports whether `id` is a member of the
 * incoming half of the joint configuration.
 **/
static FORCE_INLINE bool syncRaftJointConfigInIncoming(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
  const SSyncCluster* incomingVoters = &config->incoming;
  return syncRaftJointConfigInCluster(incomingVoters, id);
}
#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */ #endif /* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "sync.h" #include "sync.h"
#include "sync_type.h" #include "sync_type.h"
#include "sync_raft_quorum.h"
/** /**
* syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns * syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns
...@@ -25,6 +26,6 @@ ...@@ -25,6 +26,6 @@
* yes/no has been reached), won (a quorum of yes has been reached), or lost (a * yes/no has been reached), won (a quorum of yes has been reached), or lost (a
* quorum of no has been reached). * quorum of no has been reached).
**/ **/
ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteResult* votes); ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteType* votes);
#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H */ #endif /* _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H */
/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TD_SYNC_RAFT_RESTORE_H
#define TD_SYNC_RAFT_RESTORE_H
#include "sync_type.h"
#include "sync_raft_proto.h"
// syncRaftRestoreConfig rebuilds the tracker state described by `cs` by
// replaying it as configuration changes through `changer`. When `cs` has no
// outgoing voters the incoming changes are applied one at a time; otherwise the
// outgoing voters are applied one at a time and the incoming changes are then
// handed to syncRaftChangerEnterJoint. `config` and `progressMap` receive the
// result of each step. Returns 0 on success and a non-zero value on failure.
int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs,
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
#endif /* TD_SYNC_RAFT_RESTORE_H */
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#define _TD_LIBS_SYNC_TYPE_H #define _TD_LIBS_SYNC_TYPE_H
#include <stdint.h> #include <stdint.h>
#include "sync.h"
#include "osMath.h" #include "osMath.h"
#define SYNC_NON_NODE_ID -1 #define SYNC_NON_NODE_ID -1
...@@ -28,10 +29,13 @@ typedef uint32_t SyncTick; ...@@ -28,10 +29,13 @@ typedef uint32_t SyncTick;
typedef struct SSyncRaft SSyncRaft; typedef struct SSyncRaft SSyncRaft;
typedef struct SSyncRaftProgress SSyncRaftProgress; typedef struct SSyncRaftProgress SSyncRaftProgress;
typedef struct SSyncRaftProgressMap SSyncRaftProgressMap;
typedef struct SSyncRaftProgressTrackerConfig SSyncRaftProgressTrackerConfig; typedef struct SSyncRaftProgressTrackerConfig SSyncRaftProgressTrackerConfig;
typedef struct SSyncRaftProgressTracker SSyncRaftProgressTracker; typedef struct SSyncRaftProgressTracker SSyncRaftProgressTracker;
typedef struct SSyncRaftChanger SSyncRaftChanger;
typedef struct SSyncRaftLog SSyncRaftLog; typedef struct SSyncRaftLog SSyncRaftLog;
typedef struct SSyncRaftEntry SSyncRaftEntry; typedef struct SSyncRaftEntry SSyncRaftEntry;
...@@ -46,6 +50,11 @@ typedef struct SSyncRaftEntry SSyncRaftEntry; ...@@ -46,6 +50,11 @@ typedef struct SSyncRaftEntry SSyncRaftEntry;
#endif #endif
#endif #endif
// SSyncRaftNodeMap is a small fixed-capacity set of node ids.
typedef struct {
// Number of valid entries at the front of nodeId; readers iterate
// nodeId[0 .. replica-1].
int32_t replica;
// Node ids; only the first `replica` entries are meaningful.
SyncNodeId nodeId[TSDB_MAX_REPLICA];
} SSyncRaftNodeMap;
typedef enum { typedef enum {
SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0, SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0,
SYNC_RAFT_CAMPAIGN_ELECTION = 1, SYNC_RAFT_CAMPAIGN_ELECTION = 1,
...@@ -61,6 +70,6 @@ typedef enum { ...@@ -61,6 +70,6 @@ typedef enum {
//reject the vote request //reject the vote request
SYNC_RAFT_VOTE_RESP_REJECT = 2, SYNC_RAFT_VOTE_RESP_REJECT = 2,
} ESyncRaftVoteResult; } ESyncRaftVoteType;
#endif /* _TD_LIBS_SYNC_TYPE_H */ #endif /* _TD_LIBS_SYNC_TYPE_H */
/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "syncInt.h"
#include "sync_raft_config_change.h"
#include "sync_raft_progress.h"
#include "sync_raft_progress_tracker.h"
// Forward declarations of the file-local helpers, in the order they are
// defined below.
static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config);
static int applyConfig(SSyncRaftChanger* changer, const SSyncRaftProgressTrackerConfig* config,
                       const SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css);
// syncRaftChangerSimpleConfig carries out a series of configuration changes
// against a copy of the changer's tracker state and leaves the result in
// `config` and `progressMap`.
//
// It fails (non-zero return) when the copied state does not pass the invariant
// checks, when the configuration is joint (i.e. there is an outgoing
// configuration), when applying the changes fails -- which includes ending up
// with no incoming voters -- or when the resulting state does not pass the
// invariant checks.
//
// NOTE(review): the incoming voter set is meant to change by at most one
// member, but no such check is performed here -- confirm whether it is still
// to be added.
int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
                                SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  int rc = checkAndCopy(changer, config, progressMap);

  // A simple change is not allowed while a joint configuration is active.
  if (rc == 0 && hasJointConfig(config)) {
    rc = -1;
  }
  if (rc == 0) {
    rc = applyConfig(changer, config, progressMap, css);
  }
  if (rc == 0) {
    rc = checkAndReturn(config, progressMap);
  }
  return rc;
}
// checkAndCopy clones the tracker's config into `config` and copies every
// in-use progress record (id != SYNC_NON_NODE_ID) into the same slot of
// `progressMap`; unused slots of `progressMap` are left as they were. The
// return value is that of checkAndReturn on the copies.
static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  syncRaftCloneTrackerConfig(&changer->tracker->config, config);

  SSyncRaftProgressMap* source = &changer->tracker->progressMap;
  int slot;
  for (slot = 0; slot < TSDB_MAX_REPLICA; slot++) {
    if (source->progress[slot].id != SYNC_NON_NODE_ID) {
      syncRaftProgressCopy(&source->progress[slot], &progressMap->progress[slot]);
    }
  }

  return checkAndReturn(config, progressMap);
}
// checkAndReturn runs checkInvariants on the given state and collapses its
// result to 0 (invariants hold) or -1 (any failure).
static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  return (checkInvariants(config, progressMap) == 0) ? 0 : -1;
}
// checkInvariants makes sure that the config and progress are compatible with
// each other. This is used to check both what the Changer is initialized with,
// as well as what it returns.
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
int ret = syncRaftCheckProgress(config, progressMap);
if (ret != 0) {
return ret;
}
int i;
// Any staged learner was staged because it could not be directly added due
// to a conflicting voter in the outgoing config.
for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
if (!syncRaftJointConfigInOutgoing(&config->voters, config->learnersNext.nodeId[i])) {
return -1;
}
if (progressMap->progress[i].id != SYNC_NON_NODE_ID && progressMap->progress[i].isLearner) {
syncError("%d is in LearnersNext, but is already marked as learner", progressMap->progress[i].id);
return -1;
}
}
// Conversely Learners and Voters doesn't intersect at all.
for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
if (syncRaftJointConfigInIncoming(&config->voters, config->learners.nodeId[i])) {
syncError("%d is in Learners and voter.incoming", progressMap->progress[i].id);
return -1;
}
if (progressMap->progress[i].id != SYNC_NON_NODE_ID && !progressMap->progress[i].isLearner) {
syncError("%d is in Learners, but is not marked as learner", progressMap->progress[i].id);
return -1;
}
}
if (!hasJointConfig(config)) {
// We enforce that empty maps are nil instead of zero.
if (config->learnersNext.replica > 0) {
syncError("cfg.LearnersNext must be nil when not joint");
return -1;
}
if (config->autoLeave) {
syncError("AutoLeave must be false when not joint");
return -1;
}
}
return 0;
}
// hasJointConfig reports whether the configuration is joint, i.e. whether the
// outgoing voter set has at least one member.
static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config) {
  if (config->voters.outgoing.replica > 0) {
    return true;
  }
  return false;
}
// applyConfig walks the changes in `css`, skipping entries whose nodeId is
// SYNC_NON_NODE_ID, and then rejects (returns -1) a configuration that has no
// incoming voters; otherwise it returns 0.
//
// NOTE(review): no change type is handled yet -- the dispatch below has no
// cases, so `config` and `progressMap` are not modified by this function.
static int applyConfig(SSyncRaftChanger* changer, const SSyncRaftProgressTrackerConfig* config,
                       const SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css) {
  int idx = 0;

  while (idx < css->n) {
    const SSyncConfChangeSingle* change = &css->changes[idx];
    idx += 1;

    if (change->nodeId != SYNC_NON_NODE_ID) {
      switch (change->type) {
        default:
          break;
      }
    }
  }

  return (config->voters.incoming.replica == 0) ? -1 : 0;
}
\ No newline at end of file
...@@ -243,7 +243,7 @@ static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) { ...@@ -243,7 +243,7 @@ static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) {
syncRaftLogAppend(pRaft->log, entries, n); syncRaftLogAppend(pRaft->log, entries, n);
SSyncRaftProgress* progress = &(pRaft->tracker->progressMap[pRaft->cluster.selfIndex]); SSyncRaftProgress* progress = &(pRaft->tracker->progressMap.progress[pRaft->cluster.selfIndex]);
syncRaftProgressMaybeUpdate(progress, lastIndex); syncRaftProgressMaybeUpdate(progress, lastIndex);
// Regardless of maybeCommit's return, our caller will call bcastAppend. // Regardless of maybeCommit's return, our caller will call bcastAppend.
maybeCommit(pRaft); maybeCommit(pRaft);
......
...@@ -149,6 +149,10 @@ void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snaps ...@@ -149,6 +149,10 @@ void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snaps
progress->pendingSnapshotIndex = snapshotIndex; progress->pendingSnapshotIndex = snapshotIndex;
} }
// syncRaftProgressCopy is meant to copy `progress` into `out`.
// NOTE(review): the body is empty, so `out` is left unchanged and callers such
// as checkAndCopy currently receive no progress data -- confirm whether this
// is a placeholder that is still to be implemented.
void syncRaftProgressCopy(const SSyncRaftProgress* progress, SSyncRaftProgress* out) {
}
/** /**
* ResetState moves the Progress into the specified State, resetting ProbeSent, * ResetState moves the Progress into the specified State, resetting ProbeSent,
* PendingSnapshot, and Inflights. * PendingSnapshot, and Inflights.
......
...@@ -25,13 +25,13 @@ SSyncRaftProgressTracker* syncRaftOpenProgressTracker() { ...@@ -25,13 +25,13 @@ SSyncRaftProgressTracker* syncRaftOpenProgressTracker() {
} }
void syncRaftResetVotes(SSyncRaftProgressTracker* tracker) { void syncRaftResetVotes(SSyncRaftProgressTracker* tracker) {
memset(tracker->votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(ESyncRaftVoteResult) * TSDB_MAX_REPLICA); memset(tracker->votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(ESyncRaftVoteType) * TSDB_MAX_REPLICA);
} }
void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp visit, void* arg) { void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp visit, void* arg) {
int i; int i;
for (i = 0; i < TSDB_MAX_REPLICA; ++i) { for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
SSyncRaftProgress* progress = &(tracker->progressMap[i]); SSyncRaftProgress* progress = &(tracker->progressMap.progress[i]);
visit(i, progress, arg); visit(i, progress, arg);
} }
} }
...@@ -44,6 +44,10 @@ void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, int i, bool grant) { ...@@ -44,6 +44,10 @@ void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, int i, bool grant) {
tracker->votes[i] = grant ? SYNC_RAFT_VOTE_RESP_GRANT : SYNC_RAFT_VOTE_RESP_REJECT; tracker->votes[i] = grant ? SYNC_RAFT_VOTE_RESP_GRANT : SYNC_RAFT_VOTE_RESP_REJECT;
} }
// syncRaftCloneTrackerConfig is meant to clone the tracker config `from` into
// `to`.
// NOTE(review): the body is empty, so `to` is left unchanged; any caller that
// validates or installs the "cloned" config is working on whatever `to`
// already held -- confirm whether this is a placeholder still to be
// implemented.
void syncRaftCloneTrackerConfig(const SSyncRaftProgressTrackerConfig* from, SSyncRaftProgressTrackerConfig* to) {
}
/** /**
* syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the * syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the
* election outcome is known. * election outcome is known.
...@@ -54,7 +58,7 @@ ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* r ...@@ -54,7 +58,7 @@ ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* r
int r, g; int r, g;
for (i = 0, r = 0, g = 0; i < TSDB_MAX_REPLICA; ++i) { for (i = 0, r = 0, g = 0; i < TSDB_MAX_REPLICA; ++i) {
progress = &(tracker->progressMap[i]); progress = &(tracker->progressMap.progress[i]);
if (progress->id == SYNC_NON_NODE_ID) { if (progress->id == SYNC_NON_NODE_ID) {
continue; continue;
} }
......
...@@ -22,9 +22,9 @@ ...@@ -22,9 +22,9 @@
* a result indicating whether the vote is pending, lost, or won. A joint quorum * a result indicating whether the vote is pending, lost, or won. A joint quorum
* requires both majority quorums to vote in favor. * requires both majority quorums to vote in favor.
**/ **/
ESyncRaftVoteResult syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteResult* votes) { ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteType* votes) {
ESyncRaftVoteResult r1 = syncRaftMajorityVoteResult(&(config->majorityConfig[0]), votes); ESyncRaftVoteResult r1 = syncRaftMajorityVoteResult(&(config->incoming), votes);
ESyncRaftVoteResult r2 = syncRaftMajorityVoteResult(&(config->majorityConfig[1]), votes); ESyncRaftVoteResult r2 = syncRaftMajorityVoteResult(&(config->outgoing), votes);
if (r1 == r2) { if (r1 == r2) {
// If they agree, return the agreed state. // If they agree, return the agreed state.
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
* yes/no has been reached), won (a quorum of yes has been reached), or lost (a * yes/no has been reached), won (a quorum of yes has been reached), or lost (a
* quorum of no has been reached). * quorum of no has been reached).
**/ **/
ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteResult* votes) { ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteType* votes) {
if (config->replica == 0) { if (config->replica == 0) {
return SYNC_RAFT_VOTE_WON; return SYNC_RAFT_VOTE_WON;
} }
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "sync_raft_config_change.h"
#include "sync_raft_restore.h"
#include "sync_raft_progress_tracker.h"
static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in);
int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs,
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
SSyncConfChangeSingleArray outgoing;
SSyncConfChangeSingleArray incoming;
SSyncConfChangeSingleArray css;
int i, ret;
ret = toConfChangeSingle(cs, &outgoing, &incoming);
if (ret != 0) {
goto out;
}
if (outgoing.n == 0) {
// No outgoing config, so just apply the incoming changes one by one.
for (i = 0; i < incoming.n; ++i) {
css = (SSyncConfChangeSingleArray) {
.n = 1,
.changes = &incoming.changes[i],
};
ret = syncRaftChangerSimpleConfig(changer, &css, config, progressMap);
if (ret != 0) {
goto out;
}
syncRaftCloneTrackerConfig(config, &changer->tracker->config);
syncRaftProgressMapCopy(progressMap, &changer->tracker->progressMap);
}
} else {
// The ConfState describes a joint configuration.
//
// First, apply all of the changes of the outgoing config one by one, so
// that it temporarily becomes the incoming active config. For example,
// if the config is (1 2 3)&(2 3 4), this will establish (2 3 4)&().
for (i = 0; i < outgoing.n; ++i) {
css = (SSyncConfChangeSingleArray) {
.n = 1,
.changes = &outgoing.changes[i],
};
ret = syncRaftChangerSimpleConfig(changer, &css, config, progressMap);
if (ret != 0) {
goto out;
}
syncRaftCloneTrackerConfig(config, &changer->tracker->config);
syncRaftProgressMapCopy(progressMap, &changer->tracker->progressMap);
}
ret = syncRaftChangerEnterJoint(changer, &incoming, config, progressMap);
if (ret != 0) {
goto out;
}
syncRaftCloneTrackerConfig(config, &changer->tracker->config);
syncRaftProgressMapCopy(progressMap, &changer->tracker->progressMap);
}
out:
if (incoming.n != 0) free(incoming.changes);
if (outgoing.n != 0) free(outgoing.changes);
return ret;
}
// toConfChangeSingle translates a conf state into 1) a slice of operations
// (`out`) creating the config that will become the outgoing one, and 2) another
// slice (`in`) that, when applied to the config resulting from 1), represents
// the ConfState.
//
// On success it returns 0; `out` and `in` then own heap arrays of `n` elements
// each (a list with n == 0 has changes == NULL) which the caller must free.
// On allocation failure it returns -1 with both lists empty (n == 0,
// changes == NULL), so the caller has nothing to release.
static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in) {
  int i;
  int j = 0;
  int nOut = cs->votersOutgoing.replica;
  int nIn = cs->votersOutgoing.replica + cs->voters.replica + cs->learners.replica + cs->learnersNext.replica;

  // Start from well-defined empty lists so every exit path is safe to clean up.
  out->n = 0;
  out->changes = NULL;
  in->n = 0;
  in->changes = NULL;

  // Only allocate when there is something to store: malloc(0) may legally
  // return NULL, which must not be mistaken for an out-of-memory condition
  // (a non-joint state has no outgoing voters at all).
  if (nOut > 0) {
    out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * (size_t)nOut);
    if (out->changes == NULL) {
      return -1;
    }
    out->n = nOut;
  }

  if (nIn > 0) {
    in->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * (size_t)nIn);
    if (in->changes == NULL) {
      free(out->changes);
      out->changes = NULL;
      out->n = 0;
      return -1;
    }
    in->n = nIn;
  }

  // Example to follow along this code:
  // voters=(1 2 3) learners=(5) outgoing=(1 2 4 6) learners_next=(4)
  //
  // This means that before entering the joint config, the configuration
  // had voters (1 2 4 6) and perhaps some learners that are already gone.
  // The new set of voters is (1 2 3), i.e. (1 2) were kept around, and (4 6)
  // are no longer voters; however 4 is poised to become a learner upon leaving
  // the joint state.
  // We can't tell whether 5 was a learner before entering the joint config,
  // but it doesn't matter (we'll pretend that it wasn't).
  //
  // The code below will construct
  // outgoing = add 1; add 2; add 4; add 6
  // incoming = remove 1; remove 2; remove 4; remove 6
  //            add 1;    add 2;    add 3;
  //            add-learner 5;
  //            add-learner 4;
  //
  // So, when starting with an empty config, after applying 'outgoing' we have
  //
  //   quorum=(1 2 4 6)
  //
  // From which we enter a joint state via 'incoming'
  //
  //   quorum=(1 2 3)&&(1 2 4 6) learners=(5) learners_next=(4)
  //
  // as desired.

  for (i = 0; i < cs->votersOutgoing.replica; ++i) {
    // If there are outgoing voters, first add them one by one so that the
    // (non-joint) config has them all.
    out->changes[i] = (SSyncConfChangeSingle) {
      .type = SYNC_RAFT_Conf_AddNode,
      .nodeId = cs->votersOutgoing.nodeId[i],
    };
  }

  // We're done constructing the outgoing slice, now on to the incoming one
  // (which will apply on top of the config created by the outgoing slice).

  // First, we'll remove all of the outgoing voters.
  for (i = 0; i < cs->votersOutgoing.replica; ++i) {
    in->changes[j] = (SSyncConfChangeSingle) {
      .type = SYNC_RAFT_Conf_RemoveNode,
      .nodeId = cs->votersOutgoing.nodeId[i],
    };
    j += 1;
  }

  // Then we'll add the incoming voters and learners.
  for (i = 0; i < cs->voters.replica; ++i) {
    in->changes[j] = (SSyncConfChangeSingle) {
      .type = SYNC_RAFT_Conf_AddNode,
      .nodeId = cs->voters.nodeId[i],
    };
    j += 1;
  }
  for (i = 0; i < cs->learners.replica; ++i) {
    in->changes[j] = (SSyncConfChangeSingle) {
      .type = SYNC_RAFT_Conf_AddLearnerNode,
      .nodeId = cs->learners.nodeId[i],
    };
    j += 1;
  }

  // Same for LearnersNext; these are nodes we want to be learners but which
  // are currently voters in the outgoing config.
  for (i = 0; i < cs->learnersNext.replica; ++i) {
    in->changes[j] = (SSyncConfChangeSingle) {
      .type = SYNC_RAFT_Conf_AddLearnerNode,
      .nodeId = cs->learnersNext.nodeId[i],
    };
    j += 1;
  }

  return 0;
}
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册