From fb01dd562878f401f93de4788c1b18af272702f7 Mon Sep 17 00:00:00 2001 From: lichuang Date: Fri, 12 Nov 2021 16:00:33 +0800 Subject: [PATCH] [TD-10645][raft]add restore process --- .../libs/sync/inc/sync_raft_config_change.h | 42 +++++ source/libs/sync/inc/sync_raft_progress.h | 6 + .../sync/inc/sync_raft_progress_tracker.h | 113 ++++++----- source/libs/sync/inc/sync_raft_proto.h | 61 ++++++ source/libs/sync/inc/sync_raft_quorum_joint.h | 26 ++- .../libs/sync/inc/sync_raft_quorum_majority.h | 3 +- source/libs/sync/inc/sync_raft_restore.h | 25 +++ source/libs/sync/inc/sync_type.h | 11 +- .../libs/sync/src/sync_raft_config_change.c | 154 +++++++++++++++ source/libs/sync/src/sync_raft_impl.c | 2 +- source/libs/sync/src/sync_raft_progress.c | 4 + .../sync/src/sync_raft_progress_tracker.c | 10 +- source/libs/sync/src/sync_raft_quorum_joint.c | 6 +- .../libs/sync/src/sync_raft_quorum_majority.c | 2 +- source/libs/sync/src/sync_raft_restore.c | 177 ++++++++++++++++++ 15 files changed, 572 insertions(+), 70 deletions(-) create mode 100644 source/libs/sync/inc/sync_raft_config_change.h create mode 100644 source/libs/sync/inc/sync_raft_proto.h create mode 100644 source/libs/sync/inc/sync_raft_restore.h create mode 100644 source/libs/sync/src/sync_raft_config_change.c create mode 100644 source/libs/sync/src/sync_raft_restore.c diff --git a/source/libs/sync/inc/sync_raft_config_change.h b/source/libs/sync/inc/sync_raft_config_change.h new file mode 100644 index 0000000000..98b8b49cb8 --- /dev/null +++ b/source/libs/sync/inc/sync_raft_config_change.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TD_SYNC_RAFT_CONFIG_CHANGE_H +#define TD_SYNC_RAFT_CONFIG_CHANGE_H + +#include "sync_type.h" +#include "sync_raft_proto.h" + +/** + * Changer facilitates configuration changes. It exposes methods to handle + * simple and joint consensus while performing the proper validation that allows + * refusing invalid configuration changes before they affect the active + * configuration. + **/ +struct SSyncRaftChanger { + SSyncRaftProgressTracker* tracker; + SyncIndex lastIndex; +}; + +typedef int (*configChangeFp)(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css, + SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); + +int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css, + SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); + +int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css, + SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); + +#endif /* TD_SYNC_RAFT_CONFIG_CHANGE_H */ diff --git a/source/libs/sync/inc/sync_raft_progress.h b/source/libs/sync/inc/sync_raft_progress.h index 6a376ad710..b5017f963d 100644 --- a/source/libs/sync/inc/sync_raft_progress.h +++ b/source/libs/sync/inc/sync_raft_progress.h @@ -129,6 +129,10 @@ struct SSyncRaftProgress { bool isLearner; }; +struct SSyncRaftProgressMap { + SSyncRaftProgress progress[TSDB_MAX_REPLICA]; +}; + void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress); /** @@ -210,7 +214,9 @@ bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress); void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex); +void syncRaftProgressCopy(const SSyncRaftProgress* from, SSyncRaftProgress* to); +void syncRaftProgressMapCopy(const SSyncRaftProgressMap* from, SSyncRaftProgressMap* to); #if 0 diff --git a/source/libs/sync/inc/sync_raft_progress_tracker.h b/source/libs/sync/inc/sync_raft_progress_tracker.h index acfa38c378..61308d5df5 100644 --- a/source/libs/sync/inc/sync_raft_progress_tracker.h +++ b/source/libs/sync/inc/sync_raft_progress_tracker.h @@ -17,77 +17,72 @@ #define _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H #include "sync_type.h" +#include "sync_raft_quorum.h" #include "sync_raft_quorum_joint.h" #include "sync_raft_progress.h" struct SSyncRaftProgressTrackerConfig { SSyncRaftQuorumJointConfig voters; - /** - * autoLeave is true if the configuration is joint and a transition to the - * incoming configuration should be carried out automatically by Raft when - * this is possible. If false, the configuration will be joint until the - * application initiates the transition manually. - **/ + // autoLeave is true if the configuration is joint and a transition to the + // incoming configuration should be carried out automatically by Raft when + // this is possible. If false, the configuration will be joint until the + // application initiates the transition manually. bool autoLeave; - /** - * Learners is a set of IDs corresponding to the learners active in the - * current configuration. - * - * Invariant: Learners and Voters does not intersect, i.e. if a peer is in - * either half of the joint config, it can't be a learner; if it is a - * learner it can't be in either half of the joint config. This invariant - * simplifies the implementation since it allows peers to have clarity about - * its current role without taking into account joint consensus. - **/ - SyncNodeId learners[TSDB_MAX_REPLICA]; - - /** - * When we turn a voter into a learner during a joint consensus transition, - * we cannot add the learner directly when entering the joint state. This is - * because this would violate the invariant that the intersection of - * voters and learners is empty. For example, assume a Voter is removed and - * immediately re-added as a learner (or in other words, it is demoted): - * - * Initially, the configuration will be - * - * voters: {1 2 3} - * learners: {} - * - * and we want to demote 3. Entering the joint configuration, we naively get - * - * voters: {1 2} & {1 2 3} - * learners: {3} - * - * but this violates the invariant (3 is both voter and learner). Instead, - * we get - * - * voters: {1 2} & {1 2 3} - * learners: {} - * next_learners: {3} - * - * Where 3 is now still purely a voter, but we are remembering the intention - * to make it a learner upon transitioning into the final configuration: - * - * voters: {1 2} - * learners: {3} - * next_learners: {} - * - * Note that next_learners is not used while adding a learner that is not - * also a voter in the joint config. In this case, the learner is added - * right away when entering the joint configuration, so that it is caught up - * as soon as possible. - **/ - SyncNodeId learnersNext[TSDB_MAX_REPLICA]; + // Learners is a set of IDs corresponding to the learners active in the + // current configuration. + // + // Invariant: Learners and Voters does not intersect, i.e. if a peer is in + // either half of the joint config, it can't be a learner; if it is a + // learner it can't be in either half of the joint config. This invariant + // simplifies the implementation since it allows peers to have clarity about + // its current role without taking into account joint consensus. + SSyncRaftNodeMap learners; + + // When we turn a voter into a learner during a joint consensus transition, + // we cannot add the learner directly when entering the joint state. This is + // because this would violate the invariant that the intersection of + // voters and learners is empty. For example, assume a Voter is removed and + // immediately re-added as a learner (or in other words, it is demoted): + // + // Initially, the configuration will be + // + // voters: {1 2 3} + // learners: {} + // + // and we want to demote 3. Entering the joint configuration, we naively get + // + // voters: {1 2} & {1 2 3} + // learners: {3} + // + // but this violates the invariant (3 is both voter and learner). Instead, + // we get + // + // voters: {1 2} & {1 2 3} + // learners: {} + // next_learners: {3} + // + // Where 3 is now still purely a voter, but we are remembering the intention + // to make it a learner upon transitioning into the final configuration: + // + // voters: {1 2} + // learners: {3} + // next_learners: {} + // + // Note that next_learners is not used while adding a learner that is not + // also a voter in the joint config. In this case, the learner is added + // right away when entering the joint configuration, so that it is caught up + // as soon as possible. + SSyncRaftNodeMap learnersNext; }; struct SSyncRaftProgressTracker { SSyncRaftProgressTrackerConfig config; - SSyncRaftProgress progressMap[TSDB_MAX_REPLICA]; + SSyncRaftProgressMap progressMap; - ESyncRaftVoteResult votes[TSDB_MAX_REPLICA]; + ESyncRaftVoteType votes[TSDB_MAX_REPLICA]; int maxInflightMsgs; }; @@ -104,6 +99,10 @@ void syncRaftProgressVisit(SSyncRaftProgressTracker*, visitProgressFp visit, voi **/ void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, int i, bool grant); +void syncRaftCloneTrackerConfig(const SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressTrackerConfig* result); + +int syncRaftCheckProgress(const SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); + /** * syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the * election outcome is known. diff --git a/source/libs/sync/inc/sync_raft_proto.h b/source/libs/sync/inc/sync_raft_proto.h new file mode 100644 index 0000000000..49d706875f --- /dev/null +++ b/source/libs/sync/inc/sync_raft_proto.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TD_SYNC_RAFT_PROTO_H +#define TD_SYNC_RAFT_PROTO_H + +#include "sync_type.h" + +typedef enum ESyncRaftConfChangeType { + SYNC_RAFT_Conf_AddNode = 0, + SYNC_RAFT_Conf_RemoveNode = 1, + SYNC_RAFT_Conf_UpdateNode = 2, + SYNC_RAFT_Conf_AddLearnerNode = 2, +} ESyncRaftConfChangeType; + +// ConfChangeSingle is an individual configuration change operation. Multiple +// such operations can be carried out atomically via a ConfChangeV2. +typedef struct SSyncConfChangeSingle { + ESyncRaftConfChangeType type; + SyncNodeId nodeId; +} SSyncConfChangeSingle; + +typedef struct SSyncConfChangeSingleArray { + int n; + SSyncConfChangeSingle* changes; +} SSyncConfChangeSingleArray; + +typedef struct SSyncConfigState { + // The voters in the incoming config. (If the configuration is not joint, + // then the outgoing config is empty). + SSyncRaftNodeMap voters; + + // The learners in the incoming config. + SSyncRaftNodeMap learners; + + // The voters in the outgoing config. + SSyncRaftNodeMap votersOutgoing; + + // The nodes that will become learners when the outgoing config is removed. + // These nodes are necessarily currently in nodes_joint (or they would have + // been added to the incoming config right away). + SSyncRaftNodeMap learnersNext; + + // If set, the config is joint and Raft will automatically transition into + // the final config (i.e. remove the outgoing config) when this is safe. + bool autoLeave; +} SSyncConfigState; + +#endif /* TD_SYNC_RAFT_PROTO_H */ diff --git a/source/libs/sync/inc/sync_raft_quorum_joint.h b/source/libs/sync/inc/sync_raft_quorum_joint.h index dec1c39d90..103a147de3 100644 --- a/source/libs/sync/inc/sync_raft_quorum_joint.h +++ b/source/libs/sync/inc/sync_raft_quorum_joint.h @@ -25,14 +25,34 @@ * majority configurations. Decisions require the support of both majorities. **/ typedef struct SSyncRaftQuorumJointConfig { - SSyncCluster majorityConfig[2]; -}SSyncRaftQuorumJointConfig; + SSyncCluster outgoing; + SSyncCluster incoming; +} SSyncRaftQuorumJointConfig; /** * syncRaftVoteResult takes a mapping of voters to yes/no (true/false) votes and returns * a result indicating whether the vote is pending, lost, or won. A joint quorum * requires both majority quorums to vote in favor. **/ -ESyncRaftVoteResult syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteResult* votes); +ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteType* votes); + +static FORCE_INLINE bool syncRaftJointConfigInCluster(const SSyncCluster* cluster, SyncNodeId id) { + int i; + for (i = 0; i < cluster->replica; ++i) { + if (cluster->nodeInfo[i].nodeId == id) { + return true; + } + } + + return false; +} + +static FORCE_INLINE bool syncRaftJointConfigInOutgoing(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) { + return syncRaftJointConfigInCluster(&config->outgoing, id); +} + +static FORCE_INLINE bool syncRaftJointConfigInIncoming(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) { + return syncRaftJointConfigInCluster(&config->incoming, id); +} #endif /* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */ diff --git a/source/libs/sync/inc/sync_raft_quorum_majority.h b/source/libs/sync/inc/sync_raft_quorum_majority.h index 6955c7a072..8f148873e3 100644 --- a/source/libs/sync/inc/sync_raft_quorum_majority.h +++ b/source/libs/sync/inc/sync_raft_quorum_majority.h @@ -18,6 +18,7 @@ #include "sync.h" #include "sync_type.h" +#include "sync_raft_quorum.h" /** * syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns @@ -25,6 +26,6 @@ * yes/no has been reached), won (a quorum of yes has been reached), or lost (a * quorum of no has been reached). **/ -ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteResult* votes); +ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteType* votes); #endif /* _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H */ diff --git a/source/libs/sync/inc/sync_raft_restore.h b/source/libs/sync/inc/sync_raft_restore.h new file mode 100644 index 0000000000..fc65ae7440 --- /dev/null +++ b/source/libs/sync/inc/sync_raft_restore.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TD_SYNC_RAFT_RESTORE_H +#define TD_SYNC_RAFT_RESTORE_H + +#include "sync_type.h" +#include "sync_raft_proto.h" + +int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs, + SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); + +#endif /* TD_SYNC_RAFT_RESTORE_H */ diff --git a/source/libs/sync/inc/sync_type.h b/source/libs/sync/inc/sync_type.h index 3925323415..a7977f318e 100644 --- a/source/libs/sync/inc/sync_type.h +++ b/source/libs/sync/inc/sync_type.h @@ -17,6 +17,7 @@ #define _TD_LIBS_SYNC_TYPE_H #include +#include "sync.h" #include "osMath.h" #define SYNC_NON_NODE_ID -1 @@ -28,10 +29,13 @@ typedef uint32_t SyncTick; typedef struct SSyncRaft SSyncRaft; typedef struct SSyncRaftProgress SSyncRaftProgress; +typedef struct SSyncRaftProgressMap SSyncRaftProgressMap; typedef struct SSyncRaftProgressTrackerConfig SSyncRaftProgressTrackerConfig; typedef struct SSyncRaftProgressTracker SSyncRaftProgressTracker; +typedef struct SSyncRaftChanger SSyncRaftChanger; + typedef struct SSyncRaftLog SSyncRaftLog; typedef struct SSyncRaftEntry SSyncRaftEntry; @@ -46,6 +50,11 @@ typedef struct SSyncRaftEntry SSyncRaftEntry; #endif #endif +typedef struct { + int32_t replica; + SyncNodeId nodeId[TSDB_MAX_REPLICA]; +} SSyncRaftNodeMap; + typedef enum { SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0, SYNC_RAFT_CAMPAIGN_ELECTION = 1, @@ -61,6 +70,6 @@ typedef enum { //reject the vote request SYNC_RAFT_VOTE_RESP_REJECT = 2, -} ESyncRaftVoteResult; +} ESyncRaftVoteType; #endif /* _TD_LIBS_SYNC_TYPE_H */ diff --git a/source/libs/sync/src/sync_raft_config_change.c b/source/libs/sync/src/sync_raft_config_change.c new file mode 100644 index 0000000000..e99da0e226 --- /dev/null +++ b/source/libs/sync/src/sync_raft_config_change.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "syncInt.h" +#include "sync_raft_config_change.h" +#include "sync_raft_progress.h" +#include "sync_raft_progress_tracker.h" + +static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); +static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); +static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); +static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); +static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config); +static int applyConfig(SSyncRaftChanger* changer, const SSyncRaftProgressTrackerConfig* config, + const SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css); + +// Simple carries out a series of configuration changes that (in aggregate) +// mutates the incoming majority config Voters[0] by at most one. This method +// will return an error if that is not the case, if the resulting quorum is +// zero, or if the configuration is in a joint state (i.e. if there is an +// outgoing configuration). +int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css, + SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { + int ret; + + ret = checkAndCopy(changer, config, progressMap); + if (ret != 0) { + return ret; + } + + if (hasJointConfig(config)) { + return -1; + } + + ret = applyConfig(changer, config, progressMap, css); + if (ret != 0) { + return ret; + } + + return checkAndReturn(config, progressMap); +} + +// checkAndCopy copies the tracker's config and progress map (deeply enough for +// the purposes of the Changer) and returns those copies. It returns an error +// if checkInvariants does. +static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { + syncRaftCloneTrackerConfig(&changer->tracker->config, config); + int i; + for (i = 0; i < TSDB_MAX_REPLICA; ++i) { + SSyncRaftProgress* progress = &(changer->tracker->progressMap.progress[i]); + if (progress->id == SYNC_NON_NODE_ID) { + continue; + } + syncRaftProgressCopy(progress, &(progressMap->progress[i])); + } + return checkAndReturn(config, progressMap); +} + +// checkAndReturn calls checkInvariants on the input and returns either the +// resulting error or the input. +static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { + if (checkInvariants(config, progressMap) != 0) { + return -1; + } + + return 0; +} + +// checkInvariants makes sure that the config and progress are compatible with +// each other. This is used to check both what the Changer is initialized with, +// as well as what it returns. +static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { + int ret = syncRaftCheckProgress(config, progressMap); + if (ret != 0) { + return ret; + } + + int i; + // Any staged learner was staged because it could not be directly added due + // to a conflicting voter in the outgoing config. + for (i = 0; i < TSDB_MAX_REPLICA; ++i) { + if (!syncRaftJointConfigInOutgoing(&config->voters, config->learnersNext.nodeId[i])) { + return -1; + } + if (progressMap->progress[i].id != SYNC_NON_NODE_ID && progressMap->progress[i].isLearner) { + syncError("%d is in LearnersNext, but is already marked as learner", progressMap->progress[i].id); + return -1; + } + } + // Conversely Learners and Voters doesn't intersect at all. + for (i = 0; i < TSDB_MAX_REPLICA; ++i) { + if (syncRaftJointConfigInIncoming(&config->voters, config->learners.nodeId[i])) { + syncError("%d is in Learners and voter.incoming", progressMap->progress[i].id); + return -1; + } + if (progressMap->progress[i].id != SYNC_NON_NODE_ID && !progressMap->progress[i].isLearner) { + syncError("%d is in Learners, but is not marked as learner", progressMap->progress[i].id); + return -1; + } + } + + if (!hasJointConfig(config)) { + // We enforce that empty maps are nil instead of zero. + if (config->learnersNext.replica > 0) { + syncError("cfg.LearnersNext must be nil when not joint"); + return -1; + } + if (config->autoLeave) { + syncError("AutoLeave must be false when not joint"); + return -1; + } + } + + return 0; +} + +static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config) { + return config->voters.outgoing.replica > 0; +} + +static int applyConfig(SSyncRaftChanger* changer, const SSyncRaftProgressTrackerConfig* config, + const SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css) { + int i; + + for (i = 0; i < css->n; ++i) { + const SSyncConfChangeSingle* cs = &(css->changes[i]); + if (cs->nodeId == SYNC_NON_NODE_ID) { + continue; + } + + ESyncRaftConfChangeType type = cs->type; + switch (type) { + + } + } + + if (config->voters.incoming.replica == 0) { + return -1; + } + + return 0; +} \ No newline at end of file diff --git a/source/libs/sync/src/sync_raft_impl.c b/source/libs/sync/src/sync_raft_impl.c index 4d1f66a474..5e23474a89 100644 --- a/source/libs/sync/src/sync_raft_impl.c +++ b/source/libs/sync/src/sync_raft_impl.c @@ -243,7 +243,7 @@ static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) { syncRaftLogAppend(pRaft->log, entries, n); - SSyncRaftProgress* progress = &(pRaft->tracker->progressMap[pRaft->cluster.selfIndex]); + SSyncRaftProgress* progress = &(pRaft->tracker->progressMap.progress[pRaft->cluster.selfIndex]); syncRaftProgressMaybeUpdate(progress, lastIndex); // Regardless of maybeCommit's return, our caller will call bcastAppend. maybeCommit(pRaft); diff --git a/source/libs/sync/src/sync_raft_progress.c b/source/libs/sync/src/sync_raft_progress.c index df954402f8..e7de3fcf98 100644 --- a/source/libs/sync/src/sync_raft_progress.c +++ b/source/libs/sync/src/sync_raft_progress.c @@ -149,6 +149,10 @@ void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snaps progress->pendingSnapshotIndex = snapshotIndex; } +void syncRaftProgressCopy(const SSyncRaftProgress* progress, SSyncRaftProgress* out) { + +} + /** * ResetState moves the Progress into the specified State, resetting ProbeSent, * PendingSnapshot, and Inflights. diff --git a/source/libs/sync/src/sync_raft_progress_tracker.c b/source/libs/sync/src/sync_raft_progress_tracker.c index f5da538e1a..525b2eec1a 100644 --- a/source/libs/sync/src/sync_raft_progress_tracker.c +++ b/source/libs/sync/src/sync_raft_progress_tracker.c @@ -25,13 +25,13 @@ SSyncRaftProgressTracker* syncRaftOpenProgressTracker() { } void syncRaftResetVotes(SSyncRaftProgressTracker* tracker) { - memset(tracker->votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(ESyncRaftVoteResult) * TSDB_MAX_REPLICA); + memset(tracker->votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(ESyncRaftVoteType) * TSDB_MAX_REPLICA); } void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp visit, void* arg) { int i; for (i = 0; i < TSDB_MAX_REPLICA; ++i) { - SSyncRaftProgress* progress = &(tracker->progressMap[i]); + SSyncRaftProgress* progress = &(tracker->progressMap.progress[i]); visit(i, progress, arg); } } @@ -44,6 +44,10 @@ void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, int i, bool grant) { tracker->votes[i] = grant ? SYNC_RAFT_VOTE_RESP_GRANT : SYNC_RAFT_VOTE_RESP_REJECT; } +void syncRaftCloneTrackerConfig(const SSyncRaftProgressTrackerConfig* from, SSyncRaftProgressTrackerConfig* to) { + +} + /** * syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the * election outcome is known. @@ -54,7 +58,7 @@ ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* r int r, g; for (i = 0, r = 0, g = 0; i < TSDB_MAX_REPLICA; ++i) { - progress = &(tracker->progressMap[i]); + progress = &(tracker->progressMap.progress[i]); if (progress->id == SYNC_NON_NODE_ID) { continue; } diff --git a/source/libs/sync/src/sync_raft_quorum_joint.c b/source/libs/sync/src/sync_raft_quorum_joint.c index 9084e1868a..f8b5463ad8 100644 --- a/source/libs/sync/src/sync_raft_quorum_joint.c +++ b/source/libs/sync/src/sync_raft_quorum_joint.c @@ -22,9 +22,9 @@ * a result indicating whether the vote is pending, lost, or won. A joint quorum * requires both majority quorums to vote in favor. **/ -ESyncRaftVoteResult syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteResult* votes) { - ESyncRaftVoteResult r1 = syncRaftMajorityVoteResult(&(config->majorityConfig[0]), votes); - ESyncRaftVoteResult r2 = syncRaftMajorityVoteResult(&(config->majorityConfig[1]), votes); +ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteType* votes) { + ESyncRaftVoteResult r1 = syncRaftMajorityVoteResult(&(config->incoming), votes); + ESyncRaftVoteResult r2 = syncRaftMajorityVoteResult(&(config->outgoing), votes); if (r1 == r2) { // If they agree, return the agreed state. diff --git a/source/libs/sync/src/sync_raft_quorum_majority.c b/source/libs/sync/src/sync_raft_quorum_majority.c index 0d13998112..0361845414 100644 --- a/source/libs/sync/src/sync_raft_quorum_majority.c +++ b/source/libs/sync/src/sync_raft_quorum_majority.c @@ -22,7 +22,7 @@ * yes/no has been reached), won (a quorum of yes has been reached), or lost (a * quorum of no has been reached). **/ -ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteResult* votes) { +ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteType* votes) { if (config->replica == 0) { return SYNC_RAFT_VOTE_WON; } diff --git a/source/libs/sync/src/sync_raft_restore.c b/source/libs/sync/src/sync_raft_restore.c new file mode 100644 index 0000000000..c7dfaa07b4 --- /dev/null +++ b/source/libs/sync/src/sync_raft_restore.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "sync_raft_config_change.h" +#include "sync_raft_restore.h" +#include "sync_raft_progress_tracker.h" + +static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in); + +int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs, + SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { + SSyncConfChangeSingleArray outgoing; + SSyncConfChangeSingleArray incoming; + SSyncConfChangeSingleArray css; + int i, ret; + + ret = toConfChangeSingle(cs, &outgoing, &incoming); + if (ret != 0) { + goto out; + } + + if (outgoing.n == 0) { + // No outgoing config, so just apply the incoming changes one by one. + for (i = 0; i < incoming.n; ++i) { + css = (SSyncConfChangeSingleArray) { + .n = 1, + .changes = &incoming.changes[i], + }; + ret = syncRaftChangerSimpleConfig(changer, &css, config, progressMap); + if (ret != 0) { + goto out; + } + syncRaftCloneTrackerConfig(config, &changer->tracker->config); + syncRaftProgressMapCopy(progressMap, &changer->tracker->progressMap); + } + } else { + // The ConfState describes a joint configuration. + // + // First, apply all of the changes of the outgoing config one by one, so + // that it temporarily becomes the incoming active config. For example, + // if the config is (1 2 3)&(2 3 4), this will establish (2 3 4)&(). + for (i = 0; i < outgoing.n; ++i) { + css = (SSyncConfChangeSingleArray) { + .n = 1, + .changes = &outgoing.changes[i], + }; + ret = syncRaftChangerSimpleConfig(changer, &css, config, progressMap); + if (ret != 0) { + goto out; + } + syncRaftCloneTrackerConfig(config, &changer->tracker->config); + syncRaftProgressMapCopy(progressMap, &changer->tracker->progressMap); + } + + ret = syncRaftChangerEnterJoint(changer, &incoming, config, progressMap); + if (ret != 0) { + goto out; + } + syncRaftCloneTrackerConfig(config, &changer->tracker->config); + syncRaftProgressMapCopy(progressMap, &changer->tracker->progressMap); + } + +out: + if (incoming.n != 0) free(incoming.changes); + if (outgoing.n != 0) free(outgoing.changes); + return ret; +} + +// toConfChangeSingle translates a conf state into 1) a slice of operations creating +// first the config that will become the outgoing one, and then the incoming one, and +// b) another slice that, when applied to the config resulted from 1), represents the +// ConfState. +static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in) { + int i; + + out->n = in->n = 0; + + out->n = cs->votersOutgoing.replica; + out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * out->n); + if (out->changes == NULL) { + out->n = 0; + return -1; + } + in->n = cs->votersOutgoing.replica + cs->voters.replica + cs->learners.replica + cs->learnersNext.replica; + out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * in->n); + if (in->changes == NULL) { + in->n = 0; + return -1; + } + + // Example to follow along this code: + // voters=(1 2 3) learners=(5) outgoing=(1 2 4 6) learners_next=(4) + // + // This means that before entering the joint config, the configuration + // had voters (1 2 4 6) and perhaps some learners that are already gone. + // The new set of voters is (1 2 3), i.e. (1 2) were kept around, and (4 6) + // are no longer voters; however 4 is poised to become a learner upon leaving + // the joint state. + // We can't tell whether 5 was a learner before entering the joint config, + // but it doesn't matter (we'll pretend that it wasn't). + // + // The code below will construct + // outgoing = add 1; add 2; add 4; add 6 + // incoming = remove 1; remove 2; remove 4; remove 6 + // add 1; add 2; add 3; + // add-learner 5; + // add-learner 4; + // + // So, when starting with an empty config, after applying 'outgoing' we have + // + // quorum=(1 2 4 6) + // + // From which we enter a joint state via 'incoming' + // + // quorum=(1 2 3)&&(1 2 4 6) learners=(5) learners_next=(4) + // + // as desired. + + for (i = 0; i < cs->votersOutgoing.replica; ++i) { + // If there are outgoing voters, first add them one by one so that the + // (non-joint) config has them all. + out->changes[i] = (SSyncConfChangeSingle) { + .type = SYNC_RAFT_Conf_AddNode, + .nodeId = cs->votersOutgoing.nodeId[i], + }; + } + + // We're done constructing the outgoing slice, now on to the incoming one + // (which will apply on top of the config created by the outgoing slice). + + // First, we'll remove all of the outgoing voters. + int j = 0; + for (i = 0; i < cs->votersOutgoing.replica; ++i) { + in->changes[j] = (SSyncConfChangeSingle) { + .type = SYNC_RAFT_Conf_RemoveNode, + .nodeId = cs->votersOutgoing.nodeId[i], + }; + j += 1; + } + // Then we'll add the incoming voters and learners. + for (i = 0; i < cs->voters.replica; ++i) { + in->changes[j] = (SSyncConfChangeSingle) { + .type = SYNC_RAFT_Conf_AddNode, + .nodeId = cs->voters.nodeId[i], + }; + j += 1; + } + for (i = 0; i < cs->learners.replica; ++i) { + in->changes[j] = (SSyncConfChangeSingle) { + .type = SYNC_RAFT_Conf_AddLearnerNode, + .nodeId = cs->learners.nodeId[i], + }; + j += 1; + } + // Same for LearnersNext; these are nodes we want to be learners but which + // are currently voters in the outgoing config. + for (i = 0; i < cs->learnersNext.replica; ++i) { + in->changes[j] = (SSyncConfChangeSingle) { + .type = SYNC_RAFT_Conf_AddLearnerNode, + .nodeId = cs->learnersNext.nodeId[i], + }; + j += 1; + } + return 0; +} \ No newline at end of file -- GitLab