提交 9b07606b 编写于 作者: P Philipp Heckel

Only disable semi-sync master if enabled; always disable semi-sync master; add IsDowntimed check back
上级 1aac5063
......@@ -584,7 +584,7 @@ func StartReplicationUntilMasterCoordinates(instanceKey *InstanceKey, masterCoor
// MaybeDisableSemiSyncMaster disables the semi-sync master (rpl_semi_sync_master_enabled) if the semi-sync priority is > 0 and the flag is currently enabled.
// This is a little odd but in line with the legacy behavior, and we really should disable the semi-sync master flag for replicas when starting replication.
func MaybeDisableSemiSyncMaster(replicaInstance *Instance) (*Instance, error) {
if replicaInstance.SemiSyncPriority > 0 {
if replicaInstance.SemiSyncPriority > 0 && replicaInstance.SemiSyncMasterEnabled {
log.Infof("semi-sync: %s: setting rpl_semi_sync_master_enabled: %t", &replicaInstance.Key, false)
replicaInstance, err := SetSemiSyncMaster(&replicaInstance.Key, false)
if err != nil {
......@@ -609,7 +609,7 @@ func MaybeEnableSemiSyncReplica(replicaInstance *Instance) (*Instance, error) {
// New logic: If EnforceExactSemiSyncReplicas or RecoverLockedSemiSyncMaster are set, we enable semi-sync only if the
// given replica instance is in the list of replicas to have semi-sync enabled (according to the priority).
_, actions, err := AnalyzeSemiSyncReplicaTopology(&replicaInstance.MasterKey, &replicaInstance.Key, config.Config.EnforceExactSemiSyncReplicas)
_, _, actions, err := AnalyzeSemiSyncReplicaTopology(&replicaInstance.MasterKey, &replicaInstance.Key, config.Config.EnforceExactSemiSyncReplicas)
if err != nil {
return replicaInstance, log.Errorf("semi-sync: %s", err.Error())
}
......@@ -639,15 +639,15 @@ func maybeEnableSemiSyncReplicaLegacy(replicaInstance *Instance) (*Instance, err
// AnalyzeSemiSyncReplicaTopology analyzes the replica topology for the given master and determines actions for the semi-sync replica enabled
// variable. It does not take any action itself.
func AnalyzeSemiSyncReplicaTopology(masterKey *InstanceKey, includeNonReplicatingInstance *InstanceKey, exactReplicaTopology bool) (masterInstance *Instance, actions map[*Instance]bool, err error) {
func AnalyzeSemiSyncReplicaTopology(masterKey *InstanceKey, includeNonReplicatingInstance *InstanceKey, exactReplicaTopology bool) (masterInstance *Instance, replicas []*Instance, actions map[*Instance]bool, err error) {
// Read entire topology of master and its replicas
masterInstance, err = ReadTopologyInstance(masterKey)
if err != nil {
return nil, nil, err
return nil, nil, nil, err
}
replicas, err := ReadReplicaInstances(masterKey)
replicas, err = ReadReplicaInstances(masterKey)
if err != nil {
return nil, nil, err
return nil, nil, nil, err
}
// Classify and prioritize replicas & figure out which replicas need to be acted upon
......@@ -655,7 +655,7 @@ func AnalyzeSemiSyncReplicaTopology(masterKey *InstanceKey, includeNonReplicatin
actions = determineSemiSyncReplicaActions(masterInstance, possibleSemiSyncReplicas, asyncReplicas, exactReplicaTopology)
logSemiSyncReplicaAnalysis(masterInstance, possibleSemiSyncReplicas, asyncReplicas, excludedReplicas, actions)
return masterInstance, actions, nil
return masterInstance, replicas, actions, nil
}
// classifyAndPrioritizeReplicas takes a list of replica instances and classifies them based on their semi-sync priority, excluding replicas
......@@ -666,9 +666,10 @@ func classifyAndPrioritizeReplicas(replicas []*Instance, includeNonReplicatingIn
asyncReplicas = make([]*Instance, 0)
excludedReplicas = make([]*Instance, 0)
for _, replica := range replicas {
if !replica.IsLastCheckValid || (!replica.Key.Equals(includeNonReplicatingInstance) && !replica.ReplicaRunning()) {
isReplicating := replica.Key.Equals(includeNonReplicatingInstance) || replica.ReplicaRunning()
if !replica.IsLastCheckValid || !isReplicating {
excludedReplicas = append(excludedReplicas, replica)
} else if replica.SemiSyncPriority == 0 {
} else if replica.SemiSyncPriority == 0 || replica.IsDowntimed {
asyncReplicas = append(asyncReplicas, replica)
} else {
possibleSemiSyncReplicas = append(possibleSemiSyncReplicas, replica)
......@@ -756,7 +757,7 @@ func logSemiSyncReplicaList(description string, replicas []*Instance) {
if len(replicas) > 0 {
log.Debugf("semi-sync: %s:", description)
for _, replica := range replicas {
log.Debugf("semi-sync: - %s: semi-sync enabled = %t, priority = %d, promotion rule = %s, last check = %t, replicating = %t", replica.Key.String(), replica.SemiSyncReplicaEnabled, replica.SemiSyncPriority, replica.PromotionRule, replica.IsLastCheckValid, replica.ReplicaRunning())
log.Debugf("semi-sync: - %s: semi-sync enabled = %t, priority = %d, promotion rule = %s, downtimed = %t, last check = %t, replicating = %t", replica.Key.String(), replica.SemiSyncReplicaEnabled, replica.SemiSyncPriority, replica.PromotionRule, replica.IsDowntimed, replica.IsLastCheckValid, replica.ReplicaRunning())
}
} else {
log.Debugf("semi-sync: %s: (none)", description)
......
......@@ -1510,7 +1510,7 @@ func checkAndRecoverMasterWithTooManySemiSyncReplicas(analysisEntry inst.Replica
// variable (rpl_semi_sync_replica_enabled) of the replicas depending on their semi-sync priority and promotion rule. If exactReplicaTopology, the function will only ever enable
// semi-sync on replicas and never disable it.
func recoverSemiSyncReplicas(topologyRecovery *TopologyRecovery, analysisEntry inst.ReplicationAnalysis, exactReplicaTopology bool) (recoveryAttempted bool, topologyRecoveryOut *TopologyRecovery, err error) {
masterInstance, actions, err := inst.AnalyzeSemiSyncReplicaTopology(&analysisEntry.AnalyzedInstanceKey, nil, exactReplicaTopology)
masterInstance, replicas, actions, err := inst.AnalyzeSemiSyncReplicaTopology(&analysisEntry.AnalyzedInstanceKey, nil, exactReplicaTopology)
if err != nil {
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("semi-sync: %s", err.Error()))
return true, topologyRecovery, log.Errorf("semi-sync: %s", err.Error())
......@@ -1519,6 +1519,12 @@ func recoverSemiSyncReplicas(topologyRecovery *TopologyRecovery, analysisEntry i
return true, topologyRecovery, log.Errorf("cannot determine actions based on possible semi-sync replicas; cannot recover on %+v", &analysisEntry.AnalyzedInstanceKey)
}
// Disable semi-sync master on all replicas; this is to avoid semi-sync failures on the replicas (rpl_semi_sync_master_no_tx)
// and to make it consistent with the logic in SetReadOnly
for _, replica := range replicas {
inst.MaybeDisableSemiSyncMaster(replica) // it's okay if this fails
}
// Take action: we first enable and then disable (two loops) in order to avoid "locked master" scenarios
AuditTopologyRecovery(topologyRecovery, "semi-sync: taking actions:")
for replica, enable := range actions {
......@@ -1537,7 +1543,6 @@ func recoverSemiSyncReplicas(topologyRecovery *TopologyRecovery, analysisEntry i
}
}
}
// TODO even though we resolve correctly here, we are re-triggering the same analysis until the next polling interval. WHY?
resolveRecovery(topologyRecovery, masterInstance)
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("semi-sync: recovery complete; success = %t", topologyRecovery.IsSuccessful))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册