Cluster: refactoring of slots update code and more.

The commit also introduces detection of nodes publishing not updated configuration. More work in progress to send an UPDATE packet to inform of the config change.

Cluster: refactoring of slots update code and more.
The commit also introduces detection of nodes publishing not updated configuration. More work in progress to send an UPDATE packet to inform of the config change.
6c6572be · antirez · d3588dc1 · 6c6572be
显示空白变更内容
内联并排

Showing with 101 addition and 57 deletion

src/cluster.c src/cluster.c +101 -57

未找到文件。
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -1029,7 +1029,7 @@ int clusterProcessPacket(clusterLink *link) {
            }
        }

-        /* Update master/slave state */
+        /* Check for role switch: slave -> master or master -> slave. */
        if (sender) {
            if (!memcmp(hdr->slaveof,REDIS_NODE_NULL_NAME,
                sizeof(hdr->slaveof)))
@@ -1079,14 +1079,34 @@ int clusterProcessPacket(clusterLink *link) {
        }

        /* Update our info about served slots.
+         *
         * Note: this MUST happen after we update the master/slave state
         * so that REDIS_NODE_MASTER flag will be set. */
-        if (sender && sender->flags & REDIS_NODE_MASTER) {
-            int changes, j;

-            changes =
-                memcmp(sender->slots,hdr->myslots,sizeof(hdr->myslots)) != 0;
-            if (changes) {
+        /* Many checks are only needed if the set of served slots this
+         * instance claims is different compared to the set of slots we have for
+         * it. Check this ASAP to avoid other computational expansive checks later. */
+        clusterNode *sender_master = NULL; /* Sender or its master if it is a slave. */
+        int dirty_slots = 0; /* Sender claimed slots don't match my view? */
+
+        if (sender) {
+            sender_master = (sender->flags & REDIS_NODE_MASTER) ? sender :
+                                                                  sender->slaveof;
+            if (sender_master) {
+                dirty_slots = memcmp(sender_master->slots,
+                        hdr->myslots,sizeof(hdr->myslots)) != 0;
+            }
+        }
+
+        /* 1) We check if the sender (master) is claiming slots that we belive to
+         *    serve as a master, or replicate as a slave, but with a configEpoch
+         *    that is newer: in that case we rebind the slots to the claiming node
+         *    in our routing table.
+         *
+         *    Moreover if we are left with 0 slots to serve, we reconfigure as
+         *    a replica of the sender. */
+        if (sender && sender->flags & REDIS_NODE_MASTER && dirty_slots) {
+            int j;
            clusterNode *curmaster, *newmaster = NULL;

            /* Here we set curmaster to this node or the node this node
@@ -1115,15 +1135,6 @@ int clusterProcessPacket(clusterLink *link) {
                                             CLUSTER_TODO_UPDATE_STATE|
                                             CLUSTER_TODO_FSYNC_CONFIG);
                    }
-                    } else {
-                        /* This node claims to no longer handling the slot,
-                         * however we don't change our config as this is likely:
-                         * 1) Rehashing in progress.
-                         * 2) Failover.
-                         * In both cases we'll be informed about who is serving
-                         * the slot eventually. In the meantime it's up to the
-                         * original owner to try to redirect our clients to the
-                         * right node. */
                }
            }

@@ -1142,6 +1153,39 @@ int clusterProcessPacket(clusterLink *link) {
                                     CLUSTER_TODO_FSYNC_CONFIG);
            }
        }
+
+        /* 2) We also check for the reverse condition, that is, the sender claims
+         *    to serve slots we know are served by a master with a greater
+         *    configEpoch. If this happens we inform the sender.
+         *
+         * This is useful because sometimes after a partition heals, a reappearing
+         * master may be the last one to claim a given set of hash slots, but with
+         * a configuration that other instances know to be deprecated. Example:
+         *
+         * A and B are master and slave for slots 1,2,3.
+         * A is partitioned away, B gets promoted.
+         * B is partitioned away, and A returns available.
+         *
+         * Usually B would PING A publishing its set of served slots and its
+         * configEpoch, but because of the partition B can't inform A of the new
+         * configuration, so other nodes that have an updated table must do it.
+         * In this way A will stop to act as a master (or can try to failover if
+         * there are the conditions to win the election). */
+        if (sender && dirty_slots) {
+            int j;
+
+            for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+                if (bitmapTestBit(hdr->myslots,j)) {
+                    if (server.cluster->slots[j] == sender ||
+                        server.cluster->slots[j] == NULL) continue;
+                    if (server.cluster->slots[j]->configEpoch >
+                        senderConfigEpoch)
+                    {
+                        printf("MASTER or SLAVE have old config\n");
+                        break;
+                    }
+                }
+            }
        }

        /* Get info from the gossip section */