提交 07888202 编写于 作者: A antirez

PSYNC: work in progress, preview #2, rebased to unstable.

上级 e34a35a5
......@@ -227,6 +227,28 @@ slave-read-only yes
# be a good idea.
repl-disable-tcp-nodelay no
# Set the replication backlog size. The backlog is a buffer that accumulates
# slave data when slaves are disconnected for some time, so that when a slave
# wants to reconnect again, often a full resync is not needed, but a partial
# resync is enough, just passing the portion of data the slave missed while
# disconnected.
#
# The biggest the replication backlog, the longer the time the slave can be
# disconnected and later be able to perform a partial resynchronization.
#
# The backlog is only allocated once there is at least a slave connected.
#
# repl-backlog-size 1mb
# After a master has no longer connected slaves for some time, the backlog
# will be freed. The following option configures the amount of seconds that
# need to elapse, starting from the time the last slave disconnected, for
# the backlog buffer to be freed.
#
# A value of 0 means to never release the backlog.
#
# repl-backlog-ttl 3600
# The slave priority is an integer number published by Redis in the INFO output.
# It is used by Redis Sentinel in order to select a slave to promote into a
# master if the master is no longer working correctly.
......
......@@ -238,6 +238,18 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"repl-disable-tcp-nodelay") && argc==2) {
if ((server.repl_disable_tcp_nodelay = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
} else if (!strcasecmp(argv[0],"repl-backlog-size") && argc == 2) {
long long size = strtoll(argv[0],NULL,10);
if (size <= 0) {
err = "repl-backlog-size must be 1 or greater.";
goto loaderr;
}
resizeReplicationBacklog(size);
} else if (!strcasecmp(argv[0],"repl-backlog-ttl") && argc == 2) {
server.repl_backlog_time_limit = atoi(argv[1]);
if (server.repl_backlog_time_limit < 0) {
err = "repl-backlog-ttl can't be negative ";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
server.masterauth = zstrdup(argv[1]);
......@@ -719,6 +731,12 @@ void configSetCommand(redisClient *c) {
} else if (!strcasecmp(c->argv[2]->ptr,"repl-timeout")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
server.repl_timeout = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"repl-backlog-size")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
resizeReplicationBacklog(ll);
} else if (!strcasecmp(c->argv[2]->ptr,"repl-backlog-ttl")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
server.repl_backlog_time_limit = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"watchdog-period")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
if (ll)
......@@ -832,6 +850,8 @@ void configGetCommand(redisClient *c) {
config_get_numerical_field("databases",server.dbnum);
config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period);
config_get_numerical_field("repl-timeout",server.repl_timeout);
config_get_numerical_field("repl-backlog-size",server.repl_backlog_size);
config_get_numerical_field("repl-backlog-ttl",server.repl_backlog_time_limit);
config_get_numerical_field("maxclients",server.maxclients);
config_get_numerical_field("watchdog-period",server.watchdog_period);
config_get_numerical_field("slave-priority",server.slave_priority);
......
......@@ -511,8 +511,7 @@ void propagateExpire(redisDb *db, robj *key) {
if (server.aof_state != REDIS_AOF_OFF)
feedAppendOnlyFile(server.delCommand,db->id,argv,2);
if (listLength(server.slaves))
replicationFeedSlaves(server.slaves,db->id,argv,2);
replicationFeedSlaves(server.slaves,db->id,argv,2);
decrRefCount(argv[0]);
decrRefCount(argv[1]);
......
......@@ -108,8 +108,7 @@ void execCommandReplicateMulti(redisClient *c) {
if (server.aof_state != REDIS_AOF_OFF)
feedAppendOnlyFile(server.multiCommand,c->db->id,&multistring,1);
if (listLength(server.slaves))
replicationFeedSlaves(server.slaves,c->db->id,&multistring,1);
replicationFeedSlaves(server.slaves,c->db->id,&multistring,1);
decrRefCount(multistring);
}
......
......@@ -87,6 +87,7 @@ redisClient *createClient(int fd) {
c->ctime = c->lastinteraction = server.unixtime;
c->authenticated = 0;
c->replstate = REDIS_REPL_NONE;
c->reploff = 0;
c->slave_listening_port = 0;
c->reply = listCreate();
c->reply_bytes = 0;
......@@ -595,12 +596,42 @@ void disconnectSlaves(void) {
}
}
/* This function is called when the slave lose the connection with the
* master into an unexpected way. */
void replicationHandleMasterDisconnection(void) {
server.master = NULL;
server.repl_state = REDIS_REPL_CONNECT;
server.repl_down_since = server.unixtime;
/* We lost connection with our master, force our slaves to resync
* with us as well to load the new data set.
*
* If server.masterhost is NULL the user called SLAVEOF NO ONE so
* slave resync is not needed. */
if (server.masterhost != NULL) disconnectSlaves();
}
void freeClient(redisClient *c) {
listNode *ln;
/* If this is marked as current client unset it */
if (server.current_client == c) server.current_client = NULL;
/* If it is our master that's beging disconnected we should make sure
* to cache the state to try a partial resynchronization later.
*
* Note that before doing this we make sure that the client is not in
* some unexpected state, by checking its flags. */
if (server.master &&
(c->flags & REDIS_MASTER) &&
!(c->flags & (REDIS_CLOSE_AFTER_REPLY|
REDIS_CLOSE_ASAP|
REDIS_BLOCKED|
REDIS_UNBLOCKED)))
{
replicationCacheMaster(c);
return;
}
/* Note that if the client we are freeing is blocked into a blocking
* call, we have to set querybuf to NULL *before* to call
* unblockClientWaitingData() to avoid processInputBuffer() will get
......@@ -620,16 +651,21 @@ void freeClient(redisClient *c) {
pubsubUnsubscribeAllPatterns(c,0);
dictRelease(c->pubsub_channels);
listRelease(c->pubsub_patterns);
/* Obvious cleanup */
aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
/* Close socket, unregister events, and remove list of replies and
* accumulated arguments. */
if (c->fd != -1) {
aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
close(c->fd);
}
listRelease(c->reply);
freeClientArgv(c);
close(c->fd);
/* Remove from the list of clients */
ln = listSearchKey(server.clients,c);
redisAssert(ln != NULL);
listDelNode(server.clients,ln);
if (c->fd != -1) {
ln = listSearchKey(server.clients,c);
redisAssert(ln != NULL);
listDelNode(server.clients,ln);
}
/* When client was just unblocked because of a blocking operation,
* remove it from the list with unblocked clients. */
if (c->flags & REDIS_UNBLOCKED) {
......@@ -647,20 +683,15 @@ void freeClient(redisClient *c) {
ln = listSearchKey(l,c);
redisAssert(ln != NULL);
listDelNode(l,ln);
/* We need to remember the time when we started to have zero
* attached slaves, as after some time we'll free the replication
* backlog. */
if (c->flags & REDIS_SLAVE && listLength(server.slaves) == 0)
server.repl_no_slaves_since = server.unixtime;
}
/* Case 2: we lost the connection with the master. */
if (c->flags & REDIS_MASTER) {
server.master = NULL;
server.repl_state = REDIS_REPL_CONNECT;
server.repl_down_since = server.unixtime;
/* We lost connection with our master, force our slaves to resync
* with us as well to load the new data set.
*
* If server.masterhost is NULL the user called SLAVEOF NO ONE so
* slave resync is not needed. */
if (server.masterhost != NULL) disconnectSlaves();
}
if (c->flags & REDIS_MASTER) replicationHandleMasterDisconnection();
/* If this client was scheduled for async freeing we need to remove it
* from the queue. */
......@@ -1059,6 +1090,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
if (nread) {
sdsIncrLen(c->querybuf,nread);
c->lastinteraction = server.unixtime;
if (c->flags & REDIS_MASTER) c->reploff += nread;
} else {
server.current_client = NULL;
return;
......
......@@ -220,6 +220,7 @@ struct redisCommand redisCommandTable[] = {
{"exec",execCommand,1,"sM",0,NULL,0,0,0,0,0},
{"discard",discardCommand,1,"rs",0,NULL,0,0,0,0,0},
{"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
{"psync",syncCommand,3,"ars",0,NULL,0,0,0,0,0},
{"replconf",replconfCommand,-1,"ars",0,NULL,0,0,0,0,0},
{"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0},
{"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0},
......@@ -1202,6 +1203,8 @@ void initServerConfig() {
server.masterhost = NULL;
server.masterport = 6379;
server.master = NULL;
server.cached_master = NULL;
server.repl_master_initial_offset = -1;
server.repl_state = REDIS_REPL_NONE;
server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT;
server.repl_serve_stale_data = 1;
......@@ -1209,6 +1212,16 @@ void initServerConfig() {
server.repl_down_since = time(NULL);
server.repl_disable_tcp_nodelay = 0;
server.slave_priority = REDIS_DEFAULT_SLAVE_PRIORITY;
server.master_repl_offset = 0;
/* Replication partial resync backlog */
server.repl_backlog = NULL;
server.repl_backlog_size = REDIS_DEFAULT_REPL_BACKLOG_SIZE;
server.repl_backlog_histlen = 0;
server.repl_backlog_idx = 0;
server.repl_backlog_off = 0;
server.repl_backlog_time_limit = REDIS_DEFAULT_REPL_BACKLOG_TIME_LIMIT;
server.repl_no_slaves_since = time(NULL);
/* Client output buffer limits */
server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].hard_limit_bytes = 0;
......@@ -1522,7 +1535,7 @@ void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
{
if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF)
feedAppendOnlyFile(cmd,dbid,argv,argc);
if (flags & REDIS_PROPAGATE_REPL && listLength(server.slaves))
if (flags & REDIS_PROPAGATE_REPL)
replicationFeedSlaves(server.slaves,dbid,argv,argc);
}
......@@ -2151,13 +2164,15 @@ sds genRedisInfoString(char *section) {
"master_link_status:%s\r\n"
"master_last_io_seconds_ago:%d\r\n"
"master_sync_in_progress:%d\r\n"
"slave_repl_offset:%lld\r\n"
,server.masterhost,
server.masterport,
(server.repl_state == REDIS_REPL_CONNECTED) ?
"up" : "down",
server.master ?
((int)(server.unixtime-server.master->lastinteraction)) : -1,
server.repl_state == REDIS_REPL_TRANSFER
server.repl_state == REDIS_REPL_TRANSFER,
server.master ? server.master->reploff : -1
);
if (server.repl_state == REDIS_REPL_TRANSFER) {
......@@ -2215,6 +2230,17 @@ sds genRedisInfoString(char *section) {
slaveid++;
}
}
info = sdscatprintf(info,
"master_repl_offset:%lld\r\n"
"repl_backlog_active:%d\r\n"
"repl_backlog_size:%lld\r\n"
"repl_backlog_first_byte_offset:%lld\r\n"
"repl_backlog_histlen:%lld\r\n",
server.master_repl_offset,
server.repl_backlog != NULL,
server.repl_backlog_size,
server.repl_backlog_off,
server.repl_backlog_histlen);
}
/* CPU */
......
......@@ -93,6 +93,9 @@
#define REDIS_REPL_PING_SLAVE_PERIOD 10
#define REDIS_RUN_ID_SIZE 40
#define REDIS_OPS_SEC_SAMPLES 16
#define REDIS_DEFAULT_REPL_BACKLOG_SIZE (1024*1024) /* 1mb */
#define REDIS_DEFAULT_REPL_BACKLOG_TIME_LIMIT (60*60) /* 1 hour */
#define REDIS_REPL_BACKLOG_MIN_SIZE (1024*16) /* 16k */
/* Protocol and I/O related defines */
#define REDIS_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */
......@@ -100,6 +103,7 @@
#define REDIS_REPLY_CHUNK_BYTES (16*1024) /* 16k output buffer */
#define REDIS_INLINE_MAX_SIZE (1024*64) /* Max size of inline reads */
#define REDIS_MBULK_BIG_ARG (1024*32)
#define REDIS_LONGSTR_SIZE 21 /* Bytes needed for long -> str */
/* Hash table parameters */
#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
......@@ -407,7 +411,8 @@ typedef struct redisClient {
long bulklen; /* length of bulk argument in multi bulk request */
list *reply;
unsigned long reply_bytes; /* Tot bytes of objects in reply list */
int sentlen;
int sentlen; /* Amount of bytes already sent in the current
buffer or object being sent. */
time_t ctime; /* Client creation time */
time_t lastinteraction; /* time of the last interaction, used for timeout */
time_t obuf_soft_limit_reached_time;
......@@ -417,6 +422,8 @@ typedef struct redisClient {
int repldbfd; /* replication DB file descriptor */
long repldboff; /* replication DB file offset */
off_t repldbsize; /* replication DB file size */
long long reploff; /* replication offset if this is our master */
char replrunid[REDIS_RUN_ID_SIZE+1]; /* master run id if this is a master */
int slave_listening_port; /* As configured with: SLAVECONF listening-port */
multiState mstate; /* MULTI/EXEC state */
blockingState bpop; /* blocking state */
......@@ -662,7 +669,6 @@ struct redisServer {
list *clients; /* List of active clients */
list *clients_to_close; /* Clients to close asynchronously */
list *slaves, *monitors; /* List of slaves and MONITORs */
int slaveseldb; /* Last SELECTed DB in replication output */
redisClient *current_client; /* Current client, only used on crash report */
char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
dict *migrate_cached_sockets;/* MIGRATE cached sockets */
......@@ -745,13 +751,27 @@ struct redisServer {
int syslog_enabled; /* Is syslog enabled? */
char *syslog_ident; /* Syslog ident */
int syslog_facility; /* Syslog facility */
/* Slave specific fields */
/* Replication (master) */
int slaveseldb; /* Last SELECTed DB in replication output */
long long master_repl_offset; /* Global replication offset */
int repl_ping_slave_period; /* Master pings the slave every N seconds */
char *repl_backlog; /* Replication backlog for partial syncs */
long long repl_backlog_size; /* Backlog circular buffer size */
long long repl_backlog_histlen; /* Backlog actual data length */
long long repl_backlog_idx; /* Backlog circular buffer current offset */
long long repl_backlog_off; /* Replication offset of first byte in the
backlog buffer. */
time_t repl_backlog_time_limit; /* Time without slaves after the backlog
gets released. */
time_t repl_no_slaves_since; /* We have no slaves since that time.
Only valid if server.slaves len is 0. */
/* Replication (slave) */
char *masterauth; /* AUTH with this password with master */
char *masterhost; /* Hostname of master */
int masterport; /* Port of master */
int repl_ping_slave_period; /* Master pings the slave every N seconds */
int repl_timeout; /* Timeout after N seconds of master idle */
redisClient *master; /* Client that is master for this slave */
redisClient *cached_master; /* Cached master to be reused for PSYNC. */
int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
int repl_state; /* Replication status if the instance is a slave */
off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
......@@ -766,6 +786,8 @@ struct redisServer {
time_t repl_down_since; /* Unix time at which link with master went down */
int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */
int slave_priority; /* Reported in INFO and used by Sentinel. */
char repl_master_runid[REDIS_RUN_ID_SIZE+1]; /* Master run id for PSYNC. */
long long repl_master_initial_offset; /* Master PSYNC offset. */
/* Limits */
unsigned int maxclients; /* Max number of simultaneous clients */
unsigned long long maxmemory; /* Max number of memory bytes to use */
......@@ -930,6 +952,7 @@ void exitFromChild(int retcode);
redisClient *createClient(int fd);
void closeTimedoutClients(void);
void freeClient(redisClient *c);
void freeClientAsync(redisClient *c);
void resetClient(redisClient *c);
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
void addReply(redisClient *c, robj *obj);
......@@ -1053,6 +1076,9 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc);
void updateSlavesWaitingBgsave(int bgsaveerr);
void replicationCron(void);
void replicationHandleMasterDisconnection(void);
void replicationCacheMaster(redisClient *c);
void resizeReplicationBacklog(long long newsize);
/* Generic persistence functions */
void startLoading(FILE *fp);
......
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册