Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
别团等shy哥发育
redis
提交
ccaba966
R
redis
项目概览
别团等shy哥发育
/
redis
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
redis
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ccaba966
编写于
11月 18, 2013
作者:
A
antirez
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Sentinel: state machine and timeouts simplified.
上级
3c4497e8
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
48 addition
and
47 deletion
+48
-47
src/sentinel.c
src/sentinel.c
+48
-47
未找到文件。
src/sentinel.c
浏览文件 @
ccaba966
...
...
@@ -87,7 +87,7 @@ typedef struct sentinelAddr {
#define SENTINEL_SLAVE_RECONF_RETRY_PERIOD 10000
#define SENTINEL_DEFAULT_PARALLEL_SYNCS 1
#define SENTINEL_MIN_LINK_RECONNECT_PERIOD 15000
#define SENTINEL_DEFAULT_FAILOVER_TIMEOUT (60*
1
5*1000)
#define SENTINEL_DEFAULT_FAILOVER_TIMEOUT (60*5*1000)
#define SENTINEL_MAX_PENDING_COMMANDS 100
#define SENTINEL_ELECTION_TIMEOUT 10000
...
...
@@ -107,8 +107,7 @@ typedef struct sentinelAddr {
#define SENTINEL_FAILOVER_STATE_WAIT_NEXT_SLAVE 6
/* wait replication */
#define SENTINEL_FAILOVER_STATE_ALERT_CLIENTS 7
/* Run user script. */
#define SENTINEL_FAILOVER_STATE_WAIT_ALERT_SCRIPT 8
/* Wait script exec. */
#define SENTINEL_FAILOVER_STATE_DETECT_END 9
/* Check for failover end. */
#define SENTINEL_FAILOVER_STATE_UPDATE_CONFIG 10
/* Monitor promoted slave. */
#define SENTINEL_FAILOVER_STATE_UPDATE_CONFIG 9
/* Monitor promoted slave. */
#define SENTINEL_MASTER_LINK_STATUS_UP 0
#define SENTINEL_MASTER_LINK_STATUS_DOWN 1
...
...
@@ -1695,10 +1694,6 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
ri
->
flags
&=
~
SRI_RECONF_INPROG
;
ri
->
flags
|=
SRI_RECONF_DONE
;
sentinelEvent
(
REDIS_NOTICE
,
"+slave-reconf-done"
,
ri
,
"%@"
);
/* If we are moving forward (a new slave is now configured)
* we update the change_time as we are conceptually passing
* to the next slave. */
ri
->
failover_state_change_time
=
mstime
();
}
}
}
...
...
@@ -1970,7 +1965,6 @@ const char *sentinelFailoverStateStr(int state) {
case
SENTINEL_FAILOVER_STATE_WAIT_PROMOTION
:
return
"wait_promotion"
;
case
SENTINEL_FAILOVER_STATE_RECONF_SLAVES
:
return
"reconf_slaves"
;
case
SENTINEL_FAILOVER_STATE_ALERT_CLIENTS
:
return
"alert_clients"
;
case
SENTINEL_FAILOVER_STATE_DETECT_END
:
return
"detect_end"
;
case
SENTINEL_FAILOVER_STATE_UPDATE_CONFIG
:
return
"update_config"
;
default:
return
"unknown"
;
}
...
...
@@ -2818,17 +2812,20 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
/* If I'm not the leader, I can't continue with the failover. */
if
(
!
isleader
)
{
int
election_timeout
=
SENTINEL_ELECTION_TIMEOUT
;
/* The election timeout is the minimum of SENTINEL_ELECTION_TIMEOUT
* and the configured failover timeout. */
if
(
election_timeout
>
ri
->
failover_timeout
)
election_timeout
=
ri
->
failover_timeout
;
/* Abort the failover if I'm not the leader after some time. */
if
(
mstime
()
-
ri
->
failover_start_time
>
SENTINEL_ELECTION_TIMEOUT
)
{
if
(
mstime
()
-
ri
->
failover_start_time
>
election_timeout
)
{
sentinelEvent
(
REDIS_WARNING
,
"-failover-abort-not-elected"
,
ri
,
"%@"
);
sentinelAbortFailover
(
ri
);
}
return
;
}
sentinelEvent
(
REDIS_WARNING
,
"+elected-leader"
,
ri
,
"%@"
);
/* Start the failover going to the next state if enough time has
* elapsed. */
ri
->
failover_state
=
SENTINEL_FAILOVER_STATE_SELECT_SLAVE
;
ri
->
failover_state_change_time
=
mstime
();
sentinelEvent
(
REDIS_WARNING
,
"+failover-state-select-slave"
,
ri
,
"%@"
);
...
...
@@ -2837,6 +2834,8 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
void
sentinelFailoverSelectSlave
(
sentinelRedisInstance
*
ri
)
{
sentinelRedisInstance
*
slave
=
sentinelSelectSlave
(
ri
);
/* We don't handle the timeout in this state, as the function either aborts
* the failover or goes forward to the next state. */
if
(
slave
==
NULL
)
{
sentinelEvent
(
REDIS_WARNING
,
"-failover-abort-no-good-slave"
,
ri
,
"%@"
);
sentinelAbortFailover
(
ri
);
...
...
@@ -2854,7 +2853,16 @@ void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
void
sentinelFailoverSendSlaveOfNoOne
(
sentinelRedisInstance
*
ri
)
{
int
retval
;
if
(
ri
->
promoted_slave
->
flags
&
SRI_DISCONNECTED
)
return
;
/* We can't send the command to the promoted slave if it is now
* disconnected. Retry again and again with this state until the timeout
* is reached, then abort the failover. */
if
(
ri
->
promoted_slave
->
flags
&
SRI_DISCONNECTED
)
{
if
(
mstime
()
-
ri
->
failover_state_change_time
>
ri
->
failover_timeout
)
{
sentinelEvent
(
REDIS_WARNING
,
"-failover-abort-slave-timeout"
,
ri
,
"%@"
);
sentinelAbortFailover
(
ri
);
}
return
;
}
/* Send SLAVEOF NO ONE command to turn the slave into a master.
* We actually register a generic callback for this command as we don't
...
...
@@ -2871,16 +2879,11 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
/* We actually wait for promotion indirectly checking with INFO when the
* slave turns into a master. */
void
sentinelFailoverWaitPromotion
(
sentinelRedisInstance
*
ri
)
{
mstime_t
elapsed
=
mstime
()
-
ri
->
failover_state_change_time
;
if
(
elapsed
>=
SENTINEL_PROMOTION_RETRY_PERIOD
)
{
sentinelEvent
(
REDIS_WARNING
,
"-promotion-timeout"
,
ri
->
promoted_slave
,
"%@"
);
sentinelEvent
(
REDIS_WARNING
,
"+failover-state-select-slave"
,
ri
,
"%@"
);
ri
->
failover_state
=
SENTINEL_FAILOVER_STATE_SELECT_SLAVE
;
ri
->
failover_state_change_time
=
mstime
();
ri
->
promoted_slave
->
flags
&=
~
SRI_PROMOTED
;
ri
->
promoted_slave
=
NULL
;
/* Just handle the timeout. Switching to the next state is handled
* by the function parsing the INFO command of the promoted slave. */
if
(
mstime
()
-
ri
->
failover_state_change_time
>
ri
->
failover_timeout
)
{
sentinelEvent
(
REDIS_WARNING
,
"-failover-abort-slave-timeout"
,
ri
,
"%@"
);
sentinelAbortFailover
(
ri
);
}
}
...
...
@@ -3004,6 +3007,8 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
}
}
dictReleaseIterator
(
di
);
/* Check if all the slaves are reconfigured and handle timeout. */
sentinelFailoverDetectEnd
(
master
);
}
...
...
@@ -3051,50 +3056,46 @@ void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
case
SENTINEL_FAILOVER_STATE_RECONF_SLAVES
:
sentinelFailoverReconfNextSlave
(
ri
);
break
;
case
SENTINEL_FAILOVER_STATE_DETECT_END
:
sentinelFailoverDetectEnd
(
ri
);
break
;
}
}
/* Abort a failover in progress with the following steps:
* 1) Set the master back to the original one, increment the config epoch.
* 2) Reconfig slaves to replicate to the old master.
* 3) Reconfig the promoted slave as a slave as well. */
/* Abort a failover in progress:
*
* This function can only be called before the promoted slave acknowledged
* the slave -> master switch. Otherwise the failover can't be aborted and
* will reach its end.
*
* If there is a promoted slave and we already got the acknowledgement of
* the slave -> master switch, we clear our flags and redirect to the
* new master. Eventually the config will be propagated if it is the one
* with the greater config epoch for this master.
*
* Otherwise, if we have not yet received the acknowledgement from the
* promoted slave, or there is no promoted slave at all, we just clear the
* failover-in-progress state as there is nothing to do (if the promoted
* slave for some reason actually received our "SLAVEOF NO ONE" command
* even though we did not receive the ACK, it will be reverted to a slave
* again by one of the Sentinels). */
void
sentinelAbortFailover
(
sentinelRedisInstance
*
ri
)
{
dictIterator
*
di
;
dictEntry
*
de
;
int
sentinel_role
;
redisAssert
(
ri
->
flags
&
SRI_FAILOVER_IN_PROGRESS
);
redisAssert
(
ri
->
failover_state
<=
SENTINEL_FAILOVER_STATE_WAIT_PROMOTION
);
/* Clear failover related flags from slaves.
* Also if we are the leader make sure to send SLAVEOF commands to all the
* already reconfigured slaves in order to turn them back into slaves of
* the original master. */
/* Clear failover related flags from slaves. */
di
=
dictGetIterator
(
ri
->
slaves
);
while
((
de
=
dictNext
(
di
))
!=
NULL
)
{
sentinelRedisInstance
*
slave
=
dictGetVal
(
de
);
if
(
!
(
slave
->
flags
&
SRI_DISCONNECTED
)
&&
(
slave
->
flags
&
(
SRI_PROMOTED
|
SRI_RECONF_SENT
|
SRI_RECONF_INPROG
|
SRI_RECONF_DONE
)))
{
int
retval
;
retval
=
sentinelSendSlaveOf
(
slave
,
ri
->
addr
->
ip
,
ri
->
addr
->
port
);
if
(
retval
==
REDIS_OK
)
sentinelEvent
(
REDIS_NOTICE
,
"-slave-reconf-undo"
,
slave
,
"%@"
);
}
slave
->
flags
&=
~
(
SRI_RECONF_SENT
|
SRI_RECONF_INPROG
|
SRI_RECONF_DONE
);
}
dictReleaseIterator
(
di
);
sentinel_role
=
SENTINEL_LEADER
;
ri
->
flags
&=
~
(
SRI_FAILOVER_IN_PROGRESS
|
SRI_FORCE_FAILOVER
);
ri
->
failover_state
=
SENTINEL_FAILOVER_STATE_NONE
;
ri
->
failover_state_change_time
=
mstime
();
if
(
ri
->
promoted_slave
)
{
sentinelCallClientReconfScript
(
ri
,
sentinel_role
,
"abort"
,
sentinelCallClientReconfScript
(
ri
,
SENTINEL_LEADER
,
"abort"
,
ri
->
promoted_slave
->
addr
,
ri
->
addr
);
ri
->
promoted_slave
->
flags
&=
~
SRI_PROMOTED
;
ri
->
promoted_slave
=
NULL
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录