Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
apache
Iotdb
提交
f869a95c
I
Iotdb
项目概览
apache
/
Iotdb
8 个月 前同步成功
通知
25
Star
3344
Fork
916
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
I
Iotdb
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
f869a95c
编写于
8月 30, 2023
作者:
P
Potato
提交者:
GitHub
8月 30, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[IOTDB-6119] Add ConfigNode leader service check (#10985)
上级
1357cede
变更
9
隐藏空白更改
内联
并排
Showing
9 changed files
with
106 additions
and
49 deletions
+106
-49
iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/consensus/statemachine/ConfigRegionStateMachine.java
...node/consensus/statemachine/ConfigRegionStateMachine.java
+29
-27
iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/consensus/ConsensusManager.java
.../iotdb/confignode/manager/consensus/ConsensusManager.java
+16
-9
iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/cq/CQManager.java
...ava/org/apache/iotdb/confignode/manager/cq/CQManager.java
+0
-9
iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/IStateMachine.java
...c/main/java/org/apache/iotdb/consensus/IStateMachine.java
+5
-0
iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/config/RatisConfig.java
...n/java/org/apache/iotdb/consensus/config/RatisConfig.java
+45
-3
iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/ratis/ApplicationStateMachineProxy.java
...e/iotdb/consensus/ratis/ApplicationStateMachineProxy.java
+5
-0
iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/ratis/utils/Utils.java
...in/java/org/apache/iotdb/consensus/ratis/utils/Utils.java
+3
-0
iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/simple/SimpleConsensusServerImpl.java
...che/iotdb/consensus/simple/SimpleConsensusServerImpl.java
+1
-0
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/client/ConfigNodeClient.java
...org/apache/iotdb/db/protocol/client/ConfigNodeClient.java
+2
-1
未找到文件。
iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/consensus/statemachine/ConfigRegionStateMachine.java
浏览文件 @
f869a95c
...
...
@@ -198,33 +198,7 @@ public class ConfigRegionStateMachine
// We get currentNodeId here because the currentNodeId
// couldn't initialize earlier than the ConfigRegionStateMachine
int
currentNodeId
=
ConfigNodeDescriptor
.
getInstance
().
getConf
().
getConfigNodeId
();
if
(
currentNodeId
==
newLeaderId
)
{
LOGGER
.
info
(
"Current node [nodeId: {}, ip:port: {}] becomes Leader"
,
newLeaderId
,
currentNodeTEndPoint
);
// Always start load services first
configManager
.
getLoadManager
().
startLoadServices
();
// Start leader scheduling services
configManager
.
getProcedureManager
().
shiftExecutor
(
true
);
configManager
.
getRetryFailedTasksThread
().
startRetryFailedTasksService
();
configManager
.
getPartitionManager
().
startRegionCleaner
();
// we do cq recovery async for two reasons:
// 1. For performance: cq recovery may be time-consuming, we use another thread to do it in
// make notifyLeaderChanged not blocked by it
// 2. For correctness: in cq recovery processing, it will use ConsensusManager which may be
// initialized after notifyLeaderChanged finished
threadPool
.
submit
(()
->
configManager
.
getCQManager
().
startCQScheduler
());
threadPool
.
submit
(
()
->
configManager
.
getPipeManager
().
getPipeRuntimeCoordinator
().
startPipeMetaSync
());
threadPool
.
submit
(
()
->
configManager
.
getPipeManager
().
getPipeRuntimeCoordinator
().
startPipeHeartbeat
());
}
else
{
if
(
currentNodeId
!=
newLeaderId
)
{
LOGGER
.
info
(
"Current node [nodeId:{}, ip:port: {}] is not longer the leader, "
+
"the new leader is [nodeId:{}]"
,
...
...
@@ -244,6 +218,34 @@ public class ConfigRegionStateMachine
}
}
/**
 * Starts the leader-side services of this ConfigNode when the leader-ready notification arrives.
 *
 * <p>Load services are started first, followed by the procedure executor, the retry-failed-tasks
 * service and the region cleaner. The CQ scheduler and the pipe meta-sync / heartbeat tasks are
 * submitted to {@code threadPool} rather than executed inline.
 */
@Override
public void notifyLeaderReady() {
  final int currentNodeId = ConfigNodeDescriptor.getInstance().getConf().getConfigNodeId();
  LOGGER.info(
      "Current node [nodeId: {}, ip:port: {}] becomes Leader", currentNodeId, currentNodeTEndPoint);

  // Load services must come up before any other leader service.
  configManager.getLoadManager().startLoadServices();

  // Leader scheduling services.
  configManager.getProcedureManager().shiftExecutor(true);
  configManager.getRetryFailedTasksThread().startRetryFailedTasksService();
  configManager.getPartitionManager().startRegionCleaner();

  // CQ recovery is handed to the pool for two reasons:
  // 1. Performance: recovery may be slow, and this callback should not be blocked by it.
  // 2. Correctness: per the original note, CQ recovery uses ConsensusManager, which may only be
  //    initialized after the leader notification has finished.
  threadPool.submit(() -> configManager.getCQManager().startCQScheduler());

  // Pipe runtime tasks are started asynchronously as well; the managers are resolved inside
  // each lambda, i.e. on the pool thread at execution time.
  threadPool.submit(
      () -> configManager.getPipeManager().getPipeRuntimeCoordinator().startPipeMetaSync());
  threadPool.submit(
      () -> configManager.getPipeManager().getPipeRuntimeCoordinator().startPipeHeartbeat());
}
@Override
public
void
start
()
{
if
(
ConsensusFactory
.
SIMPLE_CONSENSUS
.
equals
(
CONF
.
getConfigNodeConsensusProtocolClass
()))
{
...
...
iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/consensus/ConsensusManager.java
浏览文件 @
f869a95c
...
...
@@ -336,6 +336,10 @@ public class ConsensusManager {
return
consensusImpl
.
isLeader
(
DEFAULT_CONSENSUS_GROUP_ID
);
}
/**
 * Asks the consensus implementation whether the group identified by {@code
 * DEFAULT_CONSENSUS_GROUP_ID} reports leader-ready.
 *
 * @return the value returned by {@code consensusImpl.isLeaderReady(DEFAULT_CONSENSUS_GROUP_ID)}
 */
public boolean isLeaderReady() {
  final boolean leaderReady = consensusImpl.isLeaderReady(DEFAULT_CONSENSUS_GROUP_ID);
  return leaderReady;
}
/** @return ConfigNode-leader's location if leader exists, null otherwise. */
public
TConfigNodeLocation
getLeader
()
{
for
(
int
retry
=
0
;
retry
<
50
;
retry
++)
{
...
...
@@ -366,25 +370,28 @@ public class ConsensusManager {
/**
* Confirm the current ConfigNode's leadership.
*
* @return SUCCESS_STATUS if the current ConfigNode is leader, NEED_REDIRECTION otherwise
* @return SUCCESS_STATUS if the current ConfigNode is the leader and is ready,
* REDIRECTION_RECOMMEND otherwise
*/
public
TSStatus
confirmLeader
()
{
TSStatus
result
=
new
TSStatus
();
if
(
isLeader
())
{
return
result
.
setCode
(
TSStatusCode
.
SUCCESS_STATUS
.
getStatusCode
());
if
(
isLeaderReady
())
{
result
.
setCode
(
TSStatusCode
.
SUCCESS_STATUS
.
getStatusCode
());
}
else
{
result
.
setCode
(
TSStatusCode
.
REDIRECTION_RECOMMEND
.
getStatusCode
());
result
.
setMessage
(
"The current ConfigNode is not leader, please redirect to a new ConfigNode."
);
if
(
isLeader
())
{
result
.
setMessage
(
"The current ConfigNode is leader but not ready yet, please try again later."
);
}
else
{
result
.
setMessage
(
"The current ConfigNode is not leader, please redirect to a new ConfigNode."
);
}
TConfigNodeLocation
leaderLocation
=
getLeader
();
if
(
leaderLocation
!=
null
)
{
result
.
setRedirectNode
(
leaderLocation
.
getInternalEndPoint
());
}
return
result
;
}
return
result
;
}
public
ConsensusGroupId
getConsensusGroupId
()
{
...
...
iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/cq/CQManager.java
浏览文件 @
f869a95c
...
...
@@ -114,15 +114,6 @@ public class CQManager {
}
public
void
startCQScheduler
()
{
try
{
/*
TODO: remove this after fixing IOTDB-6085
sleep here because IOTDB-6085: NullPointerException in readAsync when Ratis leader is changing
*/
Thread
.
sleep
(
1000
);
}
catch
(
InterruptedException
e
)
{
Thread
.
currentThread
().
interrupt
();
}
lock
.
writeLock
().
lock
();
try
{
// 1. shutdown previous cq schedule thread pool
...
...
iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/IStateMachine.java
浏览文件 @
f869a95c
...
...
@@ -168,6 +168,11 @@ public interface IStateMachine {
default void notifyConfigurationChanged(long term, long index, List<Peer> newConfiguration) {
  // Intentionally empty: the default hook ignores the event; implementations may override it.
}
/**
 * Notify the {@link IStateMachine} that this server has become ready after changing to leader.
 *
 * <p>The default implementation does nothing.
 */
default void notifyLeaderReady() {
  // Intentionally empty: the default hook ignores the event; implementations may override it.
}
}
/**
...
...
iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/config/RatisConfig.java
浏览文件 @
f869a95c
...
...
@@ -41,6 +41,7 @@ public class RatisConfig {
private
final
Impl
impl
;
private
final
LeaderLogAppender
leaderLogAppender
;
private
final
Read
read
;
private
final
Utils
utils
;
private
RatisConfig
(
Rpc
rpc
,
...
...
@@ -52,7 +53,8 @@ public class RatisConfig {
Client
client
,
Impl
impl
,
LeaderLogAppender
leaderLogAppender
,
Read
read
)
{
Read
read
,
Utils
utils
)
{
this
.
rpc
=
rpc
;
this
.
leaderElection
=
leaderElection
;
this
.
snapshot
=
snapshot
;
...
...
@@ -63,6 +65,7 @@ public class RatisConfig {
this
.
impl
=
impl
;
this
.
leaderLogAppender
=
leaderLogAppender
;
this
.
read
=
read
;
this
.
utils
=
utils
;
}
public
Rpc
getRpc
()
{
...
...
@@ -105,6 +108,10 @@ public class RatisConfig {
return
read
;
}
/** Returns the {@link Utils} section held by this configuration. */
public Utils getUtils() {
  return this.utils;
}
/** Creates a new, empty {@link Builder}. */
public static Builder newBuilder() {
  final Builder builder = new Builder();
  return builder;
}
...
...
@@ -117,11 +124,11 @@ public class RatisConfig {
private
ThreadPool
threadPool
;
private
Log
log
;
private
Grpc
grpc
;
private
Client
client
;
private
Impl
impl
;
private
LeaderLogAppender
leaderLogAppender
;
private
Read
read
;
private
Utils
utils
;
public
RatisConfig
build
()
{
return
new
RatisConfig
(
...
...
@@ -135,7 +142,8 @@ public class RatisConfig {
Optional
.
ofNullable
(
impl
).
orElseGet
(()
->
Impl
.
newBuilder
().
build
()),
Optional
.
ofNullable
(
leaderLogAppender
)
.
orElseGet
(()
->
LeaderLogAppender
.
newBuilder
().
build
()),
Optional
.
ofNullable
(
read
).
orElseGet
(()
->
Read
.
newBuilder
().
build
()));
Optional
.
ofNullable
(
read
).
orElseGet
(()
->
Read
.
newBuilder
().
build
()),
Optional
.
ofNullable
(
utils
).
orElseGet
(()
->
Utils
.
newBuilder
().
build
()));
}
public
Builder
setRpc
(
Rpc
rpc
)
{
...
...
@@ -187,6 +195,10 @@ public class RatisConfig {
this
.
read
=
read
;
return
this
;
}
/**
 * Sets the {@link Utils} section to use when the configuration is built.
 *
 * <p>Returns the builder so that calls can be chained, matching the other setters of this
 * builder. Callers that ignore the return value keep working unchanged.
 *
 * @param utils the utils section; if it is {@code null} when {@code build()} runs, a default
 *     {@code Utils} instance is used instead
 * @return this builder
 */
public Builder setUtils(Utils utils) {
  this.utils = utils;
  return this;
}
}
/** server rpc timeout related. */
...
...
@@ -1104,4 +1116,34 @@ public class RatisConfig {
}
}
}
/**
 * Miscellaneous settings. Currently holds only the sleep deviation threshold (in milliseconds)
 * that is forwarded to {@code RaftServerConfigKeys.setSleepDeviationThreshold}.
 */
public static class Utils {

  private final int sleepDeviationThresholdMs;

  private Utils(int sleepDeviationThresholdMs) {
    this.sleepDeviationThresholdMs = sleepDeviationThresholdMs;
  }

  /** @return the configured sleep deviation threshold, in milliseconds */
  public int getSleepDeviationThresholdMs() {
    return sleepDeviationThresholdMs;
  }

  /** @return a new builder pre-populated with the default threshold */
  public static Utils.Builder newBuilder() {
    return new Utils.Builder();
  }

  /** Builder for {@link Utils}. */
  public static class Builder {

    // Default threshold: 4 seconds, expressed in milliseconds.
    private int sleepDeviationThresholdMs = 4 * 1000;

    /** @return an immutable {@link Utils} carrying the current builder values */
    public Utils build() {
      return new Utils(sleepDeviationThresholdMs);
    }

    /**
     * Sets the sleep deviation threshold.
     *
     * <p>Returns the builder so that calls can be chained; callers that ignore the return value
     * keep working unchanged.
     *
     * @param sleepDeviationThresholdMs threshold in milliseconds
     * @return this builder
     */
    public Builder setSleepDeviationThresholdMs(int sleepDeviationThresholdMs) {
      this.sleepDeviationThresholdMs = sleepDeviationThresholdMs;
      return this;
    }
  }
}
}
iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/ratis/ApplicationStateMachineProxy.java
浏览文件 @
f869a95c
...
...
@@ -318,6 +318,11 @@ public class ApplicationStateMachineProxy extends BaseStateMachine {
Utils
.
fromRaftPeerIdToNodeId
(
newLeaderId
));
}
/** Forwards the leader-ready notification to the wrapped application state machine. */
@Override
public void notifyLeaderReady() {
  // Pure delegation: the application state machine's event API receives the callback.
  applicationStateMachine.event().notifyLeaderReady();
}
@Override
public
void
notifyConfigurationChanged
(
long
term
,
long
index
,
RaftConfigurationProto
newRaftConfiguration
)
{
...
...
iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/ratis/utils/Utils.java
浏览文件 @
f869a95c
...
...
@@ -289,5 +289,8 @@ public class Utils {
:
RaftServerConfigKeys
.
Read
.
Option
.
LINEARIZABLE
;
RaftServerConfigKeys
.
Read
.
setOption
(
properties
,
option
);
RaftServerConfigKeys
.
Read
.
setTimeout
(
properties
,
config
.
getRead
().
getReadTimeout
());
RaftServerConfigKeys
.
setSleepDeviationThreshold
(
properties
,
config
.
getUtils
().
getSleepDeviationThresholdMs
());
}
}
iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/simple/SimpleConsensusServerImpl.java
浏览文件 @
f869a95c
...
...
@@ -50,6 +50,7 @@ public class SimpleConsensusServerImpl implements IStateMachine {
stateMachine
.
start
();
// Notify itself as the leader
stateMachine
.
event
().
notifyLeaderChanged
(
peer
.
getGroupId
(),
peer
.
getNodeId
());
stateMachine
.
event
().
notifyLeaderReady
();
}
@Override
...
...
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/client/ConfigNodeClient.java
浏览文件 @
f869a95c
...
...
@@ -307,7 +307,8 @@ public class ConfigNodeClient implements IConfigNodeRPCService.Iface, ThriftClie
configLeader
=
null
;
}
logger
.
warn
(
"Failed to connect to ConfigNode {} from DataNode {}, because the current node is not leader, try next node"
,
"Failed to connect to ConfigNode {} from DataNode {}, because the current node is not "
+
"leader or not ready yet, will try again later"
,
configNode
,
config
.
getAddressAndPort
());
return
true
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录