Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
lhongjum2003
DolphinScheduler
提交
e4210d14
DolphinScheduler
项目概览
lhongjum2003
/
DolphinScheduler
与 Fork 源项目一致
Fork自
apache / DolphinScheduler
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
DolphinScheduler
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e4210d14
编写于
7月 29, 2019
作者:
leon-baoliang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor zk client.
上级
3191eb92
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
177 addition
and
176 deletion
+177
-176
escheduler-api/src/main/java/cn/escheduler/api/service/MonitorService.java
...c/main/java/cn/escheduler/api/service/MonitorService.java
+4
-4
escheduler-api/src/main/java/cn/escheduler/api/utils/ZookeeperMonitor.java
...c/main/java/cn/escheduler/api/utils/ZookeeperMonitor.java
+2
-2
escheduler-common/src/main/java/cn/escheduler/common/zk/AbstractZKClient.java
...c/main/java/cn/escheduler/common/zk/AbstractZKClient.java
+83
-11
escheduler-server/src/main/java/cn/escheduler/server/master/MasterServer.java
...c/main/java/cn/escheduler/server/master/MasterServer.java
+9
-10
escheduler-server/src/main/java/cn/escheduler/server/zk/ZKMasterClient.java
...src/main/java/cn/escheduler/server/zk/ZKMasterClient.java
+71
-128
escheduler-server/src/main/java/cn/escheduler/server/zk/ZKWorkerClient.java
...src/main/java/cn/escheduler/server/zk/ZKWorkerClient.java
+8
-21
未找到文件。
escheduler-api/src/main/java/cn/escheduler/api/service/MonitorService.java
浏览文件 @
e4210d14
...
...
@@ -65,7 +65,7 @@ public class MonitorService extends BaseService{
Map
<
String
,
Object
>
result
=
new
HashMap
<>(
5
);
List
<
MasterServer
>
masterServers
=
getServerList
(
true
);
List
<
MasterServer
>
masterServers
=
getServerList
FromZK
(
true
);
result
.
put
(
Constants
.
DATA_LIST
,
masterServers
);
putMsg
(
result
,
Status
.
SUCCESS
);
...
...
@@ -99,7 +99,7 @@ public class MonitorService extends BaseService{
public
Map
<
String
,
Object
>
queryWorker
(
User
loginUser
)
{
Map
<
String
,
Object
>
result
=
new
HashMap
<>(
5
);
List
<
MasterServer
>
workerServers
=
getServerList
(
false
);
List
<
MasterServer
>
workerServers
=
getServerList
FromZK
(
false
);
result
.
put
(
Constants
.
DATA_LIST
,
workerServers
);
putMsg
(
result
,
Status
.
SUCCESS
);
...
...
@@ -107,13 +107,13 @@ public class MonitorService extends BaseService{
return
result
;
}
private
List
<
MasterServer
>
getServerList
(
boolean
isMaster
){
private
List
<
MasterServer
>
getServerList
FromZK
(
boolean
isMaster
){
List
<
MasterServer
>
servers
=
new
ArrayList
<>();
ZookeeperMonitor
zookeeperMonitor
=
null
;
try
{
zookeeperMonitor
=
new
ZookeeperMonitor
();
ZKNodeType
zkNodeType
=
isMaster
?
ZKNodeType
.
MASTER
:
ZKNodeType
.
WORKER
;
servers
=
zookeeperMonitor
.
getServers
(
zkNodeType
);
servers
=
zookeeperMonitor
.
getServers
List
(
zkNodeType
);
}
catch
(
Exception
e
){
throw
e
;
}
finally
{
...
...
escheduler-api/src/main/java/cn/escheduler/api/utils/ZookeeperMonitor.java
浏览文件 @
e4210d14
...
...
@@ -40,7 +40,7 @@ public class ZookeeperMonitor extends AbstractZKClient{
* @return
*/
public
List
<
MasterServer
>
getMasterServers
(){
return
getServers
(
ZKNodeType
.
MASTER
);
return
getServers
List
(
ZKNodeType
.
MASTER
);
}
/**
...
...
@@ -48,7 +48,7 @@ public class ZookeeperMonitor extends AbstractZKClient{
* @return
*/
public
List
<
MasterServer
>
getWorkerServers
(){
return
getServers
(
ZKNodeType
.
WORKER
);
return
getServers
List
(
ZKNodeType
.
WORKER
);
}
private
static
List
<
ZookeeperRecord
>
zookeeperInfoList
(
String
zookeeperServers
)
{
...
...
escheduler-common/src/main/java/cn/escheduler/common/zk/AbstractZKClient.java
浏览文件 @
e4210d14
...
...
@@ -20,7 +20,6 @@ import cn.escheduler.common.Constants;
import
cn.escheduler.common.IStoppable
;
import
cn.escheduler.common.enums.ZKNodeType
;
import
cn.escheduler.common.model.MasterServer
;
import
cn.escheduler.common.utils.CollectionUtils
;
import
cn.escheduler.common.utils.DateUtils
;
import
cn.escheduler.common.utils.OSUtils
;
import
cn.escheduler.common.utils.ResInfo
;
...
...
@@ -223,28 +222,68 @@ public abstract class AbstractZKClient {
}
}
/**
 * Creates the znode that represents this host for the given node type.
 *
 * <p>The node is created in {@code EPHEMERAL_SEQUENTIAL} mode under the parent
 * path of {@code zkNodeType}, using {@code <parentPath>/<host>_} as the path
 * prefix and the current heartbeat info as its data.
 *
 * @param zkNodeType node type whose parent path the znode is created under
 * @return the path of the created znode as returned by the zk client
 * @throws Exception if the znode cannot be created
 */
private String createZNodePath(ZKNodeType zkNodeType) throws Exception {
    // data stored in the znode: heartbeat info stamped with the current time
    final byte[] nodeData = ResInfo.getHeartBeatInfo(new Date()).getBytes();

    // path prefix of the new node: <parentPath>/<host>_
    final String nodePathPrefix = getZNodeParentPath(zkNodeType) + "/" + OSUtils.getHost() + "_";

    final String registerPath = zkClient.create()
            .withMode(CreateMode.EPHEMERAL_SEQUENTIAL)
            .forPath(nodePathPrefix, nodeData);

    logger.info("register {} node {} success", zkNodeType.toString(), registerPath);
    return registerPath;
}
/**
 * Registers the current host in zookeeper as a server of the given type.
 *
 * <p>If {@link #checkZKNodeExists} reports that a node for this host already
 * exists for {@code zkNodeType}, registration is refused and {@code null} is
 * returned. Otherwise a new znode is created for the host and
 * {@code handleDeadServer} is invoked with {@code DELETE_ZK_OP} for it.
 *
 * @param zkNodeType type of the server to register (master or worker)
 * @return the registered znode path, or {@code null} if a server of this type
 *         is already registered for the host
 * @throws Exception if creating the znode or handling the dead-server record fails
 */
public String registerServer(ZKNodeType zkNodeType) throws Exception {
    String host = OSUtils.getHost();
    if (checkZKNodeExists(host, zkNodeType)) {
        logger.error("register failure , {} server already started on host : {}",
                zkNodeType.toString(), host);
        return null;
    }

    // Create the node under the parent path of the REQUESTED type. Passing a fixed
    // ZKNodeType.MASTER here would register workers under the master parent path
    // while the checks above and below operate on the worker type.
    String registerPath = createZNodePath(zkNodeType);

    // handle dead server
    handleDeadServer(registerPath, zkNodeType, Constants.DELETE_ZK_OP);

    return registerPath;
}
/**
* opType(add): if find dead server , then add to zk deadServerPath
* opType(delete): delete path from zk
*
* @param zNode node path
* @param
serverType master or worker prefix
* @param
zkNodeType master or worker
* @param opType delete or add
* @throws Exception
*/
public
void
handleDeadServer
(
String
zNode
,
String
server
Type
,
String
opType
)
throws
Exception
{
public
void
handleDeadServer
(
String
zNode
,
ZKNodeType
zkNode
Type
,
String
opType
)
throws
Exception
{
//ip_sequenceno
String
[]
zNodesPath
=
zNode
.
split
(
"\\/"
);
String
ipSeqNo
=
zNodesPath
[
zNodesPath
.
length
-
1
];
String
type
=
serverType
.
equals
(
MASTER_PREFIX
)
?
MASTER_PREFIX
:
WORKER_PREFIX
;
String
type
=
(
zkNodeType
==
ZKNodeType
.
MASTER
)
?
MASTER_PREFIX
:
WORKER_PREFIX
;
//check server restart, if restart , dead server path in zk should be delete
if
(
opType
.
equals
(
DELETE_ZK_OP
)){
String
[]
ipAndSeqNo
=
ipSeqNo
.
split
(
UNDERLINE
);
String
ip
=
ipAndSeqNo
[
0
];
removeDeadServerByHost
(
ip
,
serverT
ype
);
removeDeadServerByHost
(
ip
,
t
ype
);
}
else
if
(
opType
.
equals
(
ADD_ZK_OP
)){
String
deadServerPath
=
deadServerZNodeParentPath
+
SINGLE_SLASH
+
type
+
UNDERLINE
+
ipSeqNo
;
...
...
@@ -253,7 +292,8 @@ public abstract class AbstractZKClient {
zkClient
.
create
().
forPath
(
deadServerPath
,(
type
+
UNDERLINE
+
ipSeqNo
).
getBytes
());
logger
.
info
(
"{} server dead , and {} added to zk dead server path success"
,
serverType
,
zNode
);
logger
.
info
(
"{} server dead , and {} added to zk dead server path success"
,
zkNodeType
.
toString
(),
zNode
);
}
}
...
...
@@ -314,8 +354,8 @@ public abstract class AbstractZKClient {
* @param zkNodeType
* @return
*/
public
List
<
MasterServer
>
getServers
(
ZKNodeType
zkNodeType
){
Map
<
String
,
String
>
masterMap
=
getServer
List
(
zkNodeType
);
public
List
<
MasterServer
>
getServers
List
(
ZKNodeType
zkNodeType
){
Map
<
String
,
String
>
masterMap
=
getServer
Maps
(
zkNodeType
);
String
parentPath
=
getZNodeParentPath
(
zkNodeType
);
List
<
MasterServer
>
masterServers
=
new
ArrayList
<>();
...
...
@@ -332,7 +372,7 @@ public abstract class AbstractZKClient {
* result : {host : resource info}
* @return
*/
public
Map
<
String
,
String
>
getServer
List
(
ZKNodeType
zkNodeType
){
public
Map
<
String
,
String
>
getServer
Maps
(
ZKNodeType
zkNodeType
){
Map
<
String
,
String
>
masterMap
=
new
HashMap
<>();
try
{
...
...
@@ -363,7 +403,7 @@ public abstract class AbstractZKClient {
host
,
zkNodeType
.
toString
());
return
false
;
}
Map
<
String
,
String
>
serverMaps
=
getServer
List
(
zkNodeType
);
Map
<
String
,
String
>
serverMaps
=
getServer
Maps
(
zkNodeType
);
for
(
String
hostKey
:
serverMaps
.
keySet
()){
if
(
hostKey
.
startsWith
(
host
)){
return
true
;
...
...
@@ -497,6 +537,38 @@ public abstract class AbstractZKClient {
}
}
/**
 * Checks whether the server reported as down is the one running on this host
 * and, if so, asks the registered {@code stoppable} to stop.
 *
 * @param serverHost host of the server whose znode was removed
 * @param zkNodeType type of that server
 * @return {@code true} if {@code serverHost} equals the local host (a stop was
 *         requested), {@code false} otherwise
 */
protected boolean checkServerSelfDead(String serverHost, ZKNodeType zkNodeType) {
    if (serverHost.equals(OSUtils.getHost())) {
        logger.error("{} server({}) of myself dead , stopping...",
                zkNodeType.toString(), serverHost);
        // String.format uses %s conversions; SLF4J-style "{}" placeholders would be
        // emitted literally and the arguments silently dropped from the stop reason.
        stoppable.stop(String.format(" %s server %s of myself dead , stopping...",
                zkNodeType.toString(), serverHost));
        return true;
    }
    return false;
}
/**
 * Extracts the host part from a znode path.
 *
 * <p>The host is the text between the last {@code "/"} and the last {@code "_"}
 * of {@code path}, e.g. {@code masterParentPath/ip_000001} yields {@code ip}.
 *
 * @param path znode path taken from the event data
 * @return the host part, or an empty string (after logging an error) when no
 *         non-empty segment lies between the last slash and the last underscore
 */
protected String getHostByEventDataPath(String path) {
    final int hostBegin = path.lastIndexOf("/") + 1;
    final int hostEnd = path.lastIndexOf("_");

    if (hostBegin < hostEnd) {
        return path.substring(hostBegin, hostEnd);
    }

    logger.error("parse ip error");
    return "";
}
@Override
public
String
toString
()
{
return
"AbstractZKClient{"
+
...
...
escheduler-server/src/main/java/cn/escheduler/server/master/MasterServer.java
浏览文件 @
e4210d14
...
...
@@ -119,8 +119,6 @@ public class MasterServer implements CommandLineRunner, IStoppable {
public
MasterServer
(
ProcessDao
processDao
){
zkMasterClient
=
ZKMasterClient
.
getZKMasterClient
(
processDao
);
this
.
serverDao
=
zkMasterClient
.
getServerDao
();
this
.
alertDao
=
zkMasterClient
.
getAlertDao
();
}
public
void
run
(
ProcessDao
processDao
){
...
...
@@ -128,6 +126,11 @@ public class MasterServer implements CommandLineRunner, IStoppable {
heartBeatInterval
=
conf
.
getInt
(
Constants
.
MASTER_HEARTBEAT_INTERVAL
,
Constants
.
defaultMasterHeartbeatInterval
);
// master exec thread pool num
int
masterExecThreadNum
=
conf
.
getInt
(
Constants
.
MASTER_EXEC_THREADS
,
Constants
.
defaultMasterExecThreadNum
);
heartbeatMasterService
=
ThreadUtils
.
newDaemonThreadScheduledExecutor
(
"Master-Main-Thread"
,
Constants
.
defaulMasterHeartbeatThreadNum
);
// heartbeat thread implement
...
...
@@ -140,10 +143,6 @@ public class MasterServer implements CommandLineRunner, IStoppable {
heartbeatMasterService
.
scheduleAtFixedRate
(
heartBeatThread
,
5
,
heartBeatInterval
,
TimeUnit
.
SECONDS
);
// master exec thread pool num
int
masterExecThreadNum
=
conf
.
getInt
(
Constants
.
MASTER_EXEC_THREADS
,
Constants
.
defaultMasterExecThreadNum
);
// master scheduler thread
MasterSchedulerThread
masterSchedulerThread
=
new
MasterSchedulerThread
(
zkMasterClient
,
...
...
@@ -154,6 +153,8 @@ public class MasterServer implements CommandLineRunner, IStoppable {
masterSchedulerService
.
execute
(
masterSchedulerThread
);
// start QuartzExecutors
// TODO...
// what system should do if exception
try
{
ProcessScheduleJob
.
init
(
processDao
);
QuartzExecutors
.
getInstance
().
start
();
...
...
@@ -173,13 +174,11 @@ public class MasterServer implements CommandLineRunner, IStoppable {
Runtime
.
getRuntime
().
addShutdownHook
(
new
Thread
(
new
Runnable
()
{
@Override
public
void
run
()
{
String
host
=
OSUtils
.
getHost
();
// clear master table register info
serverDao
.
deleteMaster
(
host
);
logger
.
info
(
"master server stopped"
);
if
(
zkMasterClient
.
getActiveMasterNum
()
<=
1
)
{
for
(
int
i
=
0
;
i
<
Constants
.
ESCHEDULER_WARN_TIMES_FAILOVER
;
i
++)
{
alertDao
.
sendServerStopedAlert
(
1
,
host
,
"Master-Server"
);
zkMasterClient
.
getAlertDao
().
sendServerStopedAlert
(
1
,
OSUtils
.
getHost
(),
"Master-Server"
);
}
}
}
...
...
escheduler-server/src/main/java/cn/escheduler/server/zk/ZKMasterClient.java
浏览文件 @
e4210d14
...
...
@@ -20,8 +20,6 @@ import cn.escheduler.common.Constants;
import
cn.escheduler.common.enums.ExecutionStatus
;
import
cn.escheduler.common.enums.ZKNodeType
;
import
cn.escheduler.common.model.MasterServer
;
import
cn.escheduler.common.utils.CollectionUtils
;
import
cn.escheduler.common.utils.OSUtils
;
import
cn.escheduler.common.zk.AbstractZKClient
;
import
cn.escheduler.dao.AlertDao
;
import
cn.escheduler.dao.DaoFactory
;
...
...
@@ -29,8 +27,6 @@ import cn.escheduler.dao.ProcessDao;
import
cn.escheduler.dao.ServerDao
;
import
cn.escheduler.dao.model.ProcessInstance
;
import
cn.escheduler.dao.model.TaskInstance
;
import
cn.escheduler.dao.model.WorkerServer
;
import
cn.escheduler.common.utils.ResInfo
;
import
cn.escheduler.server.utils.ProcessUtils
;
import
org.apache.commons.lang.StringUtils
;
import
org.apache.curator.framework.CuratorFramework
;
...
...
@@ -39,7 +35,6 @@ import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent;
import
org.apache.curator.framework.recipes.cache.PathChildrenCacheListener
;
import
org.apache.curator.framework.recipes.locks.InterProcessMutex
;
import
org.apache.curator.utils.ThreadUtils
;
import
org.apache.zookeeper.CreateMode
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -68,6 +63,7 @@ public class ZKMasterClient extends AbstractZKClient {
* master database access
*/
private
ServerDao
serverDao
=
null
;
/**
* alert database access
*/
...
...
@@ -77,9 +73,6 @@ public class ZKMasterClient extends AbstractZKClient {
*/
private
ProcessDao
processDao
;
private
Date
createTime
=
null
;
/**
* zkMasterClient
*/
...
...
@@ -131,7 +124,7 @@ public class ZKMasterClient extends AbstractZKClient {
this
.
listenerWorker
();
// register master
this
.
registMaster
();
this
.
regist
er
Master
();
// check if fault tolerance is required,failure and tolerance
if
(
getActiveMasterNum
()
==
1
)
{
...
...
@@ -157,15 +150,6 @@ public class ZKMasterClient extends AbstractZKClient {
this
.
alertDao
=
DaoFactory
.
getDaoInstance
(
AlertDao
.
class
);
this
.
processDao
=
DaoFactory
.
getDaoInstance
(
ProcessDao
.
class
);
}
/**
* get maste dao
* @return
*/
public
ServerDao
getServerDao
(){
return
serverDao
;
}
/**
* get alert dao
* @return
...
...
@@ -174,40 +158,24 @@ public class ZKMasterClient extends AbstractZKClient {
return
alertDao
;
}
/**
* register master znode
*/
public
void
registMaster
(){
// get current date
Date
now
=
new
Date
();
createTime
=
now
;
public
void
registerMaster
(){
try
{
String
osHost
=
OSUtils
.
getHost
();
// zookeeper node exists, cannot start a new one.
if
(
checkZKNodeExists
(
osHost
,
ZKNodeType
.
MASTER
)){
logger
.
error
(
"register failure , master already started on host : {}"
,
osHost
);
// exit system
String
serverPath
=
registerServer
(
ZKNodeType
.
MASTER
);
if
(
StringUtils
.
isEmpty
(
serverPath
)){
System
.
exit
(-
1
);
}
createMasterZNode
(
now
);
logger
.
info
(
"register master node {} success"
,
masterZNode
);
// handle dead server
handleDeadServer
(
masterZNode
,
Constants
.
MASTER_PREFIX
,
Constants
.
DELETE_ZK_OP
);
}
catch
(
Exception
e
)
{
logger
.
error
(
"register master failure : "
+
e
.
getMessage
(),
e
);
System
.
exit
(-
1
);
}
}
private
void
createMasterZNode
(
Date
now
)
throws
Exception
{
// specify the format of stored data in ZK nodes
String
heartbeatZKInfo
=
ResInfo
.
getHeartBeatInfo
(
now
);
// create temporary sequence nodes for master znode
masterZNode
=
zkClient
.
create
().
withMode
(
CreateMode
.
EPHEMERAL_SEQUENTIAL
).
forPath
(
masterZNodeParentPath
+
"/"
+
OSUtils
.
getHost
()
+
"_"
,
heartbeatZKInfo
.
getBytes
());
}
/**
...
...
@@ -217,8 +185,6 @@ public class ZKMasterClient extends AbstractZKClient {
PathChildrenCache
masterPc
=
new
PathChildrenCache
(
zkClient
,
masterZNodeParentPath
,
true
,
defaultThreadFactory
);
try
{
Date
now
=
new
Date
();
createTime
=
now
;
masterPc
.
start
();
masterPc
.
getListenable
().
addListener
(
new
PathChildrenCacheListener
()
{
@Override
...
...
@@ -229,8 +195,11 @@ public class ZKMasterClient extends AbstractZKClient {
break
;
case
CHILD_REMOVED:
String
path
=
event
.
getData
().
getPath
();
logger
.
info
(
"master node deleted : {}"
,
event
.
getData
().
getPath
());
removeMasterNode
(
path
);
String
serverHost
=
getHostByEventDataPath
(
path
);
if
(
checkServerSelfDead
(
serverHost
,
ZKNodeType
.
MASTER
)){
return
;
}
removeZKNodePath
(
path
,
ZKNodeType
.
MASTER
,
true
);
break
;
case
CHILD_UPDATED:
break
;
...
...
@@ -242,46 +211,69 @@ public class ZKMasterClient extends AbstractZKClient {
}
catch
(
Exception
e
){
logger
.
error
(
"monitor master failed : "
+
e
.
getMessage
(),
e
);
}
}
}
private
void
removeMasterNode
(
String
path
)
{
InterProcessMutex
mutexLock
=
null
;
private
void
removeZKNodePath
(
String
path
,
ZKNodeType
zkNodeType
,
boolean
failover
)
{
logger
.
info
(
"{} node deleted : {}"
,
zkNodeType
.
toString
(),
path
);
InterProcessMutex
mutex
=
null
;
try
{
// handle dead server, add to zk dead server pth
handleDeadServer
(
path
,
Constants
.
MASTER_PREFIX
,
Constants
.
ADD_ZK_OP
);
String
failoverPath
=
getFailoverLockPath
(
zkNodeType
);
// create a distributed lock
mutex
=
new
InterProcessMutex
(
getZkClient
(),
failoverPath
);
mutex
.
acquire
();
if
(
masterZNode
.
equals
(
path
)){
logger
.
error
(
"master server({}) of myself dead , stopping..."
,
path
);
stoppable
.
stop
(
String
.
format
(
"master server(%s) of myself dead , stopping..."
,
path
));
return
;
String
serverHost
=
getHostByEventDataPath
(
path
);
// handle dead server
handleDeadServer
(
path
,
zkNodeType
,
Constants
.
ADD_ZK_OP
);
//alert server down.
alertServerDown
(
serverHost
,
zkNodeType
);
//failover server
if
(
failover
){
failoverServerWhenDown
(
serverHost
,
zkNodeType
);
}
// create a distributed lock, and the root node path of the lock space is /escheduler/lock/failover/master
String
znodeLock
=
zkMasterClient
.
getMasterFailoverLockPath
();
mutexLock
=
new
InterProcessMutex
(
zkMasterClient
.
getZkClient
(),
znodeLock
);
mutexLock
.
acquire
();
String
masterHost
=
getHostByEventDataPath
(
path
);
for
(
int
i
=
0
;
i
<
Constants
.
ESCHEDULER_WARN_TIMES_FAILOVER
;
i
++)
{
alertDao
.
sendServerStopedAlert
(
1
,
masterHost
,
"Master-Server"
);
}
catch
(
Exception
e
){
logger
.
error
(
"{} server failover failed."
,
zkNodeType
.
toString
());
logger
.
error
(
"failover exception : "
+
e
.
getMessage
(),
e
);
}
if
(
StringUtils
.
isNotEmpty
(
masterHost
))
{
failoverMaster
(
masterHost
);
finally
{
releaseMutex
(
mutex
);
}
}
catch
(
Exception
e
){
logger
.
error
(
"master failover failed : "
+
e
.
getMessage
(),
e
);
}
finally
{
if
(
mutexLock
!=
null
){
try
{
mutexLock
.
release
();
}
catch
(
Exception
e
)
{
logger
.
error
(
"lock relase failed : "
+
e
.
getMessage
(),
e
);
}
/**
 * Dispatches failover handling for a server that went down, based on its type.
 *
 * <p>Does nothing when {@code serverHost} is empty. For {@code MASTER} it calls
 * {@code failoverMaster(serverHost)}; for {@code WORKER} it calls
 * {@code failoverWorker(serverHost, true)}; any other type is ignored.
 *
 * @param serverHost host of the server that went down
 * @param zkNodeType type of the server that went down
 * @throws Exception if the delegated failover fails
 */
private void failoverServerWhenDown(String serverHost, ZKNodeType zkNodeType) throws Exception {
    if (StringUtils.isEmpty(serverHost)) {
        return;
    }
    switch (zkNodeType) {
        case MASTER:
            failoverMaster(serverHost);
            break;
        case WORKER:
            failoverWorker(serverHost, true);
            // explicit break: avoid falling through into whatever case follows
            break;
        default:
            break;
    }
}
/**
 * Resolves the failover lock path for the given node type.
 *
 * @param zkNodeType type of the server being failed over
 * @return the master failover lock path for {@code MASTER}, the worker failover
 *         lock path for {@code WORKER}, and an empty string for any other type
 */
private String getFailoverLockPath(ZKNodeType zkNodeType) {
    String lockPath = "";
    switch (zkNodeType) {
        case MASTER:
            lockPath = getMasterFailoverLockPath();
            break;
        case WORKER:
            lockPath = getWorkerFailoverLockPath();
            break;
        default:
            break;
    }
    return lockPath;
}
/**
 * Sends the "server stopped" alert for a server that went down.
 *
 * <p>The alert is sent {@code Constants.ESCHEDULER_WARN_TIMES_FAILOVER} times
 * through {@code alertDao}, using the string form of {@code zkNodeType} as the
 * server type.
 *
 * @param serverHost host of the server that went down
 * @param zkNodeType type of the server that went down
 */
private void alertServerDown(String serverHost, ZKNodeType zkNodeType) {
    final String serverTypeName = zkNodeType.toString();
    int alertsSent = 0;
    while (alertsSent < Constants.ESCHEDULER_WARN_TIMES_FAILOVER) {
        // NOTE(review): the meaning of the first argument (1) is not visible here — confirm against AlertDao
        alertDao.sendServerStopedAlert(1, serverHost, serverTypeName);
        alertsSent++;
    }
}
/**
* monitor worker
...
...
@@ -290,8 +282,6 @@ public class ZKMasterClient extends AbstractZKClient {
PathChildrenCache
workerPc
=
new
PathChildrenCache
(
zkClient
,
workerZNodeParentPath
,
true
,
defaultThreadFactory
);
try
{
Date
now
=
new
Date
();
createTime
=
now
;
workerPc
.
start
();
workerPc
.
getListenable
().
addListener
(
new
PathChildrenCacheListener
()
{
@Override
...
...
@@ -303,7 +293,7 @@ public class ZKMasterClient extends AbstractZKClient {
case
CHILD_REMOVED:
String
path
=
event
.
getData
().
getPath
();
logger
.
info
(
"node deleted : {}"
,
event
.
getData
().
getPath
());
removeZKNodePath
(
path
);
removeZKNodePath
(
path
,
ZKNodeType
.
WORKER
,
true
);
break
;
default
:
break
;
...
...
@@ -315,33 +305,6 @@ public class ZKMasterClient extends AbstractZKClient {
}
}
private
void
removeZKNodePath
(
String
path
)
{
InterProcessMutex
mutex
=
null
;
try
{
// handle dead server
handleDeadServer
(
path
,
Constants
.
WORKER_PREFIX
,
Constants
.
ADD_ZK_OP
);
// create a distributed lock
String
znodeLock
=
getWorkerFailoverLockPath
();
mutex
=
new
InterProcessMutex
(
getZkClient
(),
znodeLock
);
mutex
.
acquire
();
String
workerHost
=
getHostByEventDataPath
(
path
);
for
(
int
i
=
0
;
i
<
Constants
.
ESCHEDULER_WARN_TIMES_FAILOVER
;
i
++)
{
alertDao
.
sendServerStopedAlert
(
1
,
workerHost
,
"Worker-Server"
);
}
if
(
StringUtils
.
isNotEmpty
(
workerHost
)){
failoverWorker
(
workerHost
,
true
);
}
}
catch
(
Exception
e
){
logger
.
error
(
"worker failover failed : "
+
e
.
getMessage
(),
e
);
}
finally
{
releaseMutex
(
mutex
);
}
}
/**
* get master znode
...
...
@@ -381,7 +344,7 @@ public class ZKMasterClient extends AbstractZKClient {
return
false
;
}
Date
workerServerStartDate
=
null
;
List
<
MasterServer
>
workerServers
=
getServers
(
ZKNodeType
.
WORKER
);
List
<
MasterServer
>
workerServers
=
getServers
List
(
ZKNodeType
.
WORKER
);
for
(
MasterServer
server
:
workerServers
){
if
(
server
.
getHost
().
equals
(
taskInstance
.
getHost
())){
workerServerStartDate
=
server
.
getCreateTime
();
...
...
@@ -444,24 +407,4 @@ public class ZKMasterClient extends AbstractZKClient {
logger
.
info
(
"master failover end"
);
}
/**
* get host ip, string format: masterParentPath/ip_000001/value
* @param path
* @return
*/
private
String
getHostByEventDataPath
(
String
path
)
{
int
startIndex
=
path
.
lastIndexOf
(
"/"
)+
1
;
int
endIndex
=
path
.
lastIndexOf
(
"_"
);
if
(
startIndex
>=
endIndex
){
logger
.
error
(
"parse ip error"
);
return
""
;
}
return
path
.
substring
(
startIndex
,
endIndex
);
}
}
escheduler-server/src/main/java/cn/escheduler/server/zk/ZKWorkerClient.java
浏览文件 @
e4210d14
...
...
@@ -18,12 +18,12 @@ package cn.escheduler.server.zk;
import
cn.escheduler.common.Constants
;
import
cn.escheduler.common.enums.ZKNodeType
;
import
cn.escheduler.common.utils.CollectionUtils
;
import
cn.escheduler.common.utils.OSUtils
;
import
cn.escheduler.common.zk.AbstractZKClient
;
import
cn.escheduler.dao.DaoFactory
;
import
cn.escheduler.dao.ServerDao
;
import
cn.escheduler.common.utils.ResInfo
;
import
org.apache.commons.lang.StringUtils
;
import
org.apache.curator.framework.CuratorFramework
;
import
org.apache.curator.framework.recipes.cache.PathChildrenCache
;
import
org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent
;
...
...
@@ -34,7 +34,6 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.concurrent.ThreadFactory
;
...
...
@@ -130,21 +129,14 @@ public class ZKWorkerClient extends AbstractZKClient {
* register worker
*/
private
void
registWorker
(){
// get current date
Date
now
=
new
Date
();
createTime
=
now
;
try
{
if
(
checkZKNodeExists
(
OSUtils
.
getHost
(),
ZKNodeType
.
WORKER
)){
logger
.
info
(
"register failure , worker already started on : {}, please wait for a moment and try again"
,
OSUtils
.
getHost
());
String
serverPath
=
registerServer
(
ZKNodeType
.
WORKER
);
if
(
StringUtils
.
isEmpty
(
serverPath
)){
System
.
exit
(-
1
);
}
// create worker zknode
initWorkZNode
();
// handle dead server
handleDeadServer
(
workerZNode
,
Constants
.
WORKER_PREFIX
,
Constants
.
DELETE_ZK_OP
);
}
catch
(
Exception
e
)
{
logger
.
error
(
"register worker failure : "
+
e
.
getMessage
(),
e
);
System
.
exit
(-
1
);
}
}
...
...
@@ -167,16 +159,11 @@ public class ZKWorkerClient extends AbstractZKClient {
break
;
case
CHILD_REMOVED:
String
path
=
event
.
getData
().
getPath
();
// handle dead server, add to zk dead server path
handleDeadServer
(
path
,
Constants
.
WORKER_PREFIX
,
Constants
.
ADD_ZK_OP
);
//find myself dead
if
(
workerZNode
.
equals
(
path
)){
logger
.
warn
(
" worker server({}) of myself dead , stopping..."
,
path
);
stoppable
.
stop
(
String
.
format
(
"worker server(%s) of myself dead , stopping"
,
path
));
String
serverHost
=
getHostByEventDataPath
(
path
);
if
(
checkServerSelfDead
(
serverHost
,
ZKNodeType
.
WORKER
)){
return
;
}
logger
.
info
(
"node deleted : {}"
,
event
.
getData
().
getPath
());
break
;
case
CHILD_UPDATED:
break
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录