Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
淡淡忧伤的程序员
DolphinScheduler
提交
a7a4a316
DolphinScheduler
项目概览
淡淡忧伤的程序员
/
DolphinScheduler
与 Fork 源项目一致
Fork自
apache / DolphinScheduler
通知
48
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
DolphinScheduler
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
a7a4a316
编写于
4月 25, 2019
作者:
B
baoliang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update process time out
上级
bb9012ec
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
95 addition
and
34 deletion
+95
-34
escheduler-dao/src/main/java/cn/escheduler/dao/AlertDao.java
escheduler-dao/src/main/java/cn/escheduler/dao/AlertDao.java
+33
-2
escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterExecThread.java
.../cn/escheduler/server/master/runner/MasterExecThread.java
+29
-3
escheduler-server/src/main/java/cn/escheduler/server/utils/AlertManager.java
...rc/main/java/cn/escheduler/server/utils/AlertManager.java
+31
-27
escheduler-server/src/test/java/cn/escheduler/server/master/AlertManagerTest.java
...st/java/cn/escheduler/server/master/AlertManagerTest.java
+2
-2
未找到文件。
escheduler-dao/src/main/java/cn/escheduler/dao/AlertDao.java
浏览文件 @
a7a4a316
...
...
@@ -23,6 +23,8 @@ import cn.escheduler.dao.datasource.ConnectionFactory;
import
cn.escheduler.dao.mapper.AlertMapper
;
import
cn.escheduler.dao.mapper.UserAlertGroupMapper
;
import
cn.escheduler.dao.model.Alert
;
import
cn.escheduler.dao.model.ProcessDefinition
;
import
cn.escheduler.dao.model.ProcessInstance
;
import
cn.escheduler.dao.model.User
;
import
org.apache.commons.lang3.StringUtils
;
import
org.slf4j.Logger
;
...
...
@@ -83,8 +85,9 @@ public class AlertDao extends AbstractBaseDao {
*/
public
void
sendServerStopedAlert
(
int
alertgroupId
,
String
host
,
String
serverType
){
Alert
alert
=
new
Alert
();
String
content
=
String
.
format
(
"[{'type':'%s','host':'%s','event':'服务挂掉','警告级别':'严重'}]"
,
serverType
,
host
);
alert
.
setTitle
(
"容错告警"
);
String
content
=
String
.
format
(
"[{'type':'%s','host':'%s','event':'server down','warning level':'serious'}]"
,
serverType
,
host
);
alert
.
setTitle
(
"Fault tolerance warning"
);
alert
.
setShowType
(
ShowType
.
TABLE
);
alert
.
setContent
(
content
);
alert
.
setAlertType
(
AlertType
.
EMAIL
);
...
...
@@ -94,6 +97,34 @@ public class AlertDao extends AbstractBaseDao {
alertMapper
.
insert
(
alert
);
}
/**
* process time out alert
* @param processInstance
* @param processDefinition
*/
public
void
sendProcessTimeoutAlert
(
ProcessInstance
processInstance
,
ProcessDefinition
processDefinition
){
int
alertgroupId
=
processInstance
.
getWarningGroupId
();
String
receivers
=
processDefinition
.
getReceivers
();
String
receiversCc
=
processDefinition
.
getReceiversCc
();
Alert
alert
=
new
Alert
();
String
content
=
String
.
format
(
"[{'id':'%d','name':'%s','event':'timeout','warnLevel':'middle'}]"
,
processInstance
.
getId
(),
processInstance
.
getName
());
alert
.
setTitle
(
"Process Timeout Warn"
);
alert
.
setShowType
(
ShowType
.
TABLE
);
alert
.
setContent
(
content
);
alert
.
setAlertType
(
AlertType
.
EMAIL
);
alert
.
setAlertGroupId
(
alertgroupId
);
if
(
StringUtils
.
isNotEmpty
(
receivers
))
{
alert
.
setReceivers
(
receivers
);
}
if
(
StringUtils
.
isNotEmpty
(
receiversCc
))
{
alert
.
setReceiversCc
(
receiversCc
);
}
alert
.
setCreateTime
(
new
Date
());
alert
.
setUpdateTime
(
new
Date
());
alertMapper
.
insert
(
alert
);
}
/**
* task timeout warn
*/
...
...
escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterExecThread.java
浏览文件 @
a7a4a316
...
...
@@ -258,7 +258,7 @@ public class MasterExecThread implements Runnable {
processDao
.
createRecoveryWaitingThreadCommand
(
null
,
processInstance
);
}
List
<
TaskInstance
>
taskInstances
=
processDao
.
findValidTaskListByProcessId
(
processInstance
.
getId
());
alertManager
.
send
WarnningOf
ProcessInstance
(
processInstance
,
taskInstances
);
alertManager
.
send
Alert
ProcessInstance
(
processInstance
,
taskInstances
);
}
...
...
@@ -775,8 +775,15 @@ public class MasterExecThread implements Runnable {
private
void
runProcess
(){
// submit start node
submitPostNode
(
null
);
// submitStandByTask()
;
boolean
sendTimeWarning
=
false
;
while
(!
processInstance
.
IsProcessInstanceStop
()){
// send warning email if process time out.
if
(
!
sendTimeWarning
&&
checkProcessTimeOut
(
processInstance
)
){
alertManager
.
sendProcessTimeoutAlert
(
processInstance
,
processDao
.
findProcessDefineById
(
processInstance
.
getProcessDefinitionId
()));
sendTimeWarning
=
true
;
}
Set
<
MasterBaseTaskExecThread
>
keys
=
activeTaskNode
.
keySet
();
for
(
MasterBaseTaskExecThread
taskExecThread
:
keys
)
{
Future
<
Boolean
>
future
=
activeTaskNode
.
get
(
taskExecThread
);
...
...
@@ -821,7 +828,7 @@ public class MasterExecThread implements Runnable {
}
// send alert
if
(
this
.
recoverToleranceFaultTaskList
.
size
()
>
0
){
alertManager
.
send
WarnningWorker
leranceFault
(
processInstance
,
recoverToleranceFaultTaskList
);
alertManager
.
send
AlertWorkerTo
leranceFault
(
processInstance
,
recoverToleranceFaultTaskList
);
this
.
recoverToleranceFaultTaskList
.
clear
();
}
// updateProcessInstance completed task status
...
...
@@ -851,6 +858,25 @@ public class MasterExecThread implements Runnable {
logger
.
info
(
"process:{} end, state :{}"
,
processInstance
.
getId
(),
processInstance
.
getState
());
}
/**
* check process time out
* @param processInstance
* @return
*/
private
boolean
checkProcessTimeOut
(
ProcessInstance
processInstance
)
{
if
(
processInstance
.
getTimeout
()
==
0
){
return
false
;
}
Date
now
=
new
Date
();
long
runningTime
=
DateUtils
.
differMs
(
now
,
processInstance
.
getStartTime
());
if
(
runningTime
>
processInstance
.
getTimeout
()){
return
true
;
}
return
false
;
}
private
boolean
canSubmitTaskToQueue
()
{
return
OSUtils
.
checkResource
(
conf
,
true
);
}
...
...
escheduler-server/src/main/java/cn/escheduler/server/utils/AlertManager.java
浏览文件 @
a7a4a316
...
...
@@ -26,6 +26,7 @@ import cn.escheduler.common.utils.JSONUtils;
import
cn.escheduler.dao.AlertDao
;
import
cn.escheduler.dao.DaoFactory
;
import
cn.escheduler.dao.model.Alert
;
import
cn.escheduler.dao.model.ProcessDefinition
;
import
cn.escheduler.dao.model.ProcessInstance
;
import
cn.escheduler.dao.model.TaskInstance
;
import
org.slf4j.Logger
;
...
...
@@ -54,27 +55,27 @@ public class AlertManager {
private
String
getCommandCnName
(
CommandType
commandType
)
{
switch
(
commandType
)
{
case
RECOVER_TOLERANCE_FAULT_PROCESS:
return
"
恢复容错
"
;
return
"
recover tolerance fault process
"
;
case
RECOVER_SUSPENDED_PROCESS:
return
"
恢复暂停流程
"
;
return
"
recover suspended process
"
;
case
START_CURRENT_TASK_PROCESS:
return
"
从当前节点开始执行
"
;
return
"
start current task process
"
;
case
START_FAILURE_TASK_PROCESS:
return
"
从失败节点开始执行
"
;
return
"
start failure task process
"
;
case
START_PROCESS:
return
"
启动工作流
"
;
return
"
start process
"
;
case
REPEAT_RUNNING:
return
"
重跑
"
;
return
"
repeat running
"
;
case
SCHEDULER:
return
"
定时执行
"
;
return
"
scheduler
"
;
case
COMPLEMENT_DATA:
return
"
补数
"
;
return
"
complement data
"
;
case
PAUSE:
return
"
暂停工作流
"
;
return
"
pause
"
;
case
STOP:
return
"
停止工作流
"
;
return
"
stop
"
;
default
:
return
"
未知的命令类型
"
;
return
"
unknown type
"
;
}
}
...
...
@@ -124,14 +125,14 @@ public class AlertManager {
continue
;
}
LinkedHashMap
<
String
,
String
>
failedTaskMap
=
new
LinkedHashMap
();
failedTaskMap
.
put
(
"
任务
id"
,
String
.
valueOf
(
task
.
getId
()));
failedTaskMap
.
put
(
"
任务名称
"
,
task
.
getName
());
failedTaskMap
.
put
(
"
任务类型
"
,
task
.
getTaskType
());
failedTaskMap
.
put
(
"
任务状态
"
,
task
.
getState
().
toString
());
failedTaskMap
.
put
(
"
任务开始时间
"
,
DateUtils
.
dateToString
(
task
.
getStartTime
()));
failedTaskMap
.
put
(
"
任务结束时间
"
,
DateUtils
.
dateToString
(
task
.
getEndTime
()));
failedTaskMap
.
put
(
"
task
id"
,
String
.
valueOf
(
task
.
getId
()));
failedTaskMap
.
put
(
"
task name
"
,
task
.
getName
());
failedTaskMap
.
put
(
"
task type
"
,
task
.
getTaskType
());
failedTaskMap
.
put
(
"
task state
"
,
task
.
getState
().
toString
());
failedTaskMap
.
put
(
"
task start time
"
,
DateUtils
.
dateToString
(
task
.
getStartTime
()));
failedTaskMap
.
put
(
"
task end time
"
,
DateUtils
.
dateToString
(
task
.
getEndTime
()));
failedTaskMap
.
put
(
"host"
,
task
.
getHost
());
failedTaskMap
.
put
(
"
日志路径
"
,
task
.
getLogPath
());
failedTaskMap
.
put
(
"
log path
"
,
task
.
getLogPath
());
failedTaskList
.
add
(
failedTaskMap
);
}
res
=
JSONUtils
.
toJson
(
failedTaskList
);
...
...
@@ -152,10 +153,10 @@ public class AlertManager {
for
(
TaskInstance
taskInstance:
toleranceTaskList
){
LinkedHashMap
<
String
,
String
>
toleranceWorkerContentMap
=
new
LinkedHashMap
();
toleranceWorkerContentMap
.
put
(
"
工作流程名称
"
,
processInstance
.
getName
());
toleranceWorkerContentMap
.
put
(
"
容错任务名称
"
,
taskInstance
.
getName
());
toleranceWorkerContentMap
.
put
(
"
容错机器IP
"
,
taskInstance
.
getHost
());
toleranceWorkerContentMap
.
put
(
"
任务失败次数
"
,
String
.
valueOf
(
taskInstance
.
getRetryTimes
()));
toleranceWorkerContentMap
.
put
(
"
process name
"
,
processInstance
.
getName
());
toleranceWorkerContentMap
.
put
(
"
task name
"
,
taskInstance
.
getName
());
toleranceWorkerContentMap
.
put
(
"
host
"
,
taskInstance
.
getHost
());
toleranceWorkerContentMap
.
put
(
"
task retry times
"
,
String
.
valueOf
(
taskInstance
.
getRetryTimes
()));
toleranceTaskInstanceList
.
add
(
toleranceWorkerContentMap
);
}
return
JSONUtils
.
toJson
(
toleranceTaskInstanceList
);
...
...
@@ -166,9 +167,9 @@ public class AlertManager {
* @param processInstance
* @param toleranceTaskList
*/
public
void
send
WarnningWorker
leranceFault
(
ProcessInstance
processInstance
,
List
<
TaskInstance
>
toleranceTaskList
){
public
void
send
AlertWorkerTo
leranceFault
(
ProcessInstance
processInstance
,
List
<
TaskInstance
>
toleranceTaskList
){
Alert
alert
=
new
Alert
();
alert
.
setTitle
(
"worker
容错报警
"
);
alert
.
setTitle
(
"worker
fault tolerance
"
);
alert
.
setShowType
(
ShowType
.
TABLE
);
String
content
=
getWorkerToleranceContent
(
processInstance
,
toleranceTaskList
);
alert
.
setContent
(
content
);
...
...
@@ -187,8 +188,8 @@ public class AlertManager {
* send process instance alert
* @param processInstance
*/
public
void
send
WarnningOf
ProcessInstance
(
ProcessInstance
processInstance
,
List
<
TaskInstance
>
taskInstances
){
public
void
send
Alert
ProcessInstance
(
ProcessInstance
processInstance
,
List
<
TaskInstance
>
taskInstances
){
boolean
sendWarnning
=
false
;
WarningType
warningType
=
processInstance
.
getWarningType
();
...
...
@@ -217,7 +218,7 @@ public class AlertManager {
String
cmdName
=
getCommandCnName
(
processInstance
.
getCommandType
());
String
success
=
processInstance
.
getState
().
typeIsSuccess
()
?
"
成功"
:
"失败
"
;
String
success
=
processInstance
.
getState
().
typeIsSuccess
()
?
"
success"
:
"failed
"
;
alert
.
setTitle
(
cmdName
+
success
);
ShowType
showType
=
processInstance
.
getState
().
typeIsSuccess
()
?
ShowType
.
TEXT
:
ShowType
.
TABLE
;
alert
.
setShowType
(
showType
);
...
...
@@ -233,4 +234,7 @@ public class AlertManager {
logger
.
info
(
"add alert to db , alert: {}"
,
alert
.
toString
());
}
public
void
sendProcessTimeoutAlert
(
ProcessInstance
processInstance
,
ProcessDefinition
processDefinition
)
{
alertDao
.
sendProcessTimeoutAlert
(
processInstance
,
processDefinition
);
}
}
escheduler-server/src/test/java/cn/escheduler/server/master/AlertManagerTest.java
浏览文件 @
a7a4a316
...
...
@@ -76,7 +76,7 @@ public class AlertManagerTest {
toleranceTaskList
.
add
(
toleranceTask1
);
toleranceTaskList
.
add
(
toleranceTask2
);
alertManager
.
send
WarnningWorker
leranceFault
(
processInstance
,
toleranceTaskList
);
alertManager
.
send
AlertWorkerTo
leranceFault
(
processInstance
,
toleranceTaskList
);
}
...
...
@@ -103,7 +103,7 @@ public class AlertManagerTest {
toleranceTaskList
.
add
(
toleranceTask1
);
toleranceTaskList
.
add
(
toleranceTask2
);
alertManager
.
send
WarnningOf
ProcessInstance
(
processInstance
,
toleranceTaskList
);
alertManager
.
send
Alert
ProcessInstance
(
processInstance
,
toleranceTaskList
);
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录