Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
jobily
Nightingale
提交
a7cf8f9e
N
Nightingale
项目概览
jobily
/
Nightingale
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
N
Nightingale
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a7cf8f9e
编写于
8月 14, 2021
作者:
U
Ulric Qin
提交者:
Gitee
8月 14, 2021
浏览文件
操作
浏览文件
下载
差异文件
!1 fix judge prom
Merge pull request !1 from Ulric Qin/judge_prom_bugfix
上级
ca8a8701
0b4e3b96
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
59 addition
and
32 deletion
+59
-32
judge/handler.go
judge/handler.go
+5
-5
judge/last_event.go
judge/last_event.go
+52
-25
judge/prome_pull.go
judge/prome_pull.go
+2
-2
未找到文件。
judge/handler.go
浏览文件 @
a7cf8f9e
...
...
@@ -449,14 +449,14 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
}
now
:=
time
.
Now
()
.
Unix
()
lastEvent
,
exists
:=
LastEvents
.
Get
(
event
.
HashId
)
lastEvent
,
exists
:=
LastEvents
.
Get
(
event
.
RuleId
,
event
.
HashId
)
switch
event
.
IsPromePull
{
case
0
:
// push型的 && 与条件型的
if
exists
&&
lastEvent
.
IsPromePull
==
1
{
// 之前内存中的事件是pull型的,先清空内存中的事件
LastEvents
.
Del
(
event
.
HashId
)
LastEvents
.
Del
(
event
.
RuleId
,
event
.
HashId
)
}
if
isTriggered
{
...
...
@@ -476,7 +476,7 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
// pull型的,产生的事件一定是触发了阈值的,即这个case里不存在recovery的场景,recovery的场景用resolve_timeout的cron来处理
if
exists
&&
lastEvent
.
IsPromePull
==
0
{
// 之前内存中的事件是push型的,先清空内存中的事件
LastEvents
.
Del
(
event
.
HashId
)
LastEvents
.
Del
(
event
.
RuleId
,
event
.
HashId
)
}
// 1. 第一次来,并且AlertDuration=0,直接发送
...
...
@@ -490,7 +490,7 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
SendEvent
(
event
)
}
else
{
// 只有一条事件,显然无法满足for AlertDuration的时间,放到内存里等待
LastEvents
.
Set
(
event
.
HashId
,
event
)
LastEvents
.
Set
(
event
)
}
return
}
...
...
@@ -529,7 +529,7 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
func
SendEvent
(
event
*
models
.
AlertEvent
)
{
// update last event
LastEvents
.
Set
(
event
.
HashId
,
event
)
LastEvents
.
Set
(
event
)
ok
:=
EventQueue
.
PushFront
(
event
)
if
!
ok
{
logger
.
Errorf
(
"push event:%v err"
,
event
)
...
...
judge/last_event.go
浏览文件 @
a7cf8f9e
...
...
@@ -4,58 +4,85 @@ import (
"sync"
"time"
"github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/v5/models"
"github.com/toolkits/pkg/logger"
)
// rule_id -> hash_id -> *models.AlertEvent
type
SafeEventMap
struct
{
sync
.
RWMutex
M
map
[
string
]
*
models
.
AlertEvent
M
map
[
int64
]
map
[
string
]
*
models
.
AlertEvent
}
var
(
LastEvents
=
&
SafeEventMap
{
M
:
make
(
map
[
string
]
*
models
.
AlertEvent
)}
LastEvents
=
&
SafeEventMap
{
M
:
make
(
map
[
int64
]
map
[
string
]
*
models
.
AlertEvent
)}
)
func
(
s
*
SafeEventMap
)
Get
(
key
string
)
(
*
models
.
AlertEvent
,
bool
)
{
func
(
s
*
SafeEventMap
)
Get
(
ruleId
int64
,
hashId
string
)
(
*
models
.
AlertEvent
,
bool
)
{
s
.
RLock
()
defer
s
.
RUnlock
()
event
,
exists
:=
s
.
M
[
key
]
return
event
,
exists
m
,
has
:=
s
.
M
[
ruleId
]
if
!
has
{
return
nil
,
false
}
event
,
has
:=
m
[
hashId
]
return
event
,
has
}
func
(
s
*
SafeEventMap
)
Set
(
key
string
,
event
*
models
.
AlertEvent
)
{
func
(
s
*
SafeEventMap
)
Set
(
event
*
models
.
AlertEvent
)
{
s
.
Lock
()
defer
s
.
Unlock
()
s
.
M
[
key
]
=
event
m
,
has
:=
s
.
M
[
event
.
RuleId
]
if
!
has
{
m
=
make
(
map
[
string
]
*
models
.
AlertEvent
)
m
[
event
.
HashId
]
=
event
s
.
M
[
event
.
RuleId
]
=
m
}
else
{
s
.
M
[
event
.
RuleId
][
event
.
HashId
]
=
event
}
}
func
(
s
*
SafeEventMap
)
Del
(
key
string
)
{
func
(
s
*
SafeEventMap
)
Del
(
ruleId
int64
,
hashId
string
)
{
s
.
Lock
()
defer
s
.
Unlock
()
delete
(
s
.
M
,
key
)
_
,
has
:=
s
.
M
[
ruleId
]
if
!
has
{
return
}
delete
(
s
.
M
[
ruleId
],
hashId
)
}
func
(
s
*
SafeEventMap
)
DeleteOrSendRecovery
(
promql
string
,
toKeepKeys
map
[
string
]
struct
{})
{
func
(
s
*
SafeEventMap
)
DeleteOrSendRecovery
(
ruleId
int64
,
toKeepKeys
map
[
string
]
struct
{})
{
s
.
Lock
()
defer
s
.
Unlock
()
for
k
,
ev
:=
range
s
.
M
{
m
,
has
:=
s
.
M
[
ruleId
]
if
!
has
{
return
}
for
k
,
ev
:=
range
m
{
if
_
,
loaded
:=
toKeepKeys
[
k
];
loaded
{
continue
}
if
ev
.
ReadableExpression
==
promql
{
logger
.
Debugf
(
"[to_del][ev.IsRecovery:%+v][ev.LastSend:%+v][promql:%v]"
,
ev
.
IsRecovery
,
ev
.
LastSend
,
promql
)
now
:=
time
.
Now
()
.
Unix
()
// promql 没查询到结果,需要将告警标记为已恢复并发送
// 同时需要满足 已经发送过触发信息,并且时间差满足 大于AlertDuration
// 为了避免 发送告警后 一个点 断点了就立即发送恢复信息的case
if
ev
.
IsAlert
()
&&
ev
.
LastSend
&&
now
-
ev
.
TriggerTime
>
ev
.
AlertDuration
{
logger
.
Debugf
(
"[prom.alert.MarkRecov][promql:%v][ev.RuleName:%v]"
,
promql
,
ev
.
RuleName
)
ev
.
MarkRecov
()
EventQueue
.
PushFront
(
ev
)
delete
(
s
.
M
,
k
)
}
// 如果因为promql修改,导致本来是告警状态变成了恢复,也接受
logger
.
Debugf
(
"[to_del][ev.IsRecovery:%+v][ev.LastSend:%+v]"
,
ev
.
IsRecovery
,
ev
.
LastSend
)
// promql 没查询到结果,需要将告警标记为已恢复并发送
// 同时需要满足 已经发送过触发信息,并且时间差满足 大于AlertDuration
// 为了避免 发送告警后 一个点 断点了就立即发送恢复信息的case
now
:=
time
.
Now
()
.
Unix
()
if
ev
.
IsAlert
()
&&
ev
.
LastSend
&&
now
-
ev
.
TriggerTime
>
ev
.
AlertDuration
{
logger
.
Debugf
(
"[prom.alert.MarkRecov][ev.RuleName:%v]"
,
ev
.
RuleName
)
ev
.
MarkRecov
()
EventQueue
.
PushFront
(
ev
)
delete
(
s
.
M
[
ruleId
],
k
)
}
}
}
judge/prome_pull.go
浏览文件 @
a7cf8f9e
...
...
@@ -121,7 +121,7 @@ func handlePromqlVector(pv promql.Vector, r models.AlertRule) {
toKeepKeys
:=
map
[
string
]
struct
{}{}
if
len
(
pv
)
==
0
{
// 说明没触发,或者没查询到,删掉rule-id开头的所有event
LastEvents
.
DeleteOrSendRecovery
(
r
.
PullExpr
.
PromQl
,
toKeepKeys
)
LastEvents
.
DeleteOrSendRecovery
(
r
.
Id
,
toKeepKeys
)
return
}
...
...
@@ -191,6 +191,6 @@ func handlePromqlVector(pv promql.Vector, r models.AlertRule) {
logger
.
Debugf
(
"[handlePromqlVector_has_value][event:%+v]
\n
"
,
event
)
sendEventIfNeed
([]
bool
{
true
},
event
,
&
r
)
}
LastEvents
.
DeleteOrSendRecovery
(
r
.
PullExpr
.
PromQl
,
toKeepKeys
)
LastEvents
.
DeleteOrSendRecovery
(
r
.
Id
,
toKeepKeys
)
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录