Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
jobily
Nightingale
提交
55ec76a2
N
Nightingale
项目概览
jobily
/
Nightingale
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
N
Nightingale
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
55ec76a2
编写于
8月 20, 2021
作者:
7
710leo
浏览文件
操作
浏览文件
下载
差异文件
refactor: fix conflicts
上级
a2eab9e5
f651af97
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
193 addition
and
47 deletion
+193
-47
http/router.go
http/router.go
+5
-3
http/router_classpath.go
http/router_classpath.go
+15
-0
judge/handler.go
judge/handler.go
+5
-5
judge/last_event.go
judge/last_event.go
+52
-25
judge/prome_pull.go
judge/prome_pull.go
+16
-12
models/classpath.go
models/classpath.go
+100
-2
未找到文件。
http/router.go
浏览文件 @
55ec76a2
...
...
@@ -86,6 +86,8 @@ func configRoutes(r *gin.Engine) {
pages
.
DELETE
(
"/user-group/:id"
,
login
(),
userGroupDel
)
pages
.
GET
(
"/classpaths"
,
login
(),
classpathListGets
)
pages
.
GET
(
"/classpaths/tree"
,
login
(),
classpathListNodeGets
)
pages
.
GET
(
"/classpaths/tree-node/:id"
,
login
(),
classpathListNodeGetsById
)
pages
.
POST
(
"/classpaths"
,
login
(),
classpathAdd
)
pages
.
PUT
(
"/classpath/:id"
,
login
(),
classpathPut
)
pages
.
DELETE
(
"/classpath/:id"
,
login
(),
classpathDel
)
...
...
@@ -197,13 +199,13 @@ func configRoutes(r *gin.Engine) {
v1
.
POST
(
"/tag-values"
,
GetTagValues
)
v1
.
POST
(
"/tag-pairs"
,
GetTagPairs
)
v1
.
POST
(
"/tag-metrics"
,
GetMetrics
)
v1
.
POST
(
"/push"
,
PushData
)
v1
.
GET
(
"/collect-rules-belong-to-ident"
,
collectRuleGetsByIdent
)
v1
.
GET
(
"/collect-rules-summary"
,
collectRuleSummaryGetByIdent
)
v1
.
GET
(
"/can-do-op-by-name"
,
login
(),
canDoOpByName
)
v1
.
GET
(
"/can-do-op-by-token"
,
login
(),
canDoOpByToken
)
v1
.
POST
(
"/push"
,
PushData
)
v1
.
GET
(
"/collect-rules-belong-to-ident"
,
collectRuleGetsByIdent
)
v1
.
GET
(
"/collect-rules-summary"
,
collectRuleSummaryGetByIdent
)
}
push
:=
r
.
Group
(
"/v1/n9e/series"
)
.
Use
(
gzip
.
Gzip
(
gzip
.
DefaultCompression
))
...
...
http/router_classpath.go
浏览文件 @
55ec76a2
...
...
@@ -24,6 +24,21 @@ func classpathListGets(c *gin.Context) {
},
nil
)
}
func
classpathListNodeGets
(
c
*
gin
.
Context
)
{
query
:=
queryStr
(
c
,
"query"
,
""
)
list
,
err
:=
models
.
ClasspathNodeGets
(
query
)
dangerous
(
err
)
renderData
(
c
,
list
,
nil
)
}
func
classpathListNodeGetsById
(
c
*
gin
.
Context
)
{
cp
:=
Classpath
(
urlParamInt64
(
c
,
"id"
))
children
,
err
:=
cp
.
DirectChildren
()
renderData
(
c
,
children
,
err
)
}
func
classpathFavoriteGet
(
c
*
gin
.
Context
)
{
lst
,
err
:=
loginUser
(
c
)
.
FavoriteClasspaths
()
renderData
(
c
,
lst
,
err
)
...
...
judge/handler.go
浏览文件 @
55ec76a2
...
...
@@ -449,14 +449,14 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
}
now
:=
time
.
Now
()
.
Unix
()
lastEvent
,
exists
:=
LastEvents
.
Get
(
event
.
HashId
)
lastEvent
,
exists
:=
LastEvents
.
Get
(
event
.
RuleId
,
event
.
HashId
)
switch
event
.
IsPromePull
{
case
0
:
// push型的 && 与条件型的
if
exists
&&
lastEvent
.
IsPromePull
==
1
{
// 之前内存中的事件是pull型的,先清空内存中的事件
LastEvents
.
Del
(
event
.
HashId
)
LastEvents
.
Del
(
event
.
RuleId
,
event
.
HashId
)
}
if
isTriggered
{
...
...
@@ -476,7 +476,7 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
// pull型的,产生的事件一定是触发了阈值的,即这个case里不存在recovery的场景,recovery的场景用resolve_timeout的cron来处理
if
exists
&&
lastEvent
.
IsPromePull
==
0
{
// 之前内存中的事件是push型的,先清空内存中的事件
LastEvents
.
Del
(
event
.
HashId
)
LastEvents
.
Del
(
event
.
RuleId
,
event
.
HashId
)
}
// 1. 第一次来,并且AlertDuration=0,直接发送
...
...
@@ -490,7 +490,7 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
SendEvent
(
event
)
}
else
{
// 只有一条事件,显然无法满足for AlertDuration的时间,放到内存里等待
LastEvents
.
Set
(
event
.
HashId
,
event
)
LastEvents
.
Set
(
event
)
}
return
}
...
...
@@ -529,7 +529,7 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
func
SendEvent
(
event
*
models
.
AlertEvent
)
{
// update last event
LastEvents
.
Set
(
event
.
HashId
,
event
)
LastEvents
.
Set
(
event
)
ok
:=
EventQueue
.
PushFront
(
event
)
if
!
ok
{
logger
.
Errorf
(
"push event:%v err"
,
event
)
...
...
judge/last_event.go
浏览文件 @
55ec76a2
...
...
@@ -4,58 +4,85 @@ import (
"sync"
"time"
"github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/v5/models"
"github.com/toolkits/pkg/logger"
)
// rule_id -> hash_id -> *models.AlertEvent
type
SafeEventMap
struct
{
sync
.
RWMutex
M
map
[
string
]
*
models
.
AlertEvent
M
map
[
int64
]
map
[
string
]
*
models
.
AlertEvent
}
var
(
LastEvents
=
&
SafeEventMap
{
M
:
make
(
map
[
string
]
*
models
.
AlertEvent
)}
LastEvents
=
&
SafeEventMap
{
M
:
make
(
map
[
int64
]
map
[
string
]
*
models
.
AlertEvent
)}
)
func
(
s
*
SafeEventMap
)
Get
(
key
string
)
(
*
models
.
AlertEvent
,
bool
)
{
func
(
s
*
SafeEventMap
)
Get
(
ruleId
int64
,
hashId
string
)
(
*
models
.
AlertEvent
,
bool
)
{
s
.
RLock
()
defer
s
.
RUnlock
()
event
,
exists
:=
s
.
M
[
key
]
return
event
,
exists
m
,
has
:=
s
.
M
[
ruleId
]
if
!
has
{
return
nil
,
false
}
event
,
has
:=
m
[
hashId
]
return
event
,
has
}
func
(
s
*
SafeEventMap
)
Set
(
key
string
,
event
*
models
.
AlertEvent
)
{
func
(
s
*
SafeEventMap
)
Set
(
event
*
models
.
AlertEvent
)
{
s
.
Lock
()
defer
s
.
Unlock
()
s
.
M
[
key
]
=
event
m
,
has
:=
s
.
M
[
event
.
RuleId
]
if
!
has
{
m
=
make
(
map
[
string
]
*
models
.
AlertEvent
)
m
[
event
.
HashId
]
=
event
s
.
M
[
event
.
RuleId
]
=
m
}
else
{
s
.
M
[
event
.
RuleId
][
event
.
HashId
]
=
event
}
}
func
(
s
*
SafeEventMap
)
Del
(
key
string
)
{
func
(
s
*
SafeEventMap
)
Del
(
ruleId
int64
,
hashId
string
)
{
s
.
Lock
()
defer
s
.
Unlock
()
delete
(
s
.
M
,
key
)
_
,
has
:=
s
.
M
[
ruleId
]
if
!
has
{
return
}
delete
(
s
.
M
[
ruleId
],
hashId
)
}
func
(
s
*
SafeEventMap
)
DeleteOrSendRecovery
(
promql
string
,
toKeepKeys
map
[
string
]
struct
{})
{
func
(
s
*
SafeEventMap
)
DeleteOrSendRecovery
(
ruleId
int64
,
toKeepKeys
map
[
string
]
struct
{})
{
s
.
Lock
()
defer
s
.
Unlock
()
for
k
,
ev
:=
range
s
.
M
{
m
,
has
:=
s
.
M
[
ruleId
]
if
!
has
{
return
}
for
k
,
ev
:=
range
m
{
if
_
,
loaded
:=
toKeepKeys
[
k
];
loaded
{
continue
}
if
ev
.
ReadableExpression
==
promql
{
logger
.
Debugf
(
"[to_del][ev.IsRecovery:%+v][ev.LastSend:%+v][promql:%v]"
,
ev
.
IsRecovery
,
ev
.
LastSend
,
promql
)
now
:=
time
.
Now
()
.
Unix
()
// promql 没查询到结果,需要将告警标记为已恢复并发送
// 同时需要满足 已经发送过触发信息,并且时间差满足 大于AlertDuration
// 为了避免 发送告警后 一个点 断点了就立即发送恢复信息的case
if
ev
.
IsAlert
()
&&
ev
.
LastSend
&&
now
-
ev
.
TriggerTime
>
ev
.
AlertDuration
{
logger
.
Debugf
(
"[prom.alert.MarkRecov][promql:%v][ev.RuleName:%v]"
,
promql
,
ev
.
RuleName
)
ev
.
MarkRecov
()
EventQueue
.
PushFront
(
ev
)
delete
(
s
.
M
,
k
)
}
// 如果因为promql修改,导致本来是告警状态变成了恢复,也接受
logger
.
Debugf
(
"[to_del][ev.IsRecovery:%+v][ev.LastSend:%+v]"
,
ev
.
IsRecovery
,
ev
.
LastSend
)
// promql 没查询到结果,需要将告警标记为已恢复并发送
// 同时需要满足 已经发送过触发信息,并且时间差满足 大于AlertDuration
// 为了避免 发送告警后 一个点 断点了就立即发送恢复信息的case
now
:=
time
.
Now
()
.
Unix
()
if
ev
.
IsAlert
()
&&
ev
.
LastSend
&&
now
-
ev
.
TriggerTime
>
ev
.
AlertDuration
{
logger
.
Debugf
(
"[prom.alert.MarkRecov][ev.RuleName:%v]"
,
ev
.
RuleName
)
ev
.
MarkRecov
()
EventQueue
.
PushFront
(
ev
)
delete
(
s
.
M
[
ruleId
],
k
)
}
}
}
judge/prome_pull.go
浏览文件 @
55ec76a2
...
...
@@ -41,19 +41,22 @@ type RuleEval struct {
ctx
context
.
Context
}
func
(
re
*
RuleEval
)
start
()
{
logger
.
Debugf
(
"[prome_pull_alert_start][RuleEval: %+v]"
,
re
)
go
func
(
re
*
RuleEval
)
{
func
(
re
RuleEval
)
start
()
{
go
func
(
re
RuleEval
)
{
logger
.
Debugf
(
"[prome_pull_alert_start][RuleEval: %+v]"
,
re
)
if
re
.
R
.
PullExpr
.
EvaluationInterval
<=
0
{
re
.
R
.
PullExpr
.
EvaluationInterval
=
DEFAULT_PULL_ALERT_INTERVAL
}
sleepDuration
:=
time
.
Duration
(
re
.
R
.
PullExpr
.
EvaluationInterval
)
*
time
.
Second
for
{
select
{
case
<-
re
.
ctx
.
Done
()
:
return
case
<-
re
.
quiteChan
:
return
case
<-
time
.
After
(
time
.
Duration
(
re
.
R
.
PullExpr
.
EvaluationInterval
)
*
time
.
Second
)
:
default
:
}
// 获取backend的prometheus DataSource
...
...
@@ -67,12 +70,13 @@ func (re *RuleEval) start() {
promVector
:=
pb
.
QueryVector
(
re
.
R
.
PullExpr
.
PromQl
)
handlePromqlVector
(
promVector
,
re
.
R
)
}
time
.
Sleep
(
sleepDuration
)
}
}(
re
)
}
func
(
r
*
RuleEval
)
stop
()
{
func
(
r
RuleEval
)
stop
()
{
logger
.
Debugf
(
"[prome_pull_alert_stop][RuleEval: %+v]"
,
r
)
close
(
r
.
quiteChan
)
}
...
...
@@ -103,17 +107,17 @@ func (rm *RuleManager) SyncRules(ctx context.Context, rules []models.AlertRule)
}
// 停止旧的
for
hash
,
t
:=
range
rm
.
activeRules
{
for
hash
:=
range
rm
.
activeRules
{
if
_
,
loaded
:=
thisAllRules
[
hash
];
!
loaded
{
t
.
stop
()
rm
.
activeRules
[
hash
]
.
stop
()
delete
(
rm
.
activeRules
,
hash
)
}
}
rm
.
targetMtx
.
Unlock
()
// 开启新的
for
_
,
t
:=
range
thisNewRules
{
t
.
start
()
for
hash
:=
range
thisNewRules
{
t
hisNewRules
[
hash
]
.
start
()
}
}
...
...
@@ -121,7 +125,7 @@ func handlePromqlVector(pv promql.Vector, r models.AlertRule) {
toKeepKeys
:=
map
[
string
]
struct
{}{}
if
len
(
pv
)
==
0
{
// 说明没触发,或者没查询到,删掉rule-id开头的所有event
LastEvents
.
DeleteOrSendRecovery
(
r
.
PullExpr
.
PromQl
,
toKeepKeys
)
LastEvents
.
DeleteOrSendRecovery
(
r
.
Id
,
toKeepKeys
)
return
}
...
...
@@ -191,6 +195,6 @@ func handlePromqlVector(pv promql.Vector, r models.AlertRule) {
logger
.
Debugf
(
"[handlePromqlVector_has_value][event:%+v]
\n
"
,
event
)
sendEventIfNeed
([]
bool
{
true
},
event
,
&
r
)
}
LastEvents
.
DeleteOrSendRecovery
(
r
.
PullExpr
.
PromQl
,
toKeepKeys
)
LastEvents
.
DeleteOrSendRecovery
(
r
.
Id
,
toKeepKeys
)
}
models/classpath.go
浏览文件 @
55ec76a2
...
...
@@ -19,6 +19,14 @@ type Classpath struct {
UpdateBy
string
`json:"update_by"`
}
type
ClasspathNode
struct
{
Id
int64
`json:"id"`
Path
string
`json:"path"`
Note
string
`json:"note"`
Preset
int
`json:"preset"`
Children
[]
*
ClasspathNode
`json:"children"`
}
func
(
c
*
Classpath
)
TableName
()
string
{
return
"classpath"
}
...
...
@@ -104,7 +112,6 @@ func ClasspathGets(query string, limit, offset int) ([]Classpath, error) {
q
:=
"%"
+
query
+
"%"
session
=
session
.
Where
(
"path like ?"
,
q
)
}
var
objs
[]
Classpath
err
:=
session
.
Find
(
&
objs
)
if
err
!=
nil
{
...
...
@@ -151,7 +158,7 @@ func ClasspathGet(where string, args ...interface{}) (*Classpath, error) {
func
ClasspathGetsByPrefix
(
prefix
string
)
([]
Classpath
,
error
)
{
var
objs
[]
Classpath
err
:=
DB
.
Where
(
"path like ?"
,
prefix
+
"%"
)
.
Find
(
&
objs
)
err
:=
DB
.
Where
(
"path like ?"
,
prefix
+
"%"
)
.
OrderBy
(
"path"
)
.
Find
(
&
objs
)
if
err
!=
nil
{
logger
.
Errorf
(
"mysql.error: query classpath fail: %v"
,
err
)
return
objs
,
internalServerError
...
...
@@ -218,3 +225,94 @@ func (c *Classpath) AddResources(idents []string) error {
func
(
c
*
Classpath
)
DelResources
(
idents
[]
string
)
error
{
return
ClasspathResourceDel
(
c
.
Id
,
idents
)
}
func
ClasspathNodeGets
(
query
string
)
([]
*
ClasspathNode
,
error
)
{
session
:=
DB
.
OrderBy
(
"path"
)
if
query
!=
""
{
q
:=
"%"
+
query
+
"%"
session
=
session
.
Where
(
"path like ?"
,
q
)
}
var
objs
[]
Classpath
err
:=
session
.
Find
(
&
objs
)
if
err
!=
nil
{
logger
.
Errorf
(
"mysql.error: query classpath fail: %v"
,
err
)
return
[]
*
ClasspathNode
{},
internalServerError
}
if
len
(
objs
)
==
0
{
return
[]
*
ClasspathNode
{},
nil
}
pcs
:=
ClasspathNodeAllChildren
(
objs
)
return
pcs
,
nil
}
func
(
cp
*
Classpath
)
DirectChildren
()
([]
Classpath
,
error
)
{
var
pcs
[]
Classpath
objs
,
err
:=
ClasspathGetsByPrefix
(
cp
.
Path
)
if
err
!=
nil
{
logger
.
Errorf
(
"mysql.error: query prefix classpath fail: %v"
,
err
)
return
[]
Classpath
{},
internalServerError
}
if
len
(
objs
)
<
2
{
return
[]
Classpath
{},
nil
}
pre
:=
objs
[
1
]
path
:=
pre
.
Path
[
len
(
objs
[
0
]
.
Path
)
:
]
pre
.
Path
=
path
pcs
=
append
(
pcs
,
pre
)
for
_
,
cp
:=
range
objs
[
2
:
]
{
has
:=
strings
.
HasPrefix
(
cp
.
Path
,
pre
.
Path
)
if
!
has
{
path
:=
cp
.
Path
[
len
(
objs
[
0
]
.
Path
)
:
]
pre
.
Path
=
path
pcs
=
append
(
pcs
,
pre
)
pre
=
cp
}
}
return
pcs
,
nil
}
func
ClasspathNodeAllChildren
(
cps
[]
Classpath
)
[]
*
ClasspathNode
{
var
node
ClasspathNode
for
_
,
cp
:=
range
cps
{
ListInsert
(
cp
,
&
node
)
}
return
node
.
Children
}
func
ListInsert
(
obj
Classpath
,
node
*
ClasspathNode
)
{
path
:=
obj
.
Path
has
:=
true
for
{
if
len
(
node
.
Children
)
==
0
{
break
}
children
:=
node
.
Children
[
len
(
node
.
Children
)
-
1
]
prefix
:=
children
.
Path
has
=
strings
.
HasPrefix
(
path
,
prefix
)
if
!
has
{
break
}
path
=
path
[
len
(
prefix
)
:
]
node
=
children
}
newNode
:=
ToClasspathNode
(
obj
,
path
)
node
.
Children
=
append
(
node
.
Children
,
&
newNode
)
}
func
ToClasspathNode
(
cp
Classpath
,
path
string
)
ClasspathNode
{
var
obj
ClasspathNode
obj
.
Id
=
cp
.
Id
obj
.
Path
=
path
obj
.
Note
=
cp
.
Note
obj
.
Preset
=
cp
.
Preset
obj
.
Children
=
[]
*
ClasspathNode
{}
return
obj
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录