未验证 提交 4eb79fb0 编写于 作者: I Istil 提交者: GitHub

feat: support history alert events store (#760)

上级 c38d595c
......@@ -66,10 +66,7 @@ func consume(events []interface{}, sema *semaphore.Semaphore) {
event.MarkMuted()
if config.Config.Alert.MutedAlertPersist {
err := event.Add()
if err != nil {
logger.Warningf("event_consume: insert muted event err:%v, event:%+v", err, event)
}
persist(event)
}
continue
......@@ -165,6 +162,11 @@ func persist(event *models.AlertEvent) {
logger.Warningf("event_consume: insert alert event err:%v, event:%+v", err, event)
}
}
obj := ToHistoryAlertEvent(event)
err := obj.Add()
if err != nil {
logger.Warningf("event_consume: insert history alert event err:%v, event:%+v", err, event)
}
}
type AlertMsg struct {
......@@ -295,3 +297,29 @@ func enrichTag(event *models.AlertEvent, alertRule *models.AlertRule) {
sort.Strings(tagList)
event.Tags = strings.Join(tagList, " ")
}
// ToHistoryAlertEvent builds a models.HistoryAlertEvent carrying the same
// rule, resource, status, notification and tag fields as ae.
//
// Id is not copied, so the returned record has a zero Id. The slice-typed
// fields (HistoryPoints, NotifyGroupObjs, NotifyUserObjs) are assigned, not
// cloned, so they share backing storage with ae.
func ToHistoryAlertEvent(ae *models.AlertEvent) *models.HistoryAlertEvent {
	return &models.HistoryAlertEvent{
		RuleId:             ae.RuleId,
		RuleName:           ae.RuleName,
		RuleNote:           ae.RuleNote,
		HashId:             ae.HashId,
		IsPromePull:        ae.IsPromePull,
		ResClasspaths:      ae.ResClasspaths,
		ResIdent:           ae.ResIdent,
		Priority:           ae.Priority,
		Status:             ae.Status,
		IsRecovery:         ae.IsRecovery,
		HistoryPoints:      ae.HistoryPoints,
		TriggerTime:        ae.TriggerTime,
		Values:             ae.Values,
		NotifyChannels:     ae.NotifyChannels,
		NotifyGroups:       ae.NotifyGroups,
		NotifyUsers:        ae.NotifyUsers,
		RunbookUrl:         ae.RunbookUrl,
		ReadableExpression: ae.ReadableExpression,
		Tags:               ae.Tags,
		NotifyGroupObjs:    ae.NotifyGroupObjs,
		NotifyUserObjs:     ae.NotifyUserObjs,
	}
}
......@@ -374,6 +374,17 @@ func AlertEvent(id int64) *models.AlertEvent {
return obj
}
// HistoryAlertEvent looks up the history alert event with the given id.
//
// A lookup error is handed to dangerous and a missing row is reported through
// bomb with http.StatusNotFound; both helpers are defined elsewhere and
// presumably abort the current request (confirm against their definitions).
// On success the non-nil record is returned.
func HistoryAlertEvent(id int64) *models.HistoryAlertEvent {
	obj, err := models.HistoryAlertEventGet("id=?", id)
	dangerous(err)

	if obj == nil {
		// The message names the resource that was actually requested.
		bomb(http.StatusNotFound, "No such history alert event")
	}

	return obj
}
func CollectRule(id int64) *models.CollectRule {
obj, err := models.CollectRuleGet("id=?", id)
dangerous(err)
......
......@@ -155,6 +155,9 @@ func configRoutes(r *gin.Engine) {
pages.GET("/alert-event/:id", login(), alertEventGet)
pages.DELETE("/alert-event/:id", login(), alertEventDel)
pages.GET("/history-alert-events", login(), historyAlertEventGets)
pages.GET("/history-alert-event/:id", login(), historyAlertEventGet)
pages.GET("/classpath/:id/collect-rules", login(), collectRuleGets)
pages.POST("/collect-rules", login(), collectRuleAdd)
pages.DELETE("/collect-rules", login(), collectRuleDel)
......@@ -283,6 +286,9 @@ func configRoutes(r *gin.Engine) {
v1.GET("/alert-event/:id", login(), alertEventGet)
v1.DELETE("/alert-event/:id", login(), alertEventDel)
v1.GET("/history-alert-events", login(), historyAlertEventGets)
v1.GET("/history-alert-event/:id", login(), historyAlertEventGet)
v1.POST("/collect-rules", login(), collectRuleAdd)
v1.DELETE("/collect-rules", login(), collectRuleDel)
v1.PUT("/collect-rule/:id", login(), collectRulePut)
......
package http
import (
"time"
"github.com/gin-gonic/gin"
"github.com/didi/nightingale/v5/models"
)
// historyAlertEventGets serves the paginated history alert event list
// (registered on GET /history-alert-events).
//
// Query parameters read: stime, etime, hours, query, priority, status,
// is_recovery and limit. A non-zero hours replaces stime/etime with the window
// [now-hours, now+24h]; a start time without an end time gets the end
// now+24h. The response carries "total" and "list", or the zero page when
// nothing matches.
func historyAlertEventGets(c *gin.Context) {
	var (
		stime = queryInt64(c, "stime", 0)
		etime = queryInt64(c, "etime", 0)
		hours = queryInt64(c, "hours", 0)
	)

	now := time.Now().Unix()
	if hours != 0 {
		// A relative window takes precedence over explicit timestamps.
		stime, etime = now-3600*hours, now+3600*24
	}
	if etime == 0 && stime != 0 {
		// Open-ended range: extend the upper bound one day past now.
		etime = now + 3600*24
	}

	var (
		query      = queryStr(c, "query", "")
		priority   = queryInt(c, "priority", -1)
		status     = queryInt(c, "status", -1)
		isRecovery = queryInt(c, "is_recovery", -1)
		limit      = queryInt(c, "limit", defaultLimit)
	)

	total, err := models.HistoryAlertEventsTotal(stime, etime, query, status, isRecovery, priority)
	dangerous(err)

	events, err := models.HistoryAlertEventGets(stime, etime, query, status, isRecovery, priority, limit, offset(c, limit))
	dangerous(err)

	if len(events) == 0 {
		renderZeroPage(c)
		return
	}

	// Attach the notify group/user objects to every returned event.
	for idx := range events {
		dangerous(events[idx].FillObjs())
	}

	payload := map[string]interface{}{
		"total": total,
		"list":  events,
	}
	renderData(c, payload, nil)
}
// historyAlertEventGet serves a single history alert event selected by the
// ":id" URL parameter, with its notify group/user objects filled in.
func historyAlertEventGet(c *gin.Context) {
	id := urlParamInt64(c, "id")

	event := HistoryAlertEvent(id)
	dangerous(event.FillObjs())

	renderData(c, event, nil)
}
package models
import (
"encoding/json"
"strings"
//"github.com/didi/nightingale/v5/vos"
"github.com/toolkits/pkg/logger"
"xorm.io/builder"
)
// HistoryAlertEvent is the record type for stored history alert events.
// Fields tagged xorm:"-" are excluded from the database mapping and exist
// only in memory / in the JSON representation.
type HistoryAlertEvent struct {
Id int64 `json:"id"`
RuleId int64 `json:"rule_id"`
RuleName string `json:"rule_name"`
RuleNote string `json:"rule_note"`
HashId string `json:"hash_id"` // unique identifier
IsPromePull int `json:"is_prome_pull"` // whether this is a prometheus pull alert; when 1 the frontend uses ReadableExpression to pull the last hour of data
ResClasspaths string `json:"res_classpaths"`
ResIdent string `json:"res_ident" xorm:"-"` // res_ident already appears in the tags field, so it is not written to the database separately; a dedicated field is still kept in the struct because it is convenient for the processing logic; the frontend does not need to display it
Priority int `json:"priority"`
Status int `json:"status"` // marks whether the event is muted
IsRecovery int `json:"is_recovery"` // 0: alert, 1: recovery
HistoryPoints json.RawMessage `json:"history_points"` // HistoryPoints{}
TriggerTime int64 `json:"trigger_time"`
Values string `json:"values" xorm:"-"` // e.g. cpu.idle: 23.3; load.1min: 32
NotifyChannels string `json:"notify_channels"`
NotifyGroups string `json:"notify_groups"`
NotifyUsers string `json:"notify_users"`
RunbookUrl string `json:"runbook_url"`
ReadableExpression string `json:"readable_expression"` // e.g. mem.bytes.used.percent(all,60s) > 0
Tags string `json:"tags"` // merge data_tags rule_tags and res_tags
NotifyGroupObjs []UserGroup `json:"notify_group_objs" xorm:"-"` // filled by FillObjs from NotifyGroups
NotifyUserObjs []User `json:"notify_user_objs" xorm:"-"` // filled by FillObjs from NotifyUsers
}
// IsAlert reports whether the event is in the alert state, i.e. IsRecovery is
// not 1. It is syntactic sugar so callers do not compare IsRecovery directly,
// which is unintuitive and error-prone.
func (hae *HistoryAlertEvent) IsAlert() bool {
return hae.IsRecovery != 1
}
// IsRecov reports whether the event is a recovery, i.e. IsRecovery is 1. It is
// syntactic sugar so callers do not compare IsRecovery directly, which is
// unintuitive and error-prone.
func (hae *HistoryAlertEvent) IsRecov() bool {
return hae.IsRecovery == 1
}
// MarkAlert is syntactic sugar that marks the event as being in the alert
// state by setting IsRecovery to 0.
func (hae *HistoryAlertEvent) MarkAlert() {
hae.IsRecovery = 0
}
// MarkRecov is syntactic sugar that marks the event as recovered by setting
// IsRecovery to 1.
func (hae *HistoryAlertEvent) MarkRecov() {
hae.IsRecovery = 1
}
// MarkMuted is syntactic sugar that marks the event as muted by setting
// Status to 1.
func (hae *HistoryAlertEvent) MarkMuted() {
hae.Status = 1
}
// FillObjs populates NotifyGroupObjs and NotifyUserObjs from the
// whitespace-separated entries in NotifyGroups and NotifyUsers.
//
// A field with no entries is skipped and its Objs slice is left untouched.
// The first lookup error is returned as-is; groups already assigned before a
// failing user lookup remain set.
func (hae *HistoryAlertEvent) FillObjs() error {
	if groupIds := strings.Fields(hae.NotifyGroups); len(groupIds) != 0 {
		groups, err := UserGroupGetsByIdsStr(groupIds)
		if err != nil {
			return err
		}
		hae.NotifyGroupObjs = groups
	}

	if memberIds := strings.Fields(hae.NotifyUsers); len(memberIds) != 0 {
		users, err := UserGetsByIdsStr(memberIds)
		if err != nil {
			return err
		}
		hae.NotifyUserObjs = users
	}

	return nil
}
// Add inserts the event as a new row by delegating to DBInsertOne and returns
// whatever error that helper reports.
func (hae *HistoryAlertEvent) Add() error {
return DBInsertOne(hae)
}
func HistoryAlertEventsTotal(stime, etime int64, query string, status, isRecovery, priority int) (num int64, err error) {
cond := builder.NewCond()
if stime != 0 && etime != 0 {
cond = cond.And(builder.Between{Col: "trigger_time", LessVal: stime, MoreVal: etime})
}
if status != -1 {
cond = cond.And(builder.Eq{"status": status})
}
if isRecovery != -1 {
cond = cond.And(builder.Eq{"is_recovery": isRecovery})
}
if priority != -1 {
cond = cond.And(builder.Eq{"priority": priority})
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
qarg := "%" + arr[i] + "%"
innerCond := builder.NewCond()
innerCond = innerCond.Or(builder.Like{"res_classpaths", qarg})
innerCond = innerCond.Or(builder.Like{"rule_name", qarg})
innerCond = innerCond.Or(builder.Like{"tags", qarg})
cond = cond.And(innerCond)
}
}
num, err = DB.Where(cond).Count(new(HistoryAlertEvent))
if err != nil {
logger.Errorf("mysql.error: count history_alert_event fail: %v", err)
return 0, internalServerError
}
return num, nil
}
func HistoryAlertEventGets(stime, etime int64, query string, status, isRecovery, priority int, limit, offset int) ([]HistoryAlertEvent, error) {
cond := builder.NewCond()
if stime != 0 && etime != 0 {
cond = cond.And(builder.Between{Col: "trigger_time", LessVal: stime, MoreVal: etime})
}
if status != -1 {
cond = cond.And(builder.Eq{"status": status})
}
if isRecovery != -1 {
cond = cond.And(builder.Eq{"is_recovery": isRecovery})
}
if priority != -1 {
cond = cond.And(builder.Eq{"priority": priority})
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
qarg := "%" + arr[i] + "%"
innerCond := builder.NewCond()
innerCond = innerCond.Or(builder.Like{"res_classpaths", qarg})
innerCond = innerCond.Or(builder.Like{"rule_name", qarg})
innerCond = innerCond.Or(builder.Like{"tags", qarg})
cond = cond.And(innerCond)
}
}
var objs []HistoryAlertEvent
err := DB.Where(cond).Desc("trigger_time").Limit(limit, offset).Find(&objs)
if err != nil {
logger.Errorf("mysql.error: query history_alert_event fail: %v", err)
return objs, internalServerError
}
if len(objs) == 0 {
return []HistoryAlertEvent{}, nil
}
return objs, nil
}
// HistoryAlertEventGet fetches a single history alert event selected by the
// given WHERE clause and its bind arguments.
//
// It returns (nil, nil) when no row matches. On a database error the cause is
// logged and (nil, internalServerError) is returned.
func HistoryAlertEventGet(where string, args ...interface{}) (*HistoryAlertEvent, error) {
	event := new(HistoryAlertEvent)

	found, err := DB.Where(where, args...).Get(event)
	switch {
	case err != nil:
		logger.Errorf("mysql.error: query history_alert_event(%s)%+v fail: %s", where, args, err)
		return nil, internalServerError
	case !found:
		return nil, nil
	}

	return event, nil
}
......@@ -313,6 +313,31 @@ CREATE TABLE `alert_event` (
KEY (`trigger_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- History of alert events; the columns mirror the persisted fields of
-- models.HistoryAlertEvent.
-- The descriptive texts on `rule_note` and `tags` are column COMMENTs: as
-- DEFAULT values they would be stored as real data by any insert that omits
-- the column.
CREATE TABLE `history_alert_event` (
  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
  `hash_id` varchar(255) NOT NULL COMMENT 'rule_id + point_pk',
  `rule_id` bigint unsigned NOT NULL,
  `rule_name` varchar(255) NOT NULL,
  `rule_note` varchar(512) NOT NULL DEFAULT '' COMMENT 'alert rule note',
  `res_classpaths` varchar(1024) NOT NULL DEFAULT '' COMMENT 'belong classpaths',
  `priority` tinyint(1) NOT NULL,
  `status` tinyint(1) NOT NULL,
  `is_prome_pull` tinyint(1) NOT NULL,
  `is_recovery` tinyint(1) NOT NULL,
  `history_points` text COMMENT 'metric, history points',
  `trigger_time` bigint NOT NULL,
  `notify_channels` varchar(255) NOT NULL DEFAULT '',
  `notify_groups` varchar(255) NOT NULL DEFAULT '',
  `notify_users` varchar(255) NOT NULL DEFAULT '',
  `runbook_url` varchar(255) DEFAULT NULL,
  `readable_expression` varchar(1024) NOT NULL COMMENT 'e.g. mem.bytes.used.percent(all,60s) > 0',
  `tags` varchar(1024) NOT NULL DEFAULT '' COMMENT 'merge data_tags rule_tags and res_tags',
  PRIMARY KEY (`id`),
  KEY `hash_id` (`hash_id`),
  KEY `rule_id` (`rule_id`),
  KEY `trigger_time` (`trigger_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `metric_description` (
`id` bigint unsigned not null auto_increment,
`metric` varchar(255) not null default '',
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册