alert_mute_cache.go 3.1 KB
Newer Older
U
UlricQin 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
package memsto

import (
	"fmt"
	"sync"
	"time"

	"github.com/pkg/errors"
	"github.com/toolkits/pkg/logger"

	"github.com/didi/nightingale/v5/src/models"
	promstat "github.com/didi/nightingale/v5/src/server/stat"
)

// AlertMuteCacheType is an in-memory cache of alert mutes grouped by busi
// group id, plus the statistics that were recorded when it was last filled.
type AlertMuteCacheType struct {
	// statTotal and statLastUpdated hold the total/lastUpdated values passed
	// to the most recent Set call. Both are -1 in the initial AlertMuteCache
	// value and after Reset.
	statTotal       int64
	statLastUpdated int64

	// The embedded RWMutex is taken by the methods of this type when they
	// read or replace mutes.
	sync.RWMutex
	mutes map[int64][]*models.AlertMute // key: busi_group_id
}

// AlertMuteCache is the package-level alert mute cache. It starts out with an
// empty map and both stat fields set to -1.
var AlertMuteCache = AlertMuteCacheType{
	statTotal:       -1,
	statLastUpdated: -1,
	mutes:           make(map[int64][]*models.AlertMute),
}

29 30 31 32 33 34 35 36 37
// Reset returns the cache to its initial state: an empty mutes map and both
// stat fields set to -1. All three fields are replaced while holding the
// write lock.
func (amc *AlertMuteCacheType) Reset() {
	// Build the replacement map before taking the lock.
	empty := make(map[int64][]*models.AlertMute)

	amc.Lock()
	amc.mutes = empty
	amc.statTotal, amc.statLastUpdated = -1, -1
	amc.Unlock()
}

U
UlricQin 已提交
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
// StatChanged reports whether total or lastUpdated differs from the values
// currently recorded in the cache. It returns false only when both match.
//
// The stat fields are read under the read lock because Reset rewrites them
// while holding the write lock; reading them unguarded would race with a
// concurrent Reset.
func (amc *AlertMuteCacheType) StatChanged(total, lastUpdated int64) bool {
	amc.RLock()
	defer amc.RUnlock()

	return amc.statTotal != total || amc.statLastUpdated != lastUpdated
}

// Set replaces the cached mutes with ms and records total and lastUpdated as
// the statistics that ms corresponds to.
//
// ms is stored as-is (not copied), so the caller must not modify it after the
// call. The map and both stat fields are updated under a single write lock,
// the same way Reset does, so a concurrent Reset can never interleave between
// the map swap and the stat update and leave the two out of sync.
func (amc *AlertMuteCacheType) Set(ms map[int64][]*models.AlertMute, total, lastUpdated int64) {
	amc.Lock()
	defer amc.Unlock()

	amc.mutes = ms
	amc.statTotal = total
	amc.statLastUpdated = lastUpdated
}

// Gets looks up the mutes cached for the busi group bgid. The boolean result
// is false when the cache has no entry for that id. The returned slice is the
// cached one, not a copy.
func (amc *AlertMuteCacheType) Gets(bgid int64) ([]*models.AlertMute, bool) {
	amc.RLock()
	cached, found := amc.mutes[bgid]
	amc.RUnlock()

	return cached, found
}

// GetAllStructs returns a snapshot of the whole cache with every AlertMute
// copied by value, keyed by busi group id. Groups whose cached list is empty
// do not appear in the result.
func (amc *AlertMuteCacheType) GetAllStructs() map[int64][]models.AlertMute {
	amc.RLock()
	defer amc.RUnlock()

	snapshot := make(map[int64][]models.AlertMute)
	for groupID, cached := range amc.mutes {
		for _, mute := range cached {
			// Dereference so the caller gets its own copy of the struct.
			snapshot[groupID] = append(snapshot[groupID], *mute)
		}
	}

	return snapshot
}

// SyncAlertMutes runs one synchronous sync of the alert mute cache and then
// starts the background loop that keeps it refreshed. If the first sync
// fails, the error is printed and exit(1) is called.
func SyncAlertMutes() {
	if err := syncAlertMutes(); err != nil {
		fmt.Println("failed to sync alert mutes:", err)
		exit(1)
	}

	// The initial load is done; keep refreshing in the background.
	go loopSyncAlertMutes()
}

// loopSyncAlertMutes never returns: it sleeps for nine seconds, runs
// syncAlertMutes, logs a warning if that fails, and repeats.
func loopSyncAlertMutes() {
	const interval = 9 * time.Second

	for {
		time.Sleep(interval)

		err := syncAlertMutes()
		if err == nil {
			continue
		}
		logger.Warning("failed to sync alert mutes:", err)
	}
}

func syncAlertMutes() error {
	start := time.Now()

101
	stat, err := models.AlertMuteStatistics("")
U
UlricQin 已提交
102 103 104 105 106
	if err != nil {
		return errors.WithMessage(err, "failed to exec AlertMuteStatistics")
	}

	if !AlertMuteCache.StatChanged(stat.Total, stat.LastUpdated) {
7
710leo 已提交
107 108
		promstat.GaugeCronDuration.WithLabelValues("sync_alert_mutes").Set(0)
		promstat.GaugeSyncNumber.WithLabelValues("sync_alert_mutes").Set(0)
U
UlricQin 已提交
109 110 111 112
		logger.Debug("alert mutes not changed")
		return nil
	}

113
	lst, err := models.AlertMuteGetsByCluster("")
U
UlricQin 已提交
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
	if err != nil {
		return errors.WithMessage(err, "failed to exec AlertMuteGetsByCluster")
	}

	oks := make(map[int64][]*models.AlertMute)

	for i := 0; i < len(lst); i++ {
		err = lst[i].Parse()
		if err != nil {
			logger.Warningf("failed to parse alert_mute, id: %d", lst[i].Id)
			continue
		}

		oks[lst[i].GroupId] = append(oks[lst[i].GroupId], lst[i])
	}

	AlertMuteCache.Set(oks, stat.Total, stat.LastUpdated)

	ms := time.Since(start).Milliseconds()
7
710leo 已提交
133 134
	promstat.GaugeCronDuration.WithLabelValues("sync_alert_mutes").Set(float64(ms))
	promstat.GaugeSyncNumber.WithLabelValues("sync_alert_mutes").Set(float64(len(lst)))
U
UlricQin 已提交
135 136 137 138
	logger.Infof("timer: sync mutes done, cost: %dms, number: %d", ms, len(lst))

	return nil
}