alert_mute_cache.go 3.2 KB
Newer Older
U
UlricQin 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
package memsto

import (
	"fmt"
	"sync"
	"time"

	"github.com/pkg/errors"
	"github.com/toolkits/pkg/logger"

	"github.com/didi/nightingale/v5/src/models"
	"github.com/didi/nightingale/v5/src/server/config"
	promstat "github.com/didi/nightingale/v5/src/server/stat"
)

// AlertMuteCacheType is an in-memory cache of alert mutes grouped by
// busi_group_id, plus the statistics recorded by the most recent Set call.
type AlertMuteCacheType struct {
	// statTotal and statLastUpdated hold the values passed to the last Set
	// call; StatChanged compares incoming statistics against them.
	statTotal       int64
	statLastUpdated int64

	// The embedded RWMutex is taken by Set, Gets and GetAllStructs around
	// every access to mutes.
	sync.RWMutex
	mutes map[int64][]*models.AlertMute // key: busi_group_id
}

// AlertMuteCache is the package-level alert mute cache. It starts with an
// empty map and both stat fields set to -1.
// NOTE(review): -1 presumably guarantees that the first StatChanged call
// reports a change — confirm that real statistics are never negative.
var AlertMuteCache = AlertMuteCacheType{
	statTotal:       -1,
	statLastUpdated: -1,
	mutes:           make(map[int64][]*models.AlertMute),
}

// StatChanged reports whether total or lastUpdated differs from the values
// stored by the most recent Set call. It returns false only when both match.
func (amc *AlertMuteCacheType) StatChanged(total, lastUpdated int64) bool {
	return amc.statTotal != total || amc.statLastUpdated != lastUpdated
}

// Set replaces the cached mutes with ms and records total and lastUpdated as
// the statistics that StatChanged compares against.
func (amc *AlertMuteCacheType) Set(ms map[int64][]*models.AlertMute, total, lastUpdated int64) {
	// Swap the map inside a closure so the write lock is released before the
	// stat fields are touched.
	func() {
		amc.Lock()
		defer amc.Unlock()
		amc.mutes = ms
	}()

	// The stat fields are written without the lock; per the original author's
	// note, only one goroutine uses them.
	amc.statTotal, amc.statLastUpdated = total, lastUpdated
}

// Gets returns the mutes cached for the business group bgid and whether the
// cache holds an entry for that group. The lookup runs under the read lock.
func (amc *AlertMuteCacheType) Gets(bgid int64) ([]*models.AlertMute, bool) {
	amc.RLock()
	mutes, found := amc.mutes[bgid]
	amc.RUnlock()

	return mutes, found
}

// GetAllStructs returns a copy of the whole cache keyed by business group id.
// Each cached *models.AlertMute is dereferenced, so the returned slices hold
// struct values rather than the cache's pointers. Groups whose cached list is
// empty get no key in the result.
func (amc *AlertMuteCacheType) GetAllStructs() map[int64][]models.AlertMute {
	amc.RLock()
	defer amc.RUnlock()

	snapshot := make(map[int64][]models.AlertMute)
	for groupID, mutes := range amc.mutes {
		for _, mute := range mutes {
			snapshot[groupID] = append(snapshot[groupID], *mute)
		}
	}

	return snapshot
}

// SyncAlertMutes runs one synchronous sync of the alert mute cache and then
// starts the background refresh loop. If the first sync fails, the error is
// printed to stdout and exit(1) is called.
func SyncAlertMutes() {
	if err := syncAlertMutes(); err != nil {
		fmt.Println("failed to sync alert mutes:", err)
		exit(1)
	}

	go loopSyncAlertMutes()
}

// loopSyncAlertMutes re-runs syncAlertMutes forever, sleeping nine seconds
// before each attempt. A failed attempt is logged as a warning and the loop
// keeps going.
func loopSyncAlertMutes() {
	const interval = 9 * time.Second

	for {
		time.Sleep(interval)

		err := syncAlertMutes()
		if err == nil {
			continue
		}
		logger.Warning("failed to sync alert mutes:", err)
	}
}

func syncAlertMutes() error {
	start := time.Now()
	btime := start.Unix() - int64(30)

	stat, err := models.AlertMuteStatistics(config.C.ClusterName, btime)
	if err != nil {
		return errors.WithMessage(err, "failed to exec AlertMuteStatistics")
	}

	if !AlertMuteCache.StatChanged(stat.Total, stat.LastUpdated) {
		promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_alert_mutes").Set(0)
		promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_alert_mutes").Set(0)
		logger.Debug("alert mutes not changed")
		return nil
	}

	lst, err := models.AlertMuteGetsByCluster(config.C.ClusterName, btime)
	if err != nil {
		return errors.WithMessage(err, "failed to exec AlertMuteGetsByCluster")
	}

	oks := make(map[int64][]*models.AlertMute)

	for i := 0; i < len(lst); i++ {
		err = lst[i].Parse()
		if err != nil {
			logger.Warningf("failed to parse alert_mute, id: %d", lst[i].Id)
			continue
		}

		oks[lst[i].GroupId] = append(oks[lst[i].GroupId], lst[i])
	}

	AlertMuteCache.Set(oks, stat.Total, stat.LastUpdated)

	ms := time.Since(start).Milliseconds()
	promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_alert_mutes").Set(float64(ms))
	promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_alert_mutes").Set(float64(len(lst)))
	logger.Infof("timer: sync mutes done, cost: %dms, number: %d", ms, len(lst))

	return nil
}