未验证 提交 a0780e2b 编写于 作者: K KubeSphere CI Bot 提交者: GitHub

Merge pull request #3338 from junotx/ca

custom alerting tuning
......@@ -17,7 +17,7 @@ limitations under the License.
package v2alpha1
import (
"regexp"
"context"
"sort"
"strconv"
"strings"
......@@ -26,21 +26,27 @@ import (
"github.com/emicklei/go-restful"
"github.com/pkg/errors"
prommodel "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/timestamp"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/template"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
)
const (
RuleLevelCluster RuleLevel = "cluster"
RuleLevelNamespace RuleLevel = "namespace"
AnnotationKeyRuleUpdateTime = "rule_update_time"
)
var (
ErrThanosRulerNotEnabled = errors.New("The request operation to custom alerting rule could not be done because thanos ruler is not enabled")
ErrAlertingRuleNotFound = errors.New("The alerting rule was not found")
ErrAlertingRuleAlreadyExists = errors.New("The alerting rule already exists")
ErrAlertingAPIV2NotEnabled = errors.New("The alerting v2 API is not enabled")
ruleLabelNameMatcher = regexp.MustCompile(`[a-zA-Z_][a-zA-Z0-9_]*`)
templateTestData = template.AlertTemplateData(map[string]string{}, map[string]string{}, 0)
templateTestTextPrefix = "{{$labels := .Labels}}{{$externalLabels := .ExternalLabels}}{{$value := .Value}}"
)
type RuleLevel string
......@@ -65,7 +71,10 @@ func (r *PostableAlertingRule) Validate() error {
if r.Name == "" {
errs = append(errs, errors.New("name can not be empty"))
}
if _, err := parser.ParseExpr(r.Query); err != nil {
if r.Query == "" {
errs = append(errs, errors.New("query can not be empty"))
} else if _, err := parser.ParseExpr(r.Query); err != nil {
errs = append(errs, errors.Wrapf(err, "query is invalid: %s", r.Query))
}
if r.Duration != "" {
......@@ -74,12 +83,42 @@ func (r *PostableAlertingRule) Validate() error {
}
}
parseTest := func(text string) error {
tmpl := template.NewTemplateExpander(
context.TODO(),
templateTestTextPrefix+text,
"__alert_"+r.Name,
templateTestData,
prommodel.Time(timestamp.FromTime(time.Now())),
nil,
nil,
)
return tmpl.ParseTest()
}
if len(r.Labels) > 0 {
for name, _ := range r.Labels {
if !ruleLabelNameMatcher.MatchString(name) || strings.HasPrefix(name, "__") {
for name, v := range r.Labels {
if !prommodel.LabelName(name).IsValid() || strings.HasPrefix(name, "__") {
errs = append(errs, errors.Errorf(
"label name (%s) is not valid. The name must match [a-zA-Z_][a-zA-Z0-9_]* and has not the __ prefix (label names with this prefix are for internal use)", name))
}
if !prommodel.LabelValue(v).IsValid() {
errs = append(errs, errors.Errorf("invalid label value: %s", v))
}
if err := parseTest(v); err != nil {
errs = append(errs, errors.Errorf("invalid label value: %s", v))
}
}
}
if len(r.Annotations) > 0 {
for name, v := range r.Annotations {
if !prommodel.LabelName(name).IsValid() {
errs = append(errs, errors.Errorf("invalid annotation name: %s", v))
}
if err := parseTest(v); err != nil {
errs = append(errs, errors.Errorf("invalid annotation value: %s", v))
}
}
}
......@@ -126,7 +165,7 @@ type AlertingRuleQueryParams struct {
LabelEqualFilters map[string]string
LabelContainFilters map[string]string
Offset int
PageNum int
Limit int
SortField string
SortType string
......@@ -180,10 +219,22 @@ func AlertingRuleIdCompare(leftId, rightId string) bool {
}
func (q *AlertingRuleQueryParams) Sort(rules []*GettableAlertingRule) {
idCompare := func(left, right *GettableAlertingRule) bool {
baseCompare := func(left, right *GettableAlertingRule) bool {
var leftUpdateTime, rightUpdateTime string
if len(left.Annotations) > 0 {
leftUpdateTime = left.Annotations[AnnotationKeyRuleUpdateTime]
}
if len(right.Annotations) > 0 {
rightUpdateTime = right.Annotations[AnnotationKeyRuleUpdateTime]
}
if leftUpdateTime != rightUpdateTime {
return leftUpdateTime > rightUpdateTime
}
return AlertingRuleIdCompare(left.Id, right.Id)
}
var compare = idCompare
var compare = baseCompare
if q != nil {
reverse := q.SortType == "desc"
switch q.SortField {
......@@ -195,7 +246,7 @@ func (q *AlertingRuleQueryParams) Sort(rules []*GettableAlertingRule) {
}
return c < 0
}
return idCompare(left, right)
return baseCompare(left, right)
}
case "lastEvaluation":
compare = func(left, right *GettableAlertingRule) bool {
......@@ -213,7 +264,7 @@ func (q *AlertingRuleQueryParams) Sort(rules []*GettableAlertingRule) {
return left.LastEvaluation.Before(*right.LastEvaluation)
}
}
return idCompare(left, right)
return baseCompare(left, right)
}
case "evaluationTime":
compare = func(left, right *GettableAlertingRule) bool {
......@@ -223,7 +274,7 @@ func (q *AlertingRuleQueryParams) Sort(rules []*GettableAlertingRule) {
}
return left.EvaluationDurationSeconds < right.EvaluationDurationSeconds
}
return idCompare(left, right)
return baseCompare(left, right)
}
}
}
......@@ -235,7 +286,7 @@ func (q *AlertingRuleQueryParams) Sort(rules []*GettableAlertingRule) {
func (q *AlertingRuleQueryParams) Sub(rules []*GettableAlertingRule) []*GettableAlertingRule {
start, stop := 0, 10
if q != nil {
start, stop = q.Offset, q.Offset+q.Limit
start, stop = (q.PageNum-1)*q.Limit, q.PageNum*q.Limit
}
total := len(rules)
if start < total {
......@@ -252,8 +303,8 @@ type AlertQueryParams struct {
LabelEqualFilters map[string]string
LabelContainFilters map[string]string
Offset int
Limit int
PageNum int
Limit int
}
func (q *AlertQueryParams) Filter(alerts []*Alert) []*Alert {
......@@ -312,7 +363,7 @@ func (q *AlertQueryParams) Sort(alerts []*Alert) {
func (q *AlertQueryParams) Sub(alerts []*Alert) []*Alert {
start, stop := 0, 10
if q != nil {
start, stop = q.Offset, q.Offset+q.Limit
start, stop = (q.PageNum-1)*q.Limit, q.PageNum*q.Limit
}
total := len(alerts)
if start < total {
......@@ -333,7 +384,14 @@ func ParseAlertingRuleQueryParams(req *restful.Request) (*AlertingRuleQueryParam
q.NameContainFilter = req.QueryParameter("name")
q.State = req.QueryParameter("state")
q.Health = req.QueryParameter("health")
q.Offset, _ = strconv.Atoi(req.QueryParameter("offset"))
q.PageNum, err = strconv.Atoi(req.QueryParameter("page"))
if err != nil {
q.PageNum = 1
err = nil
}
if q.PageNum <= 0 {
q.PageNum = 1
}
q.Limit, err = strconv.Atoi(req.QueryParameter("limit"))
if err != nil {
q.Limit = 10
......@@ -352,7 +410,14 @@ func ParseAlertQueryParams(req *restful.Request) (*AlertQueryParams, error) {
)
q.State = req.QueryParameter("state")
q.Offset, _ = strconv.Atoi(req.QueryParameter("offset"))
q.PageNum, err = strconv.Atoi(req.QueryParameter("page"))
if err != nil {
q.PageNum = 1
err = nil
}
if q.PageNum <= 0 {
q.PageNum = 1
}
q.Limit, err = strconv.Atoi(req.QueryParameter("limit"))
if err != nil {
q.Limit = 10
......
......@@ -41,10 +41,24 @@ func AddToContainer(container *restful.Container, informers informers.InformerFa
promResourceClient promresourcesclient.Interface, ruleClient alerting.RuleClient,
option *alerting.Options) error {
handler := newHandler(informers, promResourceClient, ruleClient, option)
ws := runtime.NewWebService(GroupVersion)
if informers == nil || promResourceClient == nil || ruleClient == nil || option == nil {
h := func(req *restful.Request, resp *restful.Response) {
ksapi.HandleBadRequest(resp, nil, alertingv2alpha1.ErrAlertingAPIV2NotEnabled)
return
}
ws.Route(ws.GET("/{path:*}").To(h).Returns(http.StatusOK, ksapi.StatusOK, nil))
ws.Route(ws.PUT("/{path:*}").To(h).Returns(http.StatusOK, ksapi.StatusOK, nil))
ws.Route(ws.POST("/{path:*}").To(h).Returns(http.StatusOK, ksapi.StatusOK, nil))
ws.Route(ws.DELETE("/{path:*}").To(h).Returns(http.StatusOK, ksapi.StatusOK, nil))
ws.Route(ws.PATCH("/{path:*}").To(h).Returns(http.StatusOK, ksapi.StatusOK, nil))
container.Add(ws)
return nil
}
handler := newHandler(informers, promResourceClient, ruleClient, option)
ws.Route(ws.GET("/rules").
To(handler.handleListCustomAlertingRules).
Doc("list the cluster-level custom alerting rules").
......@@ -54,7 +68,7 @@ func AddToContainer(container *restful.Container, informers informers.InformerFa
Param(ws.QueryParameter("label_filters", "label filters, concatenating multiple filters with commas, equal symbol for exact query, wave symbol for fuzzy query e.g. name~a").DataFormat("key=%s,key~%s")).
Param(ws.QueryParameter("sort_field", "sort field, one of `name`, `lastEvaluation`, `evaluationTime`")).
Param(ws.QueryParameter("sort_type", "sort type, one of `asc`, `desc`")).
Param(ws.QueryParameter("offset", "offset of the result set").DataType("integer").DefaultValue("0")).
Param(ws.QueryParameter("page", "page of the result set").DataType("integer").DefaultValue("1")).
Param(ws.QueryParameter("limit", "limit size of the result set").DataType("integer").DefaultValue("10")).
Returns(http.StatusOK, ksapi.StatusOK, alertingv2alpha1.GettableAlertingRuleList{}).
Metadata(restfulspec.KeyOpenAPITags, []string{constants.AlertingTag}))
......@@ -64,7 +78,7 @@ func AddToContainer(container *restful.Container, informers informers.InformerFa
Doc("list the alerts of the cluster-level custom alerting rules").
Param(ws.QueryParameter("state", "state, one of `firing`, `pending`, `inactive`")).
Param(ws.QueryParameter("label_filters", "label filters, concatenating multiple filters with commas, equal symbol for exact query, wave symbol for fuzzy query e.g. name~a").DataFormat("key=%s,key~%s")).
Param(ws.QueryParameter("offset", "offset of the result set").DataType("integer").DefaultValue("0")).
Param(ws.QueryParameter("page", "page of the result set").DataType("integer").DefaultValue("1")).
Param(ws.QueryParameter("limit", "limit size of the result set").DataType("integer").DefaultValue("10")).
Returns(http.StatusOK, ksapi.StatusOK, alertingv2alpha1.AlertList{}).
Metadata(restfulspec.KeyOpenAPITags, []string{constants.AlertingTag}))
......@@ -78,7 +92,7 @@ func AddToContainer(container *restful.Container, informers informers.InformerFa
ws.Route(ws.GET("/rules/{rule_name}/alerts").
To(handler.handleListCustomRuleAlerts).
Doc("list the alerts of the cluster-level custom alerting rule with the specified name").
Returns(http.StatusOK, ksapi.StatusOK, []alertingv2alpha1.Alert{}).
Returns(http.StatusOK, ksapi.StatusOK, alertingv2alpha1.AlertList{}).
Metadata(restfulspec.KeyOpenAPITags, []string{constants.AlertingTag}))
ws.Route(ws.POST("/rules").
......@@ -110,7 +124,7 @@ func AddToContainer(container *restful.Container, informers informers.InformerFa
Param(ws.QueryParameter("label_filters", "label filters, concatenating multiple filters with commas, equal symbol for exact query, wave symbol for fuzzy query e.g. name~a").DataFormat("key=%s,key~%s")).
Param(ws.QueryParameter("sort_field", "sort field, one of `name`, `lastEvaluation`, `evaluationTime`")).
Param(ws.QueryParameter("sort_type", "sort type, one of `asc`, `desc`")).
Param(ws.QueryParameter("offset", "offset of the result set").DataType("integer").DefaultValue("0")).
Param(ws.QueryParameter("page", "page of the result set").DataType("integer").DefaultValue("1")).
Param(ws.QueryParameter("limit", "limit size of the result set").DataType("integer").DefaultValue("10")).
Returns(http.StatusOK, ksapi.StatusOK, alertingv2alpha1.GettableAlertingRuleList{}).
Metadata(restfulspec.KeyOpenAPITags, []string{constants.AlertingTag}))
......@@ -120,7 +134,7 @@ func AddToContainer(container *restful.Container, informers informers.InformerFa
Doc("list the alerts of the custom alerting rules in the specified namespace.").
Param(ws.QueryParameter("state", "state, one of `firing`, `pending`, `inactive`")).
Param(ws.QueryParameter("label_filters", "label filters, concatenating multiple filters with commas, equal symbol for exact query, wave symbol for fuzzy query e.g. name~a").DataFormat("key=%s,key~%s")).
Param(ws.QueryParameter("offset", "offset of the result set").DataType("integer").DefaultValue("0")).
Param(ws.QueryParameter("page", "page of the result set").DataType("integer").DefaultValue("1")).
Param(ws.QueryParameter("limit", "limit size of the result set").DataType("integer").DefaultValue("10")).
Returns(http.StatusOK, ksapi.StatusOK, alertingv2alpha1.AlertList{}).
Metadata(restfulspec.KeyOpenAPITags, []string{constants.AlertingTag}))
......@@ -134,7 +148,7 @@ func AddToContainer(container *restful.Container, informers informers.InformerFa
ws.Route(ws.GET("/namespaces/{namespace}/rules/{rule_name}/alerts").
To(handler.handleListCustomRuleAlerts).
Doc("get the alerts of the custom alerting rule with the specified name in the specified namespace").
Returns(http.StatusOK, ksapi.StatusOK, []alertingv2alpha1.Alert{}).
Returns(http.StatusOK, ksapi.StatusOK, alertingv2alpha1.AlertList{}).
Metadata(restfulspec.KeyOpenAPITags, []string{constants.AlertingTag}))
ws.Route(ws.POST("/namespaces/{namespace}/rules").
......@@ -166,7 +180,7 @@ func AddToContainer(container *restful.Container, informers informers.InformerFa
Param(ws.QueryParameter("label_filters", "label filters, concatenating multiple filters with commas, equal symbol for exact query, wave symbol for fuzzy query e.g. name~a").DataFormat("key=%s,key~%s")).
Param(ws.QueryParameter("sort_field", "sort field, one of `name`, `lastEvaluation`, `evaluationTime`")).
Param(ws.QueryParameter("sort_type", "sort type, one of `asc`, `desc`")).
Param(ws.QueryParameter("offset", "offset of the result set").DataType("integer").DefaultValue("0")).
Param(ws.QueryParameter("page", "page of the result set").DataType("integer").DefaultValue("1")).
Param(ws.QueryParameter("limit", "limit size of the result set").DataType("integer").DefaultValue("10")).
Returns(http.StatusOK, ksapi.StatusOK, alertingv2alpha1.GettableAlertingRuleList{}).
Metadata(restfulspec.KeyOpenAPITags, []string{constants.AlertingTag}))
......@@ -176,7 +190,7 @@ func AddToContainer(container *restful.Container, informers informers.InformerFa
Doc("list the alerts of the builtin(non-custom) rules").
Param(ws.QueryParameter("state", "state, one of `firing`, `pending`, `inactive`")).
Param(ws.QueryParameter("label_filters", "label filters, concatenating multiple filters with commas, equal symbol for exact query, wave symbol for fuzzy query e.g. name~a").DataFormat("key=%s,key~%s")).
Param(ws.QueryParameter("offset", "offset of the result set").DataType("integer").DefaultValue("0")).
Param(ws.QueryParameter("page", "page of the result set").DataType("integer").DefaultValue("1")).
Param(ws.QueryParameter("limit", "limit size of the result set").DataType("integer").DefaultValue("10")).
Returns(http.StatusOK, ksapi.StatusOK, alertingv2alpha1.AlertList{}).
Metadata(restfulspec.KeyOpenAPITags, []string{constants.AlertingTag}))
......@@ -190,7 +204,7 @@ func AddToContainer(container *restful.Container, informers informers.InformerFa
ws.Route(ws.GET("/builtin/rules/{rule_id}/alerts").
To(handler.handleListBuiltinRuleAlerts).
Doc("list the alerts of the builtin(non-custom) alerting rule with the specified id").
Returns(http.StatusOK, ksapi.StatusOK, []alertingv2alpha1.Alert{}).
Returns(http.StatusOK, ksapi.StatusOK, alertingv2alpha1.AlertList{}).
Metadata(restfulspec.KeyOpenAPITags, []string{constants.AlertingTag}))
container.Add(ws)
......
......@@ -4,6 +4,7 @@ import (
"context"
"sort"
"strings"
"time"
"github.com/pkg/errors"
promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
......@@ -48,7 +49,7 @@ type Operator interface {
// GetCustomAlertingRule gets the custom alerting rule with the given name.
GetCustomAlertingRule(ctx context.Context, namespace, ruleName string) (*v2alpha1.GettableAlertingRule, error)
// ListCustomRuleAlerts lists the alerts of the custom alerting rule with the given name.
ListCustomRuleAlerts(ctx context.Context, namespace, ruleName string) ([]*v2alpha1.Alert, error)
ListCustomRuleAlerts(ctx context.Context, namespace, ruleName string) (*v2alpha1.AlertList, error)
// CreateCustomAlertingRule creates a custom alerting rule.
CreateCustomAlertingRule(ctx context.Context, namespace string, rule *v2alpha1.PostableAlertingRule) error
// UpdateCustomAlertingRule updates the custom alerting rule with the given name.
......@@ -65,7 +66,7 @@ type Operator interface {
// GetBuiltinAlertingRule gets the builtin(non-custom) alerting rule with the given id
GetBuiltinAlertingRule(ctx context.Context, ruleId string) (*v2alpha1.GettableAlertingRule, error)
// ListBuiltinRuleAlerts lists the alerts of the builtin(non-custom) alerting rule with the given id
ListBuiltinRuleAlerts(ctx context.Context, ruleId string) ([]*v2alpha1.Alert, error)
ListBuiltinRuleAlerts(ctx context.Context, ruleId string) (*v2alpha1.AlertList, error)
}
func NewOperator(informers informers.InformerFactory,
......@@ -184,7 +185,7 @@ func (o *operator) GetCustomAlertingRule(ctx context.Context, namespace, ruleNam
}
func (o *operator) ListCustomRuleAlerts(ctx context.Context, namespace, ruleName string) (
[]*v2alpha1.Alert, error) {
*v2alpha1.AlertList, error) {
rule, err := o.GetCustomAlertingRule(ctx, namespace, ruleName)
if err != nil {
......@@ -193,7 +194,10 @@ func (o *operator) ListCustomRuleAlerts(ctx context.Context, namespace, ruleName
if rule == nil {
return nil, v2alpha1.ErrAlertingRuleNotFound
}
return rule.Alerts, nil
return &v2alpha1.AlertList{
Total: len(rule.Alerts),
Items: rule.Alerts,
}, nil
}
func (o *operator) ListBuiltinAlertingRules(ctx context.Context,
......@@ -223,7 +227,7 @@ func (o *operator) GetBuiltinAlertingRule(ctx context.Context, ruleId string) (
return o.getBuiltinAlertingRule(ctx, ruleId)
}
func (o *operator) ListBuiltinRuleAlerts(ctx context.Context, ruleId string) ([]*v2alpha1.Alert, error) {
func (o *operator) ListBuiltinRuleAlerts(ctx context.Context, ruleId string) (*v2alpha1.AlertList, error) {
rule, err := o.getBuiltinAlertingRule(ctx, ruleId)
if err != nil {
return nil, err
......@@ -231,7 +235,10 @@ func (o *operator) ListBuiltinRuleAlerts(ctx context.Context, ruleId string) ([]
if rule == nil {
return nil, v2alpha1.ErrAlertingRuleNotFound
}
return rule.Alerts, nil
return &v2alpha1.AlertList{
Total: len(rule.Alerts),
Items: rule.Alerts,
}, nil
}
func (o *operator) ListClusterAlertingRules(ctx context.Context, customFlag string,
......@@ -464,6 +471,8 @@ func (o *operator) CreateCustomAlertingRule(ctx context.Context, namespace strin
return v2alpha1.ErrAlertingRuleAlreadyExists
}
setRuleUpdateTime(rule, time.Now())
return ruler.AddAlertingRule(ctx, ruleNamespace, extraRuleResourceSelector,
customRuleGroupDefault, parseToPrometheusRule(rule), ruleResourceLabels)
}
......@@ -515,6 +524,8 @@ func (o *operator) UpdateCustomAlertingRule(ctx context.Context, namespace, name
return v2alpha1.ErrAlertingRuleNotFound
}
setRuleUpdateTime(rule, time.Now())
return ruler.UpdateAlertingRule(ctx, ruleNamespace, extraRuleResourceSelector,
resourceRule.Group, parseToPrometheusRule(rule), ruleResourceLabels)
}
......@@ -627,3 +638,13 @@ func pageAlerts(alertingRules []*v2alpha1.GettableAlertingRule,
Items: queryParams.Sub(alerts),
}
}
func setRuleUpdateTime(rule *v2alpha1.PostableAlertingRule, t time.Time) {
if rule.Annotations == nil {
rule.Annotations = make(map[string]string)
}
if t.IsZero() {
t = time.Now()
}
rule.Annotations[v2alpha1.AnnotationKeyRuleUpdateTime] = t.UTC().Format(time.RFC3339)
}
......@@ -3,16 +3,20 @@ package rules
import (
"context"
"fmt"
"net/http"
"sort"
"github.com/docker/docker/pkg/locker"
"github.com/ghodss/yaml"
"github.com/pkg/errors"
promresourcesv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
prominformersv1 "github.com/prometheus-operator/prometheus-operator/pkg/client/informers/externalversions/monitoring/v1"
promresourcesclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/util/retry"
"kubesphere.io/kubesphere/pkg/api/alerting/v2alpha1"
)
......@@ -268,6 +272,7 @@ type ThanosRuler struct {
resource *promresourcesv1.ThanosRuler
informer prominformersv1.PrometheusRuleInformer
client promresourcesclient.Interface
locker locker.Locker
}
func NewThanosRuler(resource *promresourcesv1.ThanosRuler, informer prominformersv1.PrometheusRuleInformer,
......@@ -345,7 +350,7 @@ func (r *ThanosRuler) AddAlertingRule(ctx context.Context, ruleNamespace *corev1
}
func (r *ThanosRuler) addAlertingRule(ctx context.Context, ruleNamespace *corev1.Namespace,
prometheusRules []*promresourcesv1.PrometheusRule, excludeRuleResources map[string]*ruleResource,
prometheusRules []*promresourcesv1.PrometheusRule, excludePrometheusRules map[string]*promresourcesv1.PrometheusRule,
group string, rule *promresourcesv1.Rule, ruleResourceLabels map[string]string) error {
sort.Slice(prometheusRules, func(i, j int) bool {
......@@ -353,23 +358,30 @@ func (r *ThanosRuler) addAlertingRule(ctx context.Context, ruleNamespace *corev1
})
for _, prometheusRule := range prometheusRules {
if len(excludeRuleResources) > 0 {
if _, ok := excludeRuleResources[prometheusRule.Name]; ok {
if len(excludePrometheusRules) > 0 {
if _, ok := excludePrometheusRules[prometheusRule.Name]; ok {
continue
}
}
resource := ruleResource(*prometheusRule)
if ok, err := resource.addAlertingRule(group, rule); err != nil {
if err := r.doRuleResourceOperation(ctx, prometheusRule, func(pr *promresourcesv1.PrometheusRule) error {
resource := ruleResource(*pr)
if ok, err := resource.addAlertingRule(group, rule); err != nil {
return err
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
return err
}
}
return nil
}); err != nil {
if err == errOutOfConfigMapSize {
break
} else if resourceNotFound(err) {
continue
}
return err
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
return err
}
return nil
}
return nil
}
// create a new rule resource and add rule into it when all existing rule resources are full.
newPromRule := promresourcesv1.PrometheusRule{
......@@ -403,38 +415,52 @@ func (r *ThanosRuler) UpdateAlertingRule(ctx context.Context, ruleNamespace *cor
}
var (
found bool
success bool
resourcesToDelRule = make(map[string]*ruleResource)
found bool
success bool
prsToDelRule = make(map[string]*promresourcesv1.PrometheusRule)
)
for _, prometheusRule := range prometheusRules {
resource := ruleResource(*prometheusRule)
for i, prometheusRule := range prometheusRules {
if success { // If the update has been successful, delete the possible same rule in other resources
if ok, err := resource.deleteAlertingRule(rule.Alert); err != nil {
if err := r.doRuleResourceOperation(ctx, prometheusRule, func(pr *promresourcesv1.PrometheusRule) error {
resource := ruleResource(*pr)
if ok, err := resource.deleteAlertingRule(rule.Alert); err != nil {
return err
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
return err
}
}
return nil
}); err != nil && !resourceNotFound(err) {
return err
}
continue
}
if err := r.doRuleResourceOperation(ctx, prometheusRule, func(pr *promresourcesv1.PrometheusRule) error {
resource := ruleResource(*pr)
if ok, err := resource.updateAlertingRule(group, rule); err != nil {
return err
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
return err
}
}
continue
}
if ok, err := resource.updateAlertingRule(group, rule); err != nil {
if err == errOutOfConfigMapSize {
// updating the rule in the resource will oversize the size limit,
return nil
}); err != nil {
if resourceNotFound(err) {
continue
} else if err == errOutOfConfigMapSize {
// updating the rule in the resource may oversize the size limit,
// so delete it and then add the new rule to a new resource.
resourcesToDelRule[resource.Name] = &resource
prsToDelRule[prometheusRule.Name] = prometheusRules[i]
found = true
} else {
return err
}
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
return err
continue
}
found = true
success = true
return err
}
found = true
success = true
}
if !found {
......@@ -442,18 +468,24 @@ func (r *ThanosRuler) UpdateAlertingRule(ctx context.Context, ruleNamespace *cor
}
if !success {
err := r.addAlertingRule(ctx, ruleNamespace, prometheusRules, resourcesToDelRule, group, rule, ruleResourceLabels)
err := r.addAlertingRule(ctx, ruleNamespace, prometheusRules, prsToDelRule, group, rule, ruleResourceLabels)
if err != nil {
return err
}
}
for _, resource := range resourcesToDelRule {
if ok, err := resource.deleteAlertingRule(rule.Alert); err != nil {
return err
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
for _, pr := range prsToDelRule {
if err := r.doRuleResourceOperation(ctx, pr, func(pr *promresourcesv1.PrometheusRule) error {
resource := ruleResource(*pr)
if ok, err := resource.deleteAlertingRule(rule.Alert); err != nil {
return err
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
return err
}
}
return nil
}); err != nil && !resourceNotFound(err) {
return err
}
}
return nil
......@@ -467,15 +499,23 @@ func (r *ThanosRuler) DeleteAlertingRule(ctx context.Context, ruleNamespace *cor
}
var success bool
for _, prometheusRule := range prometheusRules {
resource := ruleResource(*prometheusRule)
if ok, err := resource.deleteAlertingRule(name); err != nil {
return err
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
if err := r.doRuleResourceOperation(ctx, prometheusRule, func(pr *promresourcesv1.PrometheusRule) error {
resource := ruleResource(*pr)
if ok, err := resource.deleteAlertingRule(name); err != nil {
return err
} else if ok {
if err = resource.commit(ctx, r.client); err != nil {
return err
}
}
success = true
return nil
}); err != nil {
if resourceNotFound(err) {
continue
}
return err
}
success = true
}
if !success {
return v2alpha1.ErrAlertingRuleNotFound
......@@ -483,6 +523,20 @@ func (r *ThanosRuler) DeleteAlertingRule(ctx context.Context, ruleNamespace *cor
return nil
}
func (r *ThanosRuler) doRuleResourceOperation(ctx context.Context, pr *promresourcesv1.PrometheusRule,
operation func(pr *promresourcesv1.PrometheusRule) error) error {
key := pr.Namespace + "/" + pr.Name
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
r.locker.Lock(key)
defer r.locker.Unlock(key)
pr, err := r.client.MonitoringV1().PrometheusRules(pr.Namespace).Get(ctx, pr.Name, metav1.GetOptions{})
if err != nil {
return err
}
return operation(pr)
})
}
func ruleNamespaceSelected(r Ruler, ruleNamespace *corev1.Namespace) (bool, error) {
rnSelector, err := r.RuleResourceNamespaceSelector()
if err != nil {
......@@ -499,3 +553,13 @@ func ruleNamespaceSelected(r Ruler, ruleNamespace *corev1.Namespace) (bool, erro
}
return true, nil
}
func resourceNotFound(err error) bool {
switch e := err.(type) {
case *apierrors.StatusError:
if e.Status().Code == http.StatusNotFound {
return true
}
}
return false
}
package rules
import (
"kubesphere.io/kubesphere/pkg/simple/client/alerting"
"path/filepath"
"sort"
"strings"
......@@ -16,6 +15,7 @@ import (
"github.com/prometheus/prometheus/rules"
"k8s.io/klog"
"kubesphere.io/kubesphere/pkg/api/alerting/v2alpha1"
"kubesphere.io/kubesphere/pkg/simple/client/alerting"
)
const (
......@@ -25,6 +25,9 @@ const (
LabelKeyInternalRuleName = "__rule_name__"
LabelKeyInternalRuleQuery = "__rule_query__"
LabelKeyInternalRuleDuration = "__rule_duration__"
LabelKeyThanosRulerReplica = "thanos_ruler_replica"
LabelKeyPrometheusReplica = "prometheus_replica"
)
func FormatExpr(expr string) (string, error) {
......@@ -100,16 +103,21 @@ func GenEndpointRuleId(group string, epRule *alerting.AlertingRule,
}
duration := parseDurationSeconds(epRule.Duration)
var labelsMap map[string]string
if externalLabels == nil {
labelsMap = epRule.Labels
} else {
labelsMap = make(map[string]string)
extLabels := externalLabels()
for key, value := range epRule.Labels {
if v, ok := extLabels[key]; !(ok && value == v) {
labelsMap[key] = value
}
var extLabels map[string]string
if externalLabels != nil {
extLabels = externalLabels()
}
labelsMap := make(map[string]string)
for key, value := range epRule.Labels {
if key == LabelKeyPrometheusReplica || key == LabelKeyThanosRulerReplica {
continue
}
if extLabels == nil {
labelsMap[key] = value
continue
}
if v, ok := extLabels[key]; !(ok && value == v) {
labelsMap[key] = value
}
}
......@@ -148,11 +156,11 @@ func GetAlertingRulesStatus(ruleNamespace string, ruleChunk *ResourceRuleChunk,
continue
}
for _, epRule := range group.Rules {
for i, epRule := range group.Rules {
if eid, err := GenEndpointRuleId(group.Name, epRule, extLabels); err != nil {
return nil, errors.Wrap(err, ErrGenRuleId)
} else {
idEpRules[eid] = epRule
idEpRules[eid] = group.Rules[i]
nameIds[epRule.Name] = append(nameIds[epRule.Name], eid)
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册