Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
水淹萌龙
kubesphere
提交
a120969b
K
kubesphere
项目概览
水淹萌龙
/
kubesphere
与 Fork 源项目一致
Fork自
KubeSphere / kubesphere
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
K
kubesphere
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
a120969b
编写于
1月 29, 2021
作者:
J
junotx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
optimize alerting rule concurrency
Signed-off-by:
N
junotx
<
junotx@126.com
>
上级
2893f4cc
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
106 additions
and
42 deletions
+106
-42
pkg/models/alerting/rules/ruler.go
pkg/models/alerting/rules/ruler.go
+106
-42
未找到文件。
pkg/models/alerting/rules/ruler.go
浏览文件 @
a120969b
...
...
@@ -3,16 +3,20 @@ package rules
import
(
"context"
"fmt"
"net/http"
"sort"
"github.com/docker/docker/pkg/locker"
"github.com/ghodss/yaml"
"github.com/pkg/errors"
promresourcesv1
"github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
prominformersv1
"github.com/prometheus-operator/prometheus-operator/pkg/client/informers/externalversions/monitoring/v1"
promresourcesclient
"github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
corev1
"k8s.io/api/core/v1"
apierrors
"k8s.io/apimachinery/pkg/api/errors"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/util/retry"
"kubesphere.io/kubesphere/pkg/api/alerting/v2alpha1"
)
...
...
@@ -268,6 +272,7 @@ type ThanosRuler struct {
resource
*
promresourcesv1
.
ThanosRuler
informer
prominformersv1
.
PrometheusRuleInformer
client
promresourcesclient
.
Interface
locker
locker
.
Locker
}
func
NewThanosRuler
(
resource
*
promresourcesv1
.
ThanosRuler
,
informer
prominformersv1
.
PrometheusRuleInformer
,
...
...
@@ -345,7 +350,7 @@ func (r *ThanosRuler) AddAlertingRule(ctx context.Context, ruleNamespace *corev1
}
func
(
r
*
ThanosRuler
)
addAlertingRule
(
ctx
context
.
Context
,
ruleNamespace
*
corev1
.
Namespace
,
prometheusRules
[]
*
promresourcesv1
.
PrometheusRule
,
exclude
RuleResources
map
[
string
]
*
ruleResourc
e
,
prometheusRules
[]
*
promresourcesv1
.
PrometheusRule
,
exclude
PrometheusRules
map
[
string
]
*
promresourcesv1
.
PrometheusRul
e
,
group
string
,
rule
*
promresourcesv1
.
Rule
,
ruleResourceLabels
map
[
string
]
string
)
error
{
sort
.
Slice
(
prometheusRules
,
func
(
i
,
j
int
)
bool
{
...
...
@@ -353,23 +358,30 @@ func (r *ThanosRuler) addAlertingRule(ctx context.Context, ruleNamespace *corev1
})
for
_
,
prometheusRule
:=
range
prometheusRules
{
if
len
(
exclude
RuleResourc
es
)
>
0
{
if
_
,
ok
:=
exclude
RuleResourc
es
[
prometheusRule
.
Name
];
ok
{
if
len
(
exclude
PrometheusRul
es
)
>
0
{
if
_
,
ok
:=
exclude
PrometheusRul
es
[
prometheusRule
.
Name
];
ok
{
continue
}
}
resource
:=
ruleResource
(
*
prometheusRule
)
if
ok
,
err
:=
resource
.
addAlertingRule
(
group
,
rule
);
err
!=
nil
{
if
err
:=
r
.
doRuleResourceOperation
(
prometheusRule
,
func
(
newerPr
*
promresourcesv1
.
PrometheusRule
)
error
{
resource
:=
ruleResource
(
*
newerPr
)
if
ok
,
err
:=
resource
.
addAlertingRule
(
group
,
rule
);
err
!=
nil
{
return
err
}
else
if
ok
{
if
err
=
resource
.
commit
(
ctx
,
r
.
client
);
err
!=
nil
{
return
err
}
}
return
nil
});
err
!=
nil
{
if
err
==
errOutOfConfigMapSize
{
break
}
else
if
resourceNotFound
(
err
)
{
continue
}
return
err
}
else
if
ok
{
if
err
=
resource
.
commit
(
ctx
,
r
.
client
);
err
!=
nil
{
return
err
}
return
nil
}
return
nil
}
// create a new rule resource and add rule into it when all existing rule resources are full.
newPromRule
:=
promresourcesv1
.
PrometheusRule
{
...
...
@@ -403,38 +415,52 @@ func (r *ThanosRuler) UpdateAlertingRule(ctx context.Context, ruleNamespace *cor
}
var
(
found
bool
success
bool
resourcesToDelRule
=
make
(
map
[
string
]
*
ruleResourc
e
)
found
bool
success
bool
prsToDelRule
=
make
(
map
[
string
]
*
promresourcesv1
.
PrometheusRul
e
)
)
for
_
,
prometheusRule
:=
range
prometheusRules
{
resource
:=
ruleResource
(
*
prometheusRule
)
for
i
,
prometheusRule
:=
range
prometheusRules
{
if
success
{
// If the update has been successful, delete the possible same rule in other resources
if
ok
,
err
:=
resource
.
deleteAlertingRule
(
rule
.
Alert
);
err
!=
nil
{
if
err
:=
r
.
doRuleResourceOperation
(
prometheusRule
,
func
(
newerPr
*
promresourcesv1
.
PrometheusRule
)
error
{
resource
:=
ruleResource
(
*
newerPr
)
if
ok
,
err
:=
resource
.
deleteAlertingRule
(
rule
.
Alert
);
err
!=
nil
{
return
err
}
else
if
ok
{
if
err
=
resource
.
commit
(
ctx
,
r
.
client
);
err
!=
nil
{
return
err
}
}
return
nil
});
err
!=
nil
&&
!
resourceNotFound
(
err
)
{
return
err
}
continue
}
if
err
:=
r
.
doRuleResourceOperation
(
prometheusRule
,
func
(
newerPr
*
promresourcesv1
.
PrometheusRule
)
error
{
resource
:=
ruleResource
(
*
newerPr
)
if
ok
,
err
:=
resource
.
updateAlertingRule
(
group
,
rule
);
err
!=
nil
{
return
err
}
else
if
ok
{
if
err
=
resource
.
commit
(
ctx
,
r
.
client
);
err
!=
nil
{
return
err
}
}
continue
}
if
ok
,
err
:=
resource
.
updateAlertingRule
(
group
,
rule
);
err
!=
nil
{
if
err
==
errOutOfConfigMapSize
{
// updating the rule in the resource will oversize the size limit,
return
nil
});
err
!=
nil
{
if
resourceNotFound
(
err
)
{
continue
}
else
if
err
==
errOutOfConfigMapSize
{
// updating the rule in the resource may oversize the size limit,
// so delete it and then add the new rule to a new resource.
resourcesToDelRule
[
resource
.
Name
]
=
&
resource
prsToDelRule
[
prometheusRule
.
Name
]
=
prometheusRules
[
i
]
found
=
true
}
else
{
return
err
}
}
else
if
ok
{
if
err
=
resource
.
commit
(
ctx
,
r
.
client
);
err
!=
nil
{
return
err
continue
}
found
=
true
success
=
true
return
err
}
found
=
true
success
=
true
}
if
!
found
{
...
...
@@ -442,18 +468,24 @@ func (r *ThanosRuler) UpdateAlertingRule(ctx context.Context, ruleNamespace *cor
}
if
!
success
{
err
:=
r
.
addAlertingRule
(
ctx
,
ruleNamespace
,
prometheusRules
,
resource
sToDelRule
,
group
,
rule
,
ruleResourceLabels
)
err
:=
r
.
addAlertingRule
(
ctx
,
ruleNamespace
,
prometheusRules
,
pr
sToDelRule
,
group
,
rule
,
ruleResourceLabels
)
if
err
!=
nil
{
return
err
}
}
for
_
,
resource
:=
range
resourcesToDelRule
{
if
ok
,
err
:=
resource
.
deleteAlertingRule
(
rule
.
Alert
);
err
!=
nil
{
return
err
}
else
if
ok
{
if
err
=
resource
.
commit
(
ctx
,
r
.
client
);
err
!=
nil
{
for
_
,
pr
:=
range
prsToDelRule
{
if
err
:=
r
.
doRuleResourceOperation
(
pr
,
func
(
newerPr
*
promresourcesv1
.
PrometheusRule
)
error
{
resource
:=
ruleResource
(
*
newerPr
)
if
ok
,
err
:=
resource
.
deleteAlertingRule
(
rule
.
Alert
);
err
!=
nil
{
return
err
}
else
if
ok
{
if
err
=
resource
.
commit
(
ctx
,
r
.
client
);
err
!=
nil
{
return
err
}
}
return
nil
});
err
!=
nil
&&
!
resourceNotFound
(
err
)
{
return
err
}
}
return
nil
...
...
@@ -467,15 +499,23 @@ func (r *ThanosRuler) DeleteAlertingRule(ctx context.Context, ruleNamespace *cor
}
var
success
bool
for
_
,
prometheusRule
:=
range
prometheusRules
{
resource
:=
ruleResource
(
*
prometheusRule
)
if
ok
,
err
:=
resource
.
deleteAlertingRule
(
name
);
err
!=
nil
{
return
err
}
else
if
ok
{
if
err
=
resource
.
commit
(
ctx
,
r
.
client
);
err
!=
nil
{
if
err
:=
r
.
doRuleResourceOperation
(
prometheusRule
,
func
(
newerPr
*
promresourcesv1
.
PrometheusRule
)
error
{
resource
:=
ruleResource
(
*
newerPr
)
if
ok
,
err
:=
resource
.
deleteAlertingRule
(
name
);
err
!=
nil
{
return
err
}
else
if
ok
{
if
err
=
resource
.
commit
(
ctx
,
r
.
client
);
err
!=
nil
{
return
err
}
}
success
=
true
return
nil
});
err
!=
nil
{
if
resourceNotFound
(
err
)
{
continue
}
return
err
}
success
=
true
}
if
!
success
{
return
v2alpha1
.
ErrAlertingRuleNotFound
...
...
@@ -483,6 +523,20 @@ func (r *ThanosRuler) DeleteAlertingRule(ctx context.Context, ruleNamespace *cor
return
nil
}
func
(
r
*
ThanosRuler
)
doRuleResourceOperation
(
pr
*
promresourcesv1
.
PrometheusRule
,
operation
func
(
newerPr
*
promresourcesv1
.
PrometheusRule
)
error
)
error
{
key
:=
pr
.
Namespace
+
"/"
+
pr
.
Name
return
retry
.
RetryOnConflict
(
retry
.
DefaultRetry
,
func
()
error
{
r
.
locker
.
Lock
(
key
)
defer
r
.
locker
.
Unlock
(
key
)
pr
,
err
:=
r
.
informer
.
Lister
()
.
PrometheusRules
(
pr
.
Namespace
)
.
Get
(
pr
.
Name
)
if
err
!=
nil
{
return
err
}
return
operation
(
pr
)
})
}
func
ruleNamespaceSelected
(
r
Ruler
,
ruleNamespace
*
corev1
.
Namespace
)
(
bool
,
error
)
{
rnSelector
,
err
:=
r
.
RuleResourceNamespaceSelector
()
if
err
!=
nil
{
...
...
@@ -499,3 +553,13 @@ func ruleNamespaceSelected(r Ruler, ruleNamespace *corev1.Namespace) (bool, erro
}
return
true
,
nil
}
func
resourceNotFound
(
err
error
)
bool
{
switch
e
:=
err
.
(
type
)
{
case
*
apierrors
.
StatusError
:
if
e
.
Status
()
.
Code
==
http
.
StatusNotFound
{
return
true
}
}
return
false
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录