提交 544c93c7 编写于 作者: U Ulric Qin

Merge branch 'main' of github.com:didi/nightingale

[
{
"name": "inode资源不足-使用率超过90",
"note": "",
"severity": 2,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "(100 - ((node_filesystem_files_free * 100) / node_filesystem_files))>90",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "内存资源不足-利用率大于75%",
"note": "需要扩容或者升级配置了",
"severity": 2,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "(node_memory_MemTotal_bytes - node_memory_MemFree_bytes - (node_memory_Cached_bytes + node_memory_Buffers_bytes))/node_memory_MemTotal_bytes*100 > 75",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [
"dingtalk"
],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "内存资源不足-利用率大于95%",
"note": "需要扩容或者升级配置了",
"severity": 1,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "(node_memory_MemTotal_bytes - node_memory_MemFree_bytes - (node_memory_Cached_bytes + node_memory_Buffers_bytes))/node_memory_MemTotal_bytes*100 > 95",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [
"dingtalk"
],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "文件句柄不足-使用率超过90%",
"note": "可以将文件句柄limit调大,或者扩容",
"severity": 2,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "(node_filefd_allocated{instance=\"$node\"}/node_filefd_maximum{instance=\"$node\"}*100) > 90",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "某磁盘无法正常读写",
"note": "",
"severity": 1,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "(node_filesystem_device_error{instance=\"$node\",mountpoint!~\"/var/lib/.*\",mountpoint!~\"/run.*\"}) > 0",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "磁盘需要清理了-利用率达到92%",
"note": "",
"severity": 1,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "(100 - ((node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes) ) > 92 ",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [
"dingtalk"
],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "系统conntrack需要调整-使用率超过80%",
"note": "",
"severity": 2,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "node_nf_conntrack_entries / node_nf_conntrack_entries_limit*100 > 80",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "系统出现oom",
"note": "",
"severity": 2,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "increase(node_vmstat_oom_kill[1m]) > 0",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "网卡入方向丢包",
"note": "",
"severity": 2,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "rate(node_network_receive_drop_total{device=~\"e.*\"}[1m]) > 3",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "网卡出方向丢包",
"note": "",
"severity": 2,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "rate(node_network_transmit_drop_total{device=~\"e.*\"}[1m]) > 3",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "计算资源不足-机器每个核平均负载大于10",
"note": "需要扩容或者升级配置了",
"severity": 2,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "avg (node_load1) by (instance)/count(count(node_cpu_seconds_total) by (cpu,instance)) by (instance) >10",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "运行进程数过多-超过3000",
"note": "建议扩容",
"severity": 2,
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "node_procs_running > 3000",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
}
]
\ No newline at end of file
[
{
"name": "inode资源不足-使用率超过90",
"note": "",
"severity": 2,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "(100 - ((node_filesystem_files_free * 100) / node_filesystem_files))>90",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "内存资源不足-利用率大于75%",
"note": "需要扩容或者升级配置了",
"severity": 2,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "(node_memory_MemTotal_bytes - node_memory_MemFree_bytes - (node_memory_Cached_bytes + node_memory_Buffers_bytes))/node_memory_MemTotal_bytes*100 > 75",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "文件句柄不足-使用率超过90%",
"note": "可以将文件句柄limit调大,或者扩容",
"severity": 2,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "(node_filefd_allocated{instance=\"$node\"}/node_filefd_maximum{instance=\"$node\"}*100) > 90",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "某磁盘无法正常读写",
"note": "",
"severity": 1,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "(node_filesystem_device_error{instance=\"$node\",mountpoint!~\"/var/lib/.*\",mountpoint!~\"/run.*\"}) > 0",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "磁盘需要清理了-利用率达到92%",
"note": "",
"severity": 1,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "(100 - ((node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes) ) > 92 ",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "系统conntrack需要调整-使用率超过80%",
"note": "",
"severity": 2,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "node_nf_conntrack_entries / node_nf_conntrack_entries_limit*100 > 80",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "系统出现oom",
"note": "",
"severity": 2,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "increase(node_vmstat_oom_kill[1m]) > 0",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "网卡入方向丢包",
"note": "",
"severity": 2,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "rate(node_network_receive_drop_total{device=~\"e.*\"}[1m]) > 3",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "网卡出方向丢包",
"note": "",
"severity": 2,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "rate(node_network_transmit_drop_total{device=~\"e.*\"}[1m]) > 3",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "计算资源不足-机器loadavg1大于15",
"note": "需要扩容或者升级配置了",
"severity": 2,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "node_load1>15",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
},
{
"name": "运行进程数过多-超过3000",
"note": "建议扩容",
"severity": 2,
"disabled": 0,
"prom_for_duration": 60,
"prom_ql": "node_procs_running > 3000",
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_etime": "23:59",
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": []
}
]
\ No newline at end of file
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册