92 lines
2.2 KiB
JSON
92 lines
2.2 KiB
JSON
[
  {
    "name": "Process X high number of open files",
    "note": "",
    "severity": 2,
    "disabled": 0,
    "prom_for_duration": 60,
    "prom_ql": "avg by (instance) (namedprocess_namegroup_worst_fd_ratio{groupname=\"X\"}) * 100 > 80",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": [
      "1",
      "2",
      "3",
      "4",
      "5",
      "6",
      "0"
    ],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": [
      "alertname=ProcessHighOpenFiles"
    ]
  },
  {
    "name": "Process X is down",
    "note": "",
    "severity": 1,
    "disabled": 0,
    "prom_for_duration": 0,
    "prom_ql": "sum by (instance) (namedprocess_namegroup_num_procs{groupname=\"X\"}) == 0",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": [
      "1",
      "2",
      "3",
      "4",
      "5",
      "6",
      "0"
    ],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": [
      "alertname=ProcessNotRunning"
    ]
  },
  {
    "name": "Process X is restarted",
    "note": "",
    "severity": 3,
    "disabled": 0,
    "prom_for_duration": 0,
    "prom_ql": "namedprocess_namegroup_oldest_start_time_seconds{groupname=\"X\"} > time() - 60",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": [
      "1",
      "2",
      "3",
      "4",
      "5",
      "6",
      "0"
    ],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": [
      "alertname=ProcessRestarted"
    ]
  }
]