[
  {
    "name": "inode资源不足-使用率超过90",
    "note": "",
    "severity": 2,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "(100 - ((node_filesystem_files_free * 100) / node_filesystem_files))>90",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "内存资源不足-利用率大于75%",
    "note": "需要扩容或者升级配置了",
    "severity": 2,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "(node_memory_MemTotal_bytes - node_memory_MemFree_bytes - (node_memory_Cached_bytes + node_memory_Buffers_bytes))/node_memory_MemTotal_bytes*100 > 75",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": ["dingtalk"],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "内存资源不足-利用率大于95%",
    "note": "需要扩容或者升级配置了",
    "severity": 1,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "(node_memory_MemTotal_bytes - node_memory_MemFree_bytes - (node_memory_Cached_bytes + node_memory_Buffers_bytes))/node_memory_MemTotal_bytes*100 > 95",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": ["dingtalk"],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "文件句柄不足-使用率超过90%",
    "note": "可以将文件句柄limit调大,或者扩容",
    "severity": 2,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "(node_filefd_allocated/node_filefd_maximum*100) > 90",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "某磁盘无法正常读写",
    "note": "",
    "severity": 1,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "(node_filesystem_device_error{mountpoint!~\"/var/lib/.*\",mountpoint!~\"/run.*\"}) > 0",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "磁盘需要清理了-利用率达到92%",
    "note": "",
    "severity": 1,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "(100 - ((node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes) ) > 92",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": ["dingtalk"],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "系统conntrack需要调整-使用率超过80%",
    "note": "",
    "severity": 2,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "node_nf_conntrack_entries / node_nf_conntrack_entries_limit*100 > 80",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "系统出现oom",
    "note": "",
    "severity": 2,
    "disabled": 1,
    "prom_for_duration": 0,
    "prom_ql": "increase(node_vmstat_oom_kill[1m]) > 0",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "网卡入方向丢包",
    "note": "",
    "severity": 2,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "rate(node_network_receive_drop_total{device=~\"e.*\"}[1m]) > 3",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "网卡出方向丢包",
    "note": "",
    "severity": 2,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "rate(node_network_transmit_drop_total{device=~\"e.*\"}[1m]) > 3",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "计算资源不足-机器每个核平均负载大于10",
    "note": "需要扩容或者升级配置了",
    "severity": 2,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "avg (node_load1) by (instance)/count(count(node_cpu_seconds_total) by (cpu,instance)) by (instance) >10",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  },
  {
    "name": "运行进程数过多-超过3000",
    "note": "建议扩容",
    "severity": 2,
    "disabled": 1,
    "prom_for_duration": 60,
    "prom_ql": "node_procs_running > 3000",
    "prom_eval_interval": 15,
    "enable_stime": "00:00",
    "enable_etime": "23:59",
    "enable_days_of_week": ["1", "2", "3", "4", "5", "6", "0"],
    "enable_in_bg": 0,
    "notify_recovered": 1,
    "notify_channels": [],
    "notify_repeat_step": 60,
    "recover_duration": 0,
    "callbacks": [],
    "runbook_url": "",
    "append_tags": []
  }
]