diff --git a/etc/alerts/linux_by_telegraf.json b/etc/alerts/linux_by_telegraf.json new file mode 100644 index 00000000..d64c6ddd --- /dev/null +++ b/etc/alerts/linux_by_telegraf.json @@ -0,0 +1,393 @@ +[ + { + "name": "有地址PING不通,请注意", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "ping_result_code != 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "有监控对象失联", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "target_up != 1", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "有端口探测失败,请注意", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "net_response_result_code != 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "机器负载-CPU较高,请关注", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "cpu_usage_idle{cpu=\"cpu-total\"} < 25", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "机器负载-内存较高,请关注", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "mem_available_percent < 25", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "硬盘-IO非常繁忙", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "rate(diskio_io_time[1m])/10 > 99", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "硬盘-预计再有4小时写满", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "predict_linear(disk_free[1h], 4*3600) < 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "网卡-入向有丢包", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "increase(net_drop_in[1m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "网卡-出向有丢包", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "increase(net_drop_out[1m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "网络连接-TME_WAIT数量超过2万", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "netstat_tcp_time_wait > 20000", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "进程监控-有进程数为0,某进程可能挂了", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "procstat_lookup_running == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "进程监控-进程句柄限制过小", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "procstat_rlimit_num_fds_soft < 2048", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "进程监控-采集失败", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "procstat_lookup_result_code != 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] + \ No newline at end of file