From f6fd6aed7fbf7f37e3bc361a75c26f4f15c4cb7e Mon Sep 17 00:00:00 2001 From: Ulric Qin Date: Sat, 11 Jun 2022 17:54:52 +0800 Subject: [PATCH] add some categraf alerts.json --- .../alerts/http_response_by_categraf.json | 30 + docker/n9eetc/alerts/linux_by_categraf.json | 243 +++ docker/n9eetc/alerts/mysql_by_categraf.json | 302 +++ .../alerts/net_response_by_categraf.json | 30 + docker/n9eetc/alerts/ntp_by_categraf.json | 30 + docker/n9eetc/alerts/ping_by_categraf.json | 30 + .../n9eetc/alerts/procstat_by_categraf.json | 62 + docker/n9eetc/alerts/redis_by_categraf.json | 182 ++ .../n9eetc/dashboards/linux_by_telegraf.json | 1676 +++++++++++++++-- etc/alerts/http_response_by_categraf.json | 30 + etc/alerts/linux_by_categraf.json | 243 +++ etc/alerts/mysql_by_categraf.json | 302 +++ etc/alerts/net_response_by_categraf.json | 30 + etc/alerts/ntp_by_categraf.json | 30 + etc/alerts/ping_by_categraf.json | 30 + etc/alerts/procstat_by_categraf.json | 62 + etc/alerts/redis_by_categraf.json | 182 ++ 17 files changed, 3328 insertions(+), 166 deletions(-) create mode 100644 docker/n9eetc/alerts/http_response_by_categraf.json create mode 100644 docker/n9eetc/alerts/linux_by_categraf.json create mode 100644 docker/n9eetc/alerts/mysql_by_categraf.json create mode 100644 docker/n9eetc/alerts/net_response_by_categraf.json create mode 100644 docker/n9eetc/alerts/ntp_by_categraf.json create mode 100644 docker/n9eetc/alerts/ping_by_categraf.json create mode 100644 docker/n9eetc/alerts/procstat_by_categraf.json create mode 100644 docker/n9eetc/alerts/redis_by_categraf.json create mode 100644 etc/alerts/http_response_by_categraf.json create mode 100644 etc/alerts/linux_by_categraf.json create mode 100644 etc/alerts/mysql_by_categraf.json create mode 100644 etc/alerts/net_response_by_categraf.json create mode 100644 etc/alerts/ntp_by_categraf.json create mode 100644 etc/alerts/ping_by_categraf.json create mode 100644 etc/alerts/procstat_by_categraf.json create mode 100644 
etc/alerts/redis_by_categraf.json diff --git a/docker/n9eetc/alerts/http_response_by_categraf.json b/docker/n9eetc/alerts/http_response_by_categraf.json new file mode 100644 index 00000000..3908400f --- /dev/null +++ b/docker/n9eetc/alerts/http_response_by_categraf.json @@ -0,0 +1,30 @@ +[ + { + "name": "HTTP地址探测失败", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "http_response_result_code != 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file diff --git a/docker/n9eetc/alerts/linux_by_categraf.json b/docker/n9eetc/alerts/linux_by_categraf.json new file mode 100644 index 00000000..ee9d2be9 --- /dev/null +++ b/docker/n9eetc/alerts/linux_by_categraf.json @@ -0,0 +1,243 @@ +[ + { + "name": "监控对象失联", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "max_over_time(target_up[130s]) == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "机器负载-CPU较高,请关注", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "cpu_usage_idle{cpu=\"cpu-total\"} < 25", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + 
"callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "机器负载-内存较高,请关注", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "mem_available_percent < 25", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "硬盘-IO有点繁忙", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "rate(diskio_io_time[1m])/10 > 99", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "硬盘-预计再有4小时写满", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "predict_linear(disk_free[1h], 4*3600) < 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "网卡-入向有丢包", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "increase(net_drop_in[1m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], 
+ "runbook_url": "", + "append_tags": [] + }, + { + "name": "网卡-出向有丢包", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "increase(net_drop_out[1m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "网络连接-TIME_WAIT数量超过2万", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "netstat_tcp_time_wait > 20000", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } +] + \ No newline at end of file diff --git a/docker/n9eetc/alerts/mysql_by_categraf.json b/docker/n9eetc/alerts/mysql_by_categraf.json new file mode 100644 index 00000000..d5a6a94c --- /dev/null +++ b/docker/n9eetc/alerts/mysql_by_categraf.json @@ -0,0 +1,302 @@ +[ + { + "name": "MysqlInnodbLogWaits", + "note": "MySQL innodb log writes stalling", + "severity": 2, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "rate(mysql_global_status_innodb_log_waits[15m]) > 10", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlInnodbLogWaits" + ] + }, + { + "name": "MysqlSlaveIoThreadNotRunning", + "note": "MySQL Slave IO thread not running", + 
"severity": 1, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "mysql_slave_status_master_server_id > 0 and ON (instance) mysql_slave_status_slave_io_running == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlSlaveIoThreadNotRunning" + ] + }, + { + "name": "MysqlSlaveReplicationLag", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "mysql_slave_status_master_server_id > 0 and ON (instance) (mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay) > 30", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlSlaveReplicationLag" + ] + }, + { + "name": "MysqlSlaveSqlThreadNotRunning", + "note": "MySQL Slave SQL thread not running", + "severity": 1, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "mysql_slave_status_master_server_id > 0 and ON (instance) mysql_slave_status_slave_sql_running == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlSlaveSqlThreadNotRunning" + ] + }, + { + "name": "Mysql刚刚有重启,请注意", + "note": "MySQL has just been restarted, less than one minute 
ago", + "severity": 3, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "mysql_global_status_uptime < 60", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlRestarted" + ] + }, + { + "name": "Mysql实例挂了", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "mysql_up == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlDown" + ] + }, + { + "name": "Mysql打开了很多文件句柄,请注意", + "note": "More than 80% of MySQL files open", + "severity": 2, + "disabled": 0, + "prom_for_duration": 120, + "prom_ql": "avg by (instance) (mysql_global_status_open_files) / avg by (instance)(mysql_global_variables_open_files_limit) * 100 > 80", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlHighOpenFiles" + ] + }, + { + "name": "Mysql最近一分钟有慢查询出现", + "note": "MySQL server has some new slow queries", + "severity": 2, + "disabled": 0, + "prom_for_duration": 120, + "prom_ql": "increase(mysql_global_status_slow_queries[1m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + 
"enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlSlowQueries" + ] + }, + { + "name": "Mysql有超过60%的连接是running状态", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 120, + "prom_ql": "avg by (instance) (mysql_global_status_threads_running) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 60", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlHighThreadsRunning" + ] + }, + { + "name": "Mysql连接数已超过80%", + "note": "More than 80% of MySQL connections are in use", + "severity": 2, + "disabled": 0, + "prom_for_duration": 120, + "prom_ql": "avg by (instance) (mysql_global_status_threads_connected) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 80", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlTooManyConnections" + ] + } + ] \ No newline at end of file diff --git a/docker/n9eetc/alerts/net_response_by_categraf.json b/docker/n9eetc/alerts/net_response_by_categraf.json new file mode 100644 index 00000000..a55961e4 --- /dev/null +++ b/docker/n9eetc/alerts/net_response_by_categraf.json @@ -0,0 +1,30 @@ +[ + { + "name": "网络地址探活失败", + "note": "", + "severity": 2, + 
"disabled": 0, + "prom_for_duration": 60, + "prom_ql": "net_response_result_code != 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file diff --git a/docker/n9eetc/alerts/ntp_by_categraf.json b/docker/n9eetc/alerts/ntp_by_categraf.json new file mode 100644 index 00000000..2ab93889 --- /dev/null +++ b/docker/n9eetc/alerts/ntp_by_categraf.json @@ -0,0 +1,30 @@ +[ + { + "name": "NTP时间偏移太大", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "ntp_offset_ms > 1000 or ntp_offset_ms < -1000", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file diff --git a/docker/n9eetc/alerts/ping_by_categraf.json b/docker/n9eetc/alerts/ping_by_categraf.json new file mode 100644 index 00000000..85d02d64 --- /dev/null +++ b/docker/n9eetc/alerts/ping_by_categraf.json @@ -0,0 +1,30 @@ +[ + { + "name": "PING地址探测失败", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "ping_result_code != 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file 
diff --git a/docker/n9eetc/alerts/procstat_by_categraf.json b/docker/n9eetc/alerts/procstat_by_categraf.json new file mode 100644 index 00000000..351bf6b9 --- /dev/null +++ b/docker/n9eetc/alerts/procstat_by_categraf.json @@ -0,0 +1,62 @@ +[ + { + "name": "进程监控-有进程数为0,某进程可能挂了", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "procstat_lookup_count == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "进程监控-进程句柄限制过小", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "procstat_rlimit_num_fds_soft < 2048", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } +] \ No newline at end of file diff --git a/docker/n9eetc/alerts/redis_by_categraf.json b/docker/n9eetc/alerts/redis_by_categraf.json new file mode 100644 index 00000000..aeb8bea9 --- /dev/null +++ b/docker/n9eetc/alerts/redis_by_categraf.json @@ -0,0 +1,182 @@ +[ + { + "name": "Redis Ping 延迟高(大于100毫秒)", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "redis_ping_use_seconds > 0.1", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + 
"append_tags": [ + "alertname=HighPingLatency" + ] + }, + { + "name": "Redis内存使用率较高", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "redis_maxmemory > 0 and (redis_used_memory / redis_maxmemory) > 0.85", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=RedisHighMemoryUsage" + ] + }, + { + "name": "Redis出现拒绝连接", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "(rate(redis_rejected_connections[5m])) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=RedisRejectedConnHigh" + ] + }, + { + "name": "Redis刚刚有重启,请注意", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "redis_uptime_in_seconds < 600", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=RedisLowUptime" + ] + }, + { + "name": "Redis较低的命中率", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "rate(redis_keyspace_hits[5m])\n/\n(rate(redis_keyspace_misses[5m]) + rate(redis_keyspace_hits[5m]))\n< 0.9", + "prom_eval_interval": 15, + "enable_stime": 
"00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=RedisLowHitRatio" + ] + }, + { + "name": "Redis驱逐率较高", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "(sum(rate(redis_evicted_keys[5m])) / sum(redis_keyspace_keys)) > 0.1", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=RedisHighKeysEvictionRatio" + ] + } + ] \ No newline at end of file diff --git a/docker/n9eetc/dashboards/linux_by_telegraf.json b/docker/n9eetc/dashboards/linux_by_telegraf.json index d6940554..990a68f8 100644 --- a/docker/n9eetc/dashboards/linux_by_telegraf.json +++ b/docker/n9eetc/dashboards/linux_by_telegraf.json @@ -1,179 +1,1523 @@ -[ - { - "name": "HOST - Telegraf 模板", - "tags": "", - "configs": "{\"var\":[{\"name\":\"ident\",\"definition\":\"label_values(system_load1,ident)\"}],\"links\":[{\"title\":\"n9e\",\"url\":\"https://n9e.gitee.io/\",\"targetBlank\":true},{\"title\":\"author\",\"url\":\"http://flashcat.cloud/\",\"targetBlank\":true}]}", - "chart_groups": [ - { - "name": "整体概况", - "weight": 0, - "charts": [ +{ + "name": "HOST - Telegraf 模板", + "tags": "", + "configs": { + "var": [ { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"count(system_load1)\"}],\"name\":\"监控机器数\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":50}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":3,\"x\":0,\"y\":0,\"i\":\"0\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"topk(10, (100-cpu_usage_idle{cpu=\\\"cpu-total\\\"}))\"}],\"name\":\"cpu使用率 top10\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":12,\"x\":12,\"y\":0,\"i\":\"1\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"topk(10, (100-mem_used_percent))\"}],\"name\":\"内存率 top10\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":9,\"x\":3,\"y\":0,\"i\":\"2\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"topk(10, (disk_used_percent{path!~\\\"/var.*\\\"}))\",\"legend\":\"{{ident}}-{{path}}\"}],\"name\":\"磁盘分区使用率 top10\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":12,\"x\":0,\"y\":1,\"i\":\"3\"}}", - "weight": 0 - }, - { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"topk(10, (rate(diskio_io_time[1m])/10))\",\"legend\":\"\"}],\"name\":\"设备io util top10\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":12,\"x\":12,\"y\":1,\"i\":\"4\"}}", - "weight": 0 + "name": "ident", + "definition": "label_values(system_load1,ident)" } - ] - }, - { - "name": "单机概况", - "weight": 1, - "charts": [ + ], + "links": [ { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"100-cpu_usage_idle{ident=\\\"$ident\\\",cpu=\\\"cpu-total\\\"}\"}],\"name\":\"CPU使用率\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":30}},\"options\":{\"valueMappings\":[{\"type\":\"range\",\"match\":{\"from\":0,\"to\":50},\"result\":{\"color\":\"#129b22\"}},{\"type\":\"range\",\"match\":{\"from\":50,\"to\":100},\"result\":{\"color\":\"#f51919\"}}],\"standardOptions\":{\"util\":\"percent\",\"decimals\":1}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":6,\"y\":0,\"i\":\"0\"}}", - "weight": 0 + "title": "n9e", + "url": "https://n9e.gitee.io/", + "targetBlank": true }, { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"mem_used_percent{ident=\\\"$ident\\\"}\"}],\"name\":\"内存使用率\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":30}},\"options\":{\"valueMappings\":[{\"type\":\"range\",\"match\":{\"from\":0,\"to\":50},\"result\":{\"color\":\"#129b22\"}},{\"type\":\"range\",\"match\":{\"from\":50,\"to\":100},\"result\":{\"color\":\"#f51919\"}}],\"standardOptions\":{\"util\":\"percent\",\"decimals\":1}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":12,\"y\":0,\"i\":\"1\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"mem_swap_total{ident=\\\"$ident\\\"}-mem_swap_free{ident=\\\"$ident\\\"}\"}],\"name\":\"SWAP使用\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":40}},\"options\":{\"valueMappings\":[],\"standardOptions\":{\"util\":\"bytesIEC\",\"decimals\":1}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":3,\"x\":21,\"y\":0,\"i\":\"2\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"system_uptime{ident=\\\"$ident\\\"}\"}],\"name\":\"启动时长\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":30}},\"options\":{\"valueMappings\":[],\"standardOptions\":{\"util\":\"humantimeSeconds\",\"decimals\":1}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":0,\"y\":0,\"i\":\"3\"}}", - "weight": 0 - }, - { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"disk_used_percent{ident=\\\"$ident\\\"}\",\"legend\":\"{{path}}\"}],\"name\":\"磁盘使用率\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"percent\",\"decimals\":1},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":8,\"x\":0,\"y\":1,\"i\":\"4\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"disk_inodes_used{ident=\\\"$ident\\\"}/disk_inodes_total{ident=\\\"$ident\\\"}\",\"legend\":\"{{path}}\"}],\"name\":\"inode使用率\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"percent\",\"decimals\":1},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":8,\"x\":8,\"y\":1,\"i\":\"5\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"rate(diskio_io_time{ident=\\\"$ident\\\"}[1m])/10\",\"legend\":\"{{name}}\"}],\"name\":\"io_util\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"percent\",\"decimals\":1},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":8,\"x\":16,\"y\":1,\"i\":\"6\"}}", - "weight": 0 - }, - { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"linux_sysctl_fs_file_nr{ident=\\\"$ident\\\"}/linux_sysctl_fs_file_max{ident=\\\"$ident\\\"}*100\"}],\"name\":\"FD使用率\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":25}},\"options\":{\"valueMappings\":[{\"type\":\"range\",\"match\":{\"from\":0,\"to\":50},\"result\":{\"color\":\"#129b22\"}},{\"type\":\"range\",\"match\":{\"from\":50,\"to\":100},\"result\":{\"color\":\"#f51919\"}}],\"standardOptions\":{\"util\":\"percent\",\"decimals\":2}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":3,\"x\":18,\"y\":0,\"i\":\"7\"}}", - "weight": 0 + "title": "author", + "url": "http://flashcat.cloud/", + "targetBlank": true } - ] - }, - { - "name": "系统指标", - "weight": 2, - "charts": [ + ], + "version": "2.0.0", + "panels": [ { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"processes_total{ident=\\\"$ident\\\"}\"}],\"name\":\"进程总数\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{\"steps\":[{\"value\":2000,\"color\":\"#fa2a05\"}]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":0,\"i\":\"0\"}}", - "weight": 0 + "id": "0f6a1394-7cf9-4958-bcfe-2fbb59e77c12", + "type": "row", + "name": "整体概况", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 0, + "i": "0f6a1394-7cf9-4958-bcfe-2fbb59e77c12" + }, + "collapsed": true }, { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"rate(kernel_context_switches{ident=\\\"$ident\\\"}[1m])\",\"legend\":\"context_switches\"},{\"expr\":\"rate(kernel_interrupts{ident=\\\"$ident\\\"}[1m])\",\"refId\":\"B\",\"legend\":\"kernel_interrupts\"}],\"name\":\"上下文切换/中断\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":0,\"i\":\"1\"}}", - "weight": 0 + "targets": [ + { + "refId": "A", + "expr": "count(system_load1)" + } + ], + "name": "监控机器数", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 50 + } + }, + "options": { + "standardOptions": {} + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 3, + "x": 0, + "y": 1, + "i": "877b6db5-e82c-499a-9ebc-8ad72c2891a8" + }, + "id": "877b6db5-e82c-499a-9ebc-8ad72c2891a8" }, { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"kernel_entropy_avail{ident=\\\"$ident\\\"}\",\"legend\":\"entropy_avail\"}],\"name\":\"熵池大小\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{\"steps\":[{\"value\":100,\"color\":\"#f50505\"}]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}", - "weight": 0 + "targets": [ + { + "refId": "A", + "expr": "topk(10, mem_used_percent)" + } + ], + "name": "内存率 top10", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", 
+ "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 3, + "w": 9, + "x": 3, + "y": 1, + "i": "29a3e6ae-d278-49b3-972b-f12a6c7c091c" + }, + "id": "29a3e6ae-d278-49b3-972b-f12a6c7c091c" + }, + { + "targets": [ + { + "refId": "A", + "expr": "topk(10, (100-cpu_usage_idle{cpu=\"cpu-total\"}))" + } + ], + "name": "cpu使用率 top10", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 3, + "w": 12, + "x": 12, + "y": 1, + "i": "9f2a24d5-d19f-4651-b76d-add6b9011821" + }, + "id": "9f2a24d5-d19f-4651-b76d-add6b9011821" + }, + { + "targets": [ + { + "refId": "A", + "expr": "topk(10, (disk_used_percent{path!~\"/var.*\"}))", + "legend": "{{ident}}-{{path}}" + } + ], + "name": "磁盘分区使用率 top10", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 3, + "w": 12, + "x": 0, + "y": 2, + "i": "dcd60296-db84-4562-99f3-2829c2f064a4" + }, + "id": "dcd60296-db84-4562-99f3-2829c2f064a4" + }, + { + "targets": [ + { + "refId": "A", + "expr": "topk(10, (rate(diskio_io_time[1m])/10))", + "legend": "" + } + ], + "name": "设备io util top10", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + 
"type": "timeseries", + "layout": { + "h": 3, + "w": 12, + "x": 12, + "y": 2, + "i": "ef7df29d-7dce-4788-ae42-d21d842c67d6" + }, + "id": "ef7df29d-7dce-4788-ae42-d21d842c67d6" + }, + { + "id": "7b2c5cb2-fe3b-4596-95a1-37da06cd6498", + "type": "row", + "name": "单机概况", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 5, + "i": "7b2c5cb2-fe3b-4596-95a1-37da06cd6498" + }, + "collapsed": true + }, + { + "targets": [ + { + "refId": "A", + "expr": "system_uptime{ident=\"$ident\"}" + } + ], + "name": "启动时长", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 30 + } + }, + "options": { + "valueMappings": [], + "standardOptions": { + "util": "humantimeSeconds", + "decimals": 1 + } + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 0, + "y": 6, + "i": "50f09231-fc5e-4f6d-9367-a3158504689b" + }, + "id": "50f09231-fc5e-4f6d-9367-a3158504689b" + }, + { + "targets": [ + { + "refId": "A", + "expr": "100-cpu_usage_idle{ident=\"$ident\",cpu=\"cpu-total\"}" + } + ], + "name": "CPU使用率", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 30 + } + }, + "options": { + "valueMappings": [ + { + "type": "range", + "match": { + "from": 0, + "to": 50 + }, + "result": { + "color": "#129b22" + } + }, + { + "type": "range", + "match": { + "from": 50, + "to": 100 + }, + "result": { + "color": "#f51919" + } + } + ], + "standardOptions": { + "util": "percent", + "decimals": 1 + } + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 6, + "y": 6, + "i": "d44e951d-c333-4ed9-9303-9c8d29da7993" + }, + "id": "d44e951d-c333-4ed9-9303-9c8d29da7993" + }, + { + "targets": [ + { + "refId": "A", + "expr": "mem_used_percent{ident=\"$ident\"}" + } + ], + "name": "内存使用率", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + 
"value": 30 + } + }, + "options": { + "valueMappings": [ + { + "type": "range", + "match": { + "from": 0, + "to": 50 + }, + "result": { + "color": "#129b22" + } + }, + { + "type": "range", + "match": { + "from": 50, + "to": 100 + }, + "result": { + "color": "#f51919" + } + } + ], + "standardOptions": { + "util": "percent", + "decimals": 1 + } + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 12, + "y": 6, + "i": "278c2fa1-0b19-4718-8b12-fb1c2e776258" + }, + "id": "278c2fa1-0b19-4718-8b12-fb1c2e776258" + }, + { + "targets": [ + { + "refId": "A", + "expr": "linux_sysctl_fs_file_nr{ident=\"$ident\"}/linux_sysctl_fs_file_max{ident=\"$ident\"}*100" + } + ], + "name": "FD使用率", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 25 + } + }, + "options": { + "valueMappings": [ + { + "type": "range", + "match": { + "from": 0, + "to": 50 + }, + "result": { + "color": "#129b22" + } + }, + { + "type": "range", + "match": { + "from": 50, + "to": 100 + }, + "result": { + "color": "#f51919" + } + } + ], + "standardOptions": { + "util": "percent", + "decimals": 2 + } + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 3, + "x": 18, + "y": 6, + "i": "484afcd4-7b25-4af1-8e95-88cc675f7f43" + }, + "id": "484afcd4-7b25-4af1-8e95-88cc675f7f43" + }, + { + "targets": [ + { + "refId": "A", + "expr": "mem_swap_total{ident=\"$ident\"}-mem_swap_free{ident=\"$ident\"}" + } + ], + "name": "SWAP使用", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 40 + } + }, + "options": { + "valueMappings": [], + "standardOptions": { + "util": "bytesIEC", + "decimals": 1 + } + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 3, + "x": 21, + "y": 6, + "i": "142f63d7-4979-4354-81b5-a9c5ec81fae9" + }, + "id": "142f63d7-4979-4354-81b5-a9c5ec81fae9" + }, + { + "targets": [ + { 
+ "refId": "A", + "expr": "disk_used_percent{ident=\"$ident\"}", + "legend": "{{path}}" + } + ], + "name": "磁盘使用率", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "percent", + "decimals": 1 + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 3, + "w": 8, + "x": 0, + "y": 7, + "i": "4d6ec15c-8fdd-47db-a9f7-a57f03009e66" + }, + "id": "4d6ec15c-8fdd-47db-a9f7-a57f03009e66" + }, + { + "targets": [ + { + "refId": "A", + "expr": "disk_inodes_used{ident=\"$ident\"}/disk_inodes_total{ident=\"$ident\"}", + "legend": "{{path}}" + } + ], + "name": "inode使用率", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "percent", + "decimals": 1 + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 3, + "w": 8, + "x": 8, + "y": 7, + "i": "2991ad6b-c219-4f1d-b298-e195cf35cfec" + }, + "id": "2991ad6b-c219-4f1d-b298-e195cf35cfec" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(diskio_io_time{ident=\"$ident\"}[1m])/10", + "legend": "{{name}}" + } + ], + "name": "io_util", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "percent", + "decimals": 1 + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 3, + "w": 8, + "x": 16, + "y": 7, + "i": "935435db-5a1e-4330-b95f-825e91e9d99e" + }, + "id": 
"935435db-5a1e-4330-b95f-825e91e9d99e" + }, + { + "id": "1a19ca3f-3296-43ef-a36c-523ead023489", + "type": "row", + "name": "系统指标", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 10, + "i": "1a19ca3f-3296-43ef-a36c-523ead023489" + }, + "collapsed": true + }, + { + "targets": [ + { + "refId": "A", + "expr": "processes_total{ident=\"$ident\"}" + } + ], + "name": "进程总数", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": { + "steps": [ + { + "value": 2000, + "color": "#fa2a05" + } + ] + } + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 0, + "y": 11, + "i": "bab39b5e-63cf-4e88-a474-4ea8f2585d8e" + }, + "id": "bab39b5e-63cf-4e88-a474-4ea8f2585d8e" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(kernel_context_switches{ident=\"$ident\"}[1m])", + "legend": "context_switches" + }, + { + "expr": "rate(kernel_interrupts{ident=\"$ident\"}[1m])", + "refId": "B", + "legend": "kernel_interrupts" + } + ], + "name": "上下文切换/中断", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 8, + "y": 11, + "i": "0ea75485-cc11-4b44-b13f-911429d9e103" + }, + "id": "0ea75485-cc11-4b44-b13f-911429d9e103" + }, + { + "targets": [ + { + "refId": "A", + "expr": "kernel_entropy_avail{ident=\"$ident\"}", + "legend": "entropy_avail" + } + ], + "name": "熵池大小", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": { + 
"steps": [ + { + "value": 100, + "color": "#f50505" + } + ] + } + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 16, + "y": 11, + "i": "32d764fa-ed86-4099-b0f8-1cb8c7f67315" + }, + "id": "32d764fa-ed86-4099-b0f8-1cb8c7f67315" + }, + { + "id": "fe779989-795e-4ef6-9280-fdea929bb397", + "type": "row", + "name": "CPU", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 18, + "i": "fe779989-795e-4ef6-9280-fdea929bb397" + }, + "collapsed": true + }, + { + "targets": [ + { + "refId": "A", + "expr": "cpu_usage_idle{ident=\"$ident\",cpu=\"cpu-total\"}", + "legend": "cpu_usage_idle" + } + ], + "name": "CPU空闲率", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": { + "steps": [ + { + "value": 10, + "color": "#f20202" + } + ] + } + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 0, + "y": 19, + "i": "f5b86c4f-2104-41a4-9d01-a62ee64c04ff" + }, + "id": "f5b86c4f-2104-41a4-9d01-a62ee64c04ff" + }, + { + "targets": [ + { + "refId": "A", + "expr": "cpu_usage_guest{ident=\"$ident\",cpu=\"cpu-total\"}", + "legend": "" + }, + { + "expr": "cpu_usage_iowait{ident=\"$ident\",cpu=\"cpu-total\"}", + "refId": "B", + "legend": "" + }, + { + "expr": "cpu_usage_user{ident=\"$ident\",cpu=\"cpu-total\"}", + "refId": "C" + }, + { + "expr": "cpu_usage_system{ident=\"$ident\",cpu=\"cpu-total\"}", + "refId": "D" + }, + { + "expr": "cpu_usage_irq{ident=\"$ident\",cpu=\"cpu-total\"}", + "refId": "E" + }, + { + "expr": "cpu_usage_softirq{ident=\"$ident\",cpu=\"cpu-total\"}", + "refId": "F" + }, + { + "expr": "cpu_usage_nice{ident=\"$ident\",cpu=\"cpu-total\"}", + "refId": "G" + }, + { + 
"expr": "cpu_usage_steal{ident=\"$ident\",cpu=\"cpu-total\"}", + "refId": "H" + } + ], + "name": "CPU使用率详情", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 8, + "y": 19, + "i": "e833715f-065c-4b1b-9f0d-e1223b6992b8" + }, + "id": "e833715f-065c-4b1b-9f0d-e1223b6992b8" + }, + { + "targets": [ + { + "refId": "A", + "expr": "system_load15{ident=\"$ident\"}" + }, + { + "expr": "system_load1{ident=\"$ident\"}", + "refId": "B" + }, + { + "expr": "system_load5{ident=\"$ident\"}", + "refId": "C" + } + ], + "name": "CPU负载", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 16, + "y": 19, + "i": "b19f9420-9ce2-4d3c-86bf-fc247c8b760e" + }, + "id": "b19f9420-9ce2-4d3c-86bf-fc247c8b760e" + }, + { + "id": "e9ebdac6-4a87-4a79-b125-e5a258a968d0", + "type": "row", + "name": "内存详情", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 26, + "i": "e9ebdac6-4a87-4a79-b125-e5a258a968d0" + }, + "collapsed": true + }, + { + "targets": [ + { + "refId": "A", + "expr": "mem_active{ident=\"$ident\"}" + }, + { + "expr": "mem_cached{ident=\"$ident\"}", + "refId": "B" + }, + { + "expr": "mem_buffered{ident=\"$ident\"}", + "refId": "C" + }, + { + "expr": "mem_inactive{ident=\"$ident\"}", + "refId": "D" + }, + { + "expr": "mem_mapped{ident=\"$ident\"}", + "refId": "E" + }, + { + "expr": "mem_shared{ident=\"$ident\"}", + "refId": "F" + }, + { + "expr": 
"mem_swap_cached{ident=\"$ident\"}", + "refId": "G" + } + ], + "name": "用户态内存使用", + "description": "内存指标可参考链接 [/PROC/MEMINFO之谜](http://linuxperf.com/?p=142) ", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 12, + "x": 0, + "y": 27, + "i": "655608d2-0d6d-46ed-9e6a-c482edaeacec" + }, + "id": "655608d2-0d6d-46ed-9e6a-c482edaeacec" + }, + { + "targets": [ + { + "refId": "A", + "expr": "mem_slab{ident=\"$ident\"}" + }, + { + "expr": "mem_sreclaimable{ident=\"$ident\"}", + "refId": "B" + }, + { + "expr": "mem_sunreclaim{ident=\"$ident\"}", + "refId": "C" + }, + { + "expr": "mem_vmalloc_used{ident=\"$ident\"}", + "refId": "D" + }, + { + "expr": "mem_vmalloc_chunk{ident=\"$ident\"}", + "refId": "E" + } + ], + "name": "内核态内存使用", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 12, + "x": 12, + "y": 27, + "i": "d57fe702-e21f-45ee-9b36-31695e698059" + }, + "id": "d57fe702-e21f-45ee-9b36-31695e698059" + }, + { + "id": "893315c8-54ac-4eaf-9072-d0a2debd3404", + "type": "row", + "name": "磁盘详情", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 34, + "i": "893315c8-54ac-4eaf-9072-d0a2debd3404" + }, + "collapsed": true + }, + { + "targets": [ + { + "refId": "A", + "expr": "disk_free{ident=\"$ident\"}" + }, + { + "expr": "disk_total{ident=\"$ident\"}", + "refId": "B" + }, + { + "expr": "disk_used{ident=\"$ident\"}", + "refId": "C" + } + ], + "name": "磁盘空间", + "options": { + 
"tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "bytesIEC", + "decimals": null + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 0, + "y": 35, + "i": "b39143a6-9ca8-4732-9100-0a8c029440b5" + }, + "id": "b39143a6-9ca8-4732-9100-0a8c029440b5" + }, + { + "targets": [ + { + "refId": "A", + "expr": "linux_sysctl_fs_file_max{ident=\"$ident\"}" + }, + { + "expr": "linux_sysctl_fs_file_nr{ident=\"$ident\"}", + "refId": "B" + } + ], + "name": "fd使用", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 8, + "y": 35, + "i": "3a13d95d-a311-4b0b-87f2-4216cac9a533" + }, + "id": "3a13d95d-a311-4b0b-87f2-4216cac9a533" + }, + { + "targets": [ + { + "refId": "A", + "expr": "disk_inodes_total{ident=\"$ident\",path!~\"/var.*\"}", + "legend": "{{path}}-total" + }, + { + "expr": "disk_inodes_used{ident=\"$ident\",path!~\"/var.*\"}", + "refId": "B", + "legend": "{{path}}-used" + } + ], + "name": "inode", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 16, + "y": 35, + "i": "6a1fa455-a385-456a-a32b-30119082f453" + }, + "id": "6a1fa455-a385-456a-a32b-30119082f453" + }, + { + "targets": [ + { + "refId": "A", + 
"expr": "rate(diskio_reads{ident=\"$ident\"}[1m])", + "legend": "{{name}}-read" + }, + { + "expr": "rate(diskio_writes{ident=\"$ident\"}[1m])", + "refId": "B", + "legend": "{{name}}-writes" + } + ], + "name": "IOPS", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 0, + "y": 37, + "i": "c74e5155-0e3c-4cb4-8e57-ce8af46ddf90" + }, + "id": "c74e5155-0e3c-4cb4-8e57-ce8af46ddf90" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(diskio_read_bytes{ident=\"$ident\"}[1m])", + "legend": "{{name}}-read" + }, + { + "expr": "rate(diskio_write_bytes{ident=\"$ident\"}[1m])", + "refId": "B", + "legend": "{{name}}-writes" + } + ], + "name": "IO吞吐量", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "bytesIEC", + "decimals": 0 + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 8, + "y": 37, + "i": "c522e6f5-fb3d-4fc0-9ae6-7ec002e55e95" + }, + "id": "c522e6f5-fb3d-4fc0-9ae6-7ec002e55e95" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(diskio_write_time{ident=\"$ident\"}[1m])/rate(diskio_writes{ident=\"$ident\"}[1m])+rate(diskio_read_time{ident=\"$ident\"}[1m])/rate(diskio_reads{ident=\"$ident\"}[1m])", + "legend": "{{name}}" + } + ], + "name": "iowait", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": 
"smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 16, + "y": 37, + "i": "47fd7f57-ed4a-43cf-86e3-628c2f697769" + }, + "id": "47fd7f57-ed4a-43cf-86e3-628c2f697769" + }, + { + "id": "f8c5e284-5e23-4646-976c-23511f4f908d", + "type": "row", + "name": "网络详情", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 44, + "i": "f8c5e284-5e23-4646-976c-23511f4f908d" + }, + "collapsed": true + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(net_bytes_recv{ident=\"$ident\",interface=~\"eth.*\"}[1m])*8", + "legend": "{{interface}}-recv" + }, + { + "expr": "rate(net_bytes_sent{ident=\"$ident\",interface=~\"eth.*\"}[1m])*8", + "refId": "B", + "legend": "{{interface}}-sent" + } + ], + "name": "网络流量", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "bytesIEC", + "decimals": 0 + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 0, + "y": 45, + "i": "7fe8774f-7d03-4514-a6b6-b626d2a95265" + }, + "id": "7fe8774f-7d03-4514-a6b6-b626d2a95265" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(net_packets_recv{ident=\"$ident\",interface=~\"eth.*\"}[1m])", + "legend": "{{interface}}-recv" + }, + { + "expr": "rate(net_packets_sent{ident=\"$ident\",interface=~\"eth.*\"}[1m])", + "refId": "B", + "legend": "{{interface}}-sent" + } + ], + "name": "packets", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "decimals": 0 + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 
7, + "w": 6, + "x": 6, + "y": 45, + "i": "d030e5e7-06cd-42e9-b1e5-0c32b51f853e" + }, + "id": "d030e5e7-06cd-42e9-b1e5-0c32b51f853e" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(net_err_in{ident=\"$ident\",interface=~\"eth.*\"}[1m])", + "legend": "{{interface}}-in" + }, + { + "expr": "rate(net_err_out{ident=\"$ident\",interface=~\"eth.*\"}[1m])", + "refId": "B", + "legend": "{{interface}}-out" + } + ], + "name": "error", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "decimals": 0 + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 12, + "y": 45, + "i": "330f5fd9-aca5-4619-b81b-33203256d560" + }, + "id": "330f5fd9-aca5-4619-b81b-33203256d560" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(net_drop_in{ident=\"$ident\",interface=~\"eth.*\"}[1m])", + "legend": "{{interface}}-in" + }, + { + "expr": "rate(net_drop_out{ident=\"$ident\",interface=~\"eth.*\"}[1m])", + "refId": "B", + "legend": "{{interface}}-out" + } + ], + "name": "drop", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "decimals": 0 + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 18, + "y": 45, + "i": "34a73b20-56d7-4edb-b6f7-acc93d00a026" + }, + "id": "34a73b20-56d7-4edb-b6f7-acc93d00a026" + }, + { + "targets": [ + { + "refId": "A", + "expr": "netstat_tcp_established{ident=\"$ident\"}" + }, + { + "expr": "netstat_tcp_listen{ident=\"$ident\"}", + "refId": "B" + }, + { + "expr": "netstat_tcp_time_wait{ident=\"$ident\"}", + "refId": "C" + } + ], 
+ "name": "tcp", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 24, + "x": 0, + "y": 47, + "i": "7664d34f-7bcf-4431-a6a7-4d924d2e176d" + }, + "id": "7664d34f-7bcf-4431-a6a7-4d924d2e176d" } - ] - }, - { - "name": "CPU", - "weight": 3, - "charts": [ - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"cpu_usage_guest{ident=\\\"$ident\\\",cpu=\\\"cpu-total\\\"}\",\"legend\":\"\"},{\"expr\":\"cpu_usage_iowait{ident=\\\"$ident\\\",cpu=\\\"cpu-total\\\"}\",\"refId\":\"B\",\"legend\":\"\"},{\"expr\":\"cpu_usage_user{ident=\\\"$ident\\\",cpu=\\\"cpu-total\\\"}\",\"refId\":\"C\"},{\"expr\":\"cpu_usage_system{ident=\\\"$ident\\\",cpu=\\\"cpu-total\\\"}\",\"refId\":\"D\"},{\"expr\":\"cpu_usage_irq{ident=\\\"$ident\\\",cpu=\\\"cpu-total\\\"}\",\"refId\":\"E\"},{\"expr\":\"cpu_usage_softirq{ident=\\\"$ident\\\",cpu=\\\"cpu-total\\\"}\",\"refId\":\"F\"},{\"expr\":\"cpu_usage_nice{ident=\\\"$ident\\\",cpu=\\\"cpu-total\\\"}\",\"refId\":\"G\"},{\"expr\":\"cpu_usage_steal{ident=\\\"$ident\\\",cpu=\\\"cpu-total\\\"}\",\"refId\":\"H\"}],\"name\":\"CPU使用率详情\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":0,\"i\":\"0\"}}", - "weight": 0 - }, - { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"cpu_usage_idle{ident=\\\"$ident\\\",cpu=\\\"cpu-total\\\"}\",\"legend\":\"cpu_usage_idle\"}],\"name\":\"CPU空闲率\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{\"steps\":[{\"value\":10,\"color\":\"#f20202\"}]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":0,\"i\":\"1\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"system_load15{ident=\\\"$ident\\\"}\"},{\"expr\":\"system_load1{ident=\\\"$ident\\\"}\",\"refId\":\"B\"},{\"expr\":\"system_load5{ident=\\\"$ident\\\"}\",\"refId\":\"C\"}],\"name\":\"CPU负载\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}", - "weight": 0 - } - ] - }, - { - "name": "内存详情", - "weight": 4, - "charts": [ - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"mem_active{ident=\\\"$ident\\\"}\"},{\"expr\":\"mem_cached{ident=\\\"$ident\\\"}\",\"refId\":\"B\"},{\"expr\":\"mem_buffered{ident=\\\"$ident\\\"}\",\"refId\":\"C\"},{\"expr\":\"mem_inactive{ident=\\\"$ident\\\"}\",\"refId\":\"D\"},{\"expr\":\"mem_mapped{ident=\\\"$ident\\\"}\",\"refId\":\"E\"},{\"expr\":\"mem_shared{ident=\\\"$ident\\\"}\",\"refId\":\"F\"},{\"expr\":\"mem_swap_cached{ident=\\\"$ident\\\"}\",\"refId\":\"G\"}],\"name\":\"用户态内存使用\",\"description\":\"内存指标可参考链接 [/PROC/MEMINFO之谜](http://linuxperf.com/?p=142) 
\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":0,\"i\":\"0\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"mem_slab{ident=\\\"$ident\\\"}\"},{\"expr\":\"mem_sreclaimable{ident=\\\"$ident\\\"}\",\"refId\":\"B\"},{\"expr\":\"mem_sunreclaim{ident=\\\"$ident\\\"}\",\"refId\":\"C\"},{\"expr\":\"mem_vmalloc_used{ident=\\\"$ident\\\"}\",\"refId\":\"D\"},{\"expr\":\"mem_vmalloc_chunk{ident=\\\"$ident\\\"}\",\"refId\":\"E\"}],\"name\":\"内核态内存使用\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":0,\"i\":\"1\"}}", - "weight": 0 - } - ] - }, - { - "name": "磁盘详情", - "weight": 5, - "charts": [ - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"disk_free{ident=\\\"$ident\\\"}\"},{\"expr\":\"disk_total{ident=\\\"$ident\\\"}\",\"refId\":\"B\"},{\"expr\":\"disk_used{ident=\\\"$ident\\\"}\",\"refId\":\"C\"}],\"name\":\"磁盘空间\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"bytesIEC\",\"decimals\":null},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":0,\"i\":\"0\"}}", - "weight": 0 - }, - { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"linux_sysctl_fs_file_max{ident=\\\"$ident\\\"}\"},{\"expr\":\"linux_sysctl_fs_file_nr{ident=\\\"$ident\\\"}\",\"refId\":\"B\"}],\"name\":\"fd使用\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":0,\"i\":\"1\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"disk_inodes_total{ident=\\\"$ident\\\",path!~\\\"/var.*\\\"}\",\"legend\":\"{{path}}-total\"},{\"expr\":\"disk_inodes_used{ident=\\\"$ident\\\",path!~\\\"/var.*\\\"}\",\"refId\":\"B\",\"legend\":\"{{path}}-used\"}],\"name\":\"inode\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"rate(diskio_reads{ident=\\\"$ident\\\"}[1m])\",\"legend\":\"{{name}}-read\"},{\"expr\":\"rate(diskio_writes{ident=\\\"$ident\\\"}[1m])\",\"refId\":\"B\",\"legend\":\"{{name}}-writes\"}],\"name\":\"IOPS\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":2,\"i\":\"3\"}}", - "weight": 0 - }, - { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"rate(diskio_write_time{ident=\\\"$ident\\\"}[1m])/rate(diskio_writes{ident=\\\"$ident\\\"}[1m])+rate(diskio_read_time{ident=\\\"$ident\\\"}[1m])/rate(diskio_reads{ident=\\\"$ident\\\"}[1m])\",\"legend\":\"{{name}}\"}],\"name\":\"iowait\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":2,\"i\":\"4\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"rate(diskio_read_bytes{ident=\\\"$ident\\\"}[1m])\",\"legend\":\"{{name}}-read\"},{\"expr\":\"rate(diskio_write_bytes{ident=\\\"$ident\\\"}[1m])\",\"refId\":\"B\",\"legend\":\"{{name}}-writes\"}],\"name\":\"IO吞吐量\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"bytesIEC\",\"decimals\":0},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":2,\"i\":\"5\"}}", - "weight": 0 - } - ] - }, - { - "name": "网络详情", - "weight": 6, - "charts": [ - { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"rate(net_bytes_recv{ident=\\\"$ident\\\",interface=~\\\"eth.*\\\"}[1m])*8\",\"legend\":\"{{interface}}-recv\"},{\"expr\":\"rate(net_bytes_sent{ident=\\\"$ident\\\",interface=~\\\"eth.*\\\"}[1m])*8\",\"refId\":\"B\",\"legend\":\"{{interface}}-sent\"}],\"name\":\"网络流量\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"bytesIEC\",\"decimals\":0},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"rate(net_packets_recv{ident=\\\"$ident\\\",interface=~\\\"eth.*\\\"}[1m])\",\"legend\":\"{{interface}}-recv\"},{\"expr\":\"rate(net_packets_sent{ident=\\\"$ident\\\",interface=~\\\"eth.*\\\"}[1m])\",\"refId\":\"B\",\"legend\":\"{{interface}}-sent\"}],\"name\":\"packets\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"decimals\":0},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}", - "weight": 0 - }, - { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"rate(net_err_in{ident=\\\"$ident\\\",interface=~\\\"eth.*\\\"}[1m])\",\"legend\":\"{{interface}}-in\"},{\"expr\":\"rate(net_err_out{ident=\\\"$ident\\\",interface=~\\\"eth.*\\\"}[1m])\",\"refId\":\"B\",\"legend\":\"{{interface}}-out\"}],\"name\":\"error\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"decimals\":0},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"rate(net_drop_in{ident=\\\"$ident\\\",interface=~\\\"eth.*\\\"}[1m])\",\"legend\":\"{{interface}}-in\"},{\"expr\":\"rate(net_drop_out{ident=\\\"$ident\\\",interface=~\\\"eth.*\\\"}[1m])\",\"refId\":\"B\",\"legend\":\"{{interface}}-out\"}],\"name\":\"drop\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"decimals\":0},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}", - "weight": 0 - }, - { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"netstat_tcp_established{ident=\\\"$ident\\\"}\"},{\"expr\":\"netstat_tcp_listen{ident=\\\"$ident\\\"}\",\"refId\":\"B\"},{\"expr\":\"netstat_tcp_time_wait{ident=\\\"$ident\\\"}\",\"refId\":\"C\"}],\"name\":\"tcp\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":24,\"x\":0,\"y\":2,\"i\":\"4\"}}", - "weight": 0 - } - ] - } - ] + ] } -] \ No newline at end of file +} \ No newline at end of file diff --git a/etc/alerts/http_response_by_categraf.json b/etc/alerts/http_response_by_categraf.json new file mode 100644 index 00000000..3908400f --- /dev/null +++ b/etc/alerts/http_response_by_categraf.json @@ -0,0 +1,30 @@ +[ + { + "name": "HTTP地址探测失败", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "http_response_result_code != 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file diff --git a/etc/alerts/linux_by_categraf.json b/etc/alerts/linux_by_categraf.json new file mode 100644 index 00000000..ee9d2be9 --- /dev/null +++ b/etc/alerts/linux_by_categraf.json @@ -0,0 +1,243 @@ +[ + { + "name": "监控对象失联", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "max_over_time(target_up[130s]) == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + 
"notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "机器负载-CPU较高,请关注", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "cpu_usage_idle{cpu=\"cpu-total\"} < 25", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "机器负载-内存较高,请关注", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "mem_available_percent < 25", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "硬盘-IO有点繁忙", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "rate(diskio_io_time[1m])/10 > 99", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "硬盘-预计再有4小时写满", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "predict_linear(disk_free[1h], 4*3600) < 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + 
"notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "网卡-入向有丢包", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "increase(net_drop_in[1m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "网卡-出向有丢包", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "increase(net_drop_out[1m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "网络连接-TIME_WAIT数量超过2万", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "netstat_tcp_time_wait > 20000", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } +] + \ No newline at end of file diff --git a/etc/alerts/mysql_by_categraf.json b/etc/alerts/mysql_by_categraf.json new file mode 100644 index 00000000..d5a6a94c --- /dev/null +++ b/etc/alerts/mysql_by_categraf.json @@ -0,0 +1,302 @@ +[ + { + "name": "MysqlInnodbLogWaits", + "note": "MySQL innodb log writes stalling", + "severity": 2, + 
"disabled": 0, + "prom_for_duration": 0, + "prom_ql": "rate(mysql_global_status_innodb_log_waits[15m]) > 10", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlInnodbLogWaits" + ] + }, + { + "name": "MysqlSlaveIoThreadNotRunning", + "note": "MySQL Slave IO thread not running", + "severity": 1, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "mysql_slave_status_master_server_id > 0 and ON (instance) mysql_slave_status_slave_io_running == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlSlaveIoThreadNotRunning" + ] + }, + { + "name": "MysqlSlaveReplicationLag", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "mysql_slave_status_master_server_id > 0 and ON (instance) (mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay) > 30", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlSlaveReplicationLag" + ] + }, + { + "name": "MysqlSlaveSqlThreadNotRunning", + "note": "MySQL Slave SQL thread not running", + "severity": 1, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": 
"mysql_slave_status_master_server_id > 0 and ON (instance) mysql_slave_status_slave_sql_running == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlSlaveSqlThreadNotRunning" + ] + }, + { + "name": "Mysql刚刚有重启,请注意", + "note": "MySQL has just been restarted, less than one minute ago", + "severity": 3, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "mysql_global_status_uptime < 60", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlRestarted" + ] + }, + { + "name": "Mysql实例挂了", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "mysql_up == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlDown" + ] + }, + { + "name": "Mysql打开了很多文件句柄,请注意", + "note": "More than 80% of MySQL files open", + "severity": 2, + "disabled": 0, + "prom_for_duration": 120, + "prom_ql": "avg by (instance) (mysql_global_status_open_files) / avg by (instance)(mysql_global_variables_open_files_limit) * 100 > 80", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + 
"1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlHighOpenFiles" + ] + }, + { + "name": "Mysql最近一分钟有慢查询出现", + "note": "MySQL server mysql has some new slow query", + "severity": 2, + "disabled": 0, + "prom_for_duration": 120, + "prom_ql": "increase(mysql_global_status_slow_queries[1m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlSlowQueries" + ] + }, + { + "name": "Mysql有超过60%的连接是running状态", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 120, + "prom_ql": "avg by (instance) (mysql_global_status_threads_running) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 60", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlHighThreadsRunning" + ] + }, + { + "name": "Mysql连接数已超过80%", + "note": "More than 80% of MySQL connections are in use", + "severity": 2, + "disabled": 0, + "prom_for_duration": 120, + "prom_ql": "avg by (instance) (mysql_global_status_threads_connected) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 80", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + 
"enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MysqlTooManyConnections" + ] + } + ] \ No newline at end of file diff --git a/etc/alerts/net_response_by_categraf.json b/etc/alerts/net_response_by_categraf.json new file mode 100644 index 00000000..a55961e4 --- /dev/null +++ b/etc/alerts/net_response_by_categraf.json @@ -0,0 +1,30 @@ +[ + { + "name": "网络地址探活失败", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "net_response_result_code != 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file diff --git a/etc/alerts/ntp_by_categraf.json b/etc/alerts/ntp_by_categraf.json new file mode 100644 index 00000000..2ab93889 --- /dev/null +++ b/etc/alerts/ntp_by_categraf.json @@ -0,0 +1,30 @@ +[ + { + "name": "NTP时间偏移太大", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "ntp_offset_ms > 1000 or ntp_offset_ms < -1000", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file diff --git a/etc/alerts/ping_by_categraf.json b/etc/alerts/ping_by_categraf.json new file mode 100644 index 00000000..85d02d64 --- /dev/null +++ b/etc/alerts/ping_by_categraf.json @@ -0,0 +1,30 @@ +[ + { + "name": "PING地址探测失败", + "note": "", + 
"severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "ping_result_code != 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file diff --git a/etc/alerts/procstat_by_categraf.json b/etc/alerts/procstat_by_categraf.json new file mode 100644 index 00000000..351bf6b9 --- /dev/null +++ b/etc/alerts/procstat_by_categraf.json @@ -0,0 +1,62 @@ +[ + { + "name": "进程监控-有进程数为0,某进程可能挂了", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "procstat_lookup_count == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "进程监控-进程句柄限制过小", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "procstat_rlimit_num_fds_soft < 2048", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "notify_recovered": 1, + "notify_channels": [ + "email", + "dingtalk", + "wecom" + ], + "notify_repeat_step": 60, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } +] \ No newline at end of file diff --git a/etc/alerts/redis_by_categraf.json b/etc/alerts/redis_by_categraf.json new file mode 100644 index 00000000..aeb8bea9 --- /dev/null +++ b/etc/alerts/redis_by_categraf.json @@ -0,0 +1,182 @@ +[ + { + "name": "Redis Ping 延迟高(大于100毫秒)", + "note": "", + "severity": 2, + 
"disabled": 0, + "prom_for_duration": 60, + "prom_ql": "redis_ping_use_seconds > 0.1", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=HighPingLatency" + ] + }, + { + "name": "Redis内存使用率较高", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "redis_maxmemory > 0 and (redis_used_memory / redis_maxmemory) > 0.85", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=RedisHighMemoryUsage" + ] + }, + { + "name": "Redis出现拒绝连接", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "(rate(redis_rejected_connections[5m])) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=RedisRejectedConnHigh" + ] + }, + { + "name": "Redis刚刚有重启,请注意", + "note": "", + "severity": 3, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "redis_uptime_in_seconds < 600", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + 
"notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=RedisLowUptime" + ] + }, + { + "name": "Redis较低的命中率", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "rate(redis_keyspace_hits[5m])\n/\n(rate(redis_keyspace_misses[5m]) + rate(redis_keyspace_hits[5m]))\n< 0.9", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=RedisLowHitRatio" + ] + }, + { + "name": "Redis驱逐率较高", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "(sum(rate(redis_evicted_keys[5m])) / sum(redis_keyspace_keys)) > 0.1", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=RedisHighKeysEvictionRatio" + ] + } + ] \ No newline at end of file