diff --git a/etc/alerts/kafka_by_exporter.json b/etc/alerts/kafka_by_exporter.json new file mode 100644 index 00000000..1a52b418 --- /dev/null +++ b/etc/alerts/kafka_by_exporter.json @@ -0,0 +1,58 @@ +[ + { + "name": "数据有丢失风险-同步副本数小于3", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "sum(kafka_topic_partition_in_sync_replica) by (topic) < 3", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "消费能力不足-积压消息数超过50条", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "sum(kafka_topic_partition_current_offset) by (topic) - sum(kafka_consumergroup_current_offset) by (topic) > 50", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file diff --git a/etc/alerts/node_by_exporter.json b/etc/alerts/node_by_exporter.json new file mode 100644 index 00000000..5801cddd --- /dev/null +++ b/etc/alerts/node_by_exporter.json @@ -0,0 +1,310 @@ +[ + { + "name": "inode资源不足-使用率超过90", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "(100 - ((node_filesystem_files_free * 100) / node_filesystem_files))>90", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + 
"notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "内存资源不足-利用率大于75%", + "note": "需要扩容或者升级配置了", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "(node_memory_MemTotal_bytes - node_memory_MemFree_bytes - (node_memory_Cached_bytes + node_memory_Buffers_bytes))/node_memory_MemTotal_bytes*100 > 75", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "文件句柄不足-使用率超过90%", + "note": "可以将文件句柄limit调大,或者扩容", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "(node_filefd_allocated{instance=\"$node\"}/node_filefd_maximum{instance=\"$node\"}*100) > 90", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "某磁盘无法正常读写", + "note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "(node_filesystem_device_error{instance=\"$node\",mountpoint!~\"/var/lib/.*\",mountpoint!~\"/run.*\"}) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "磁盘需要清理了-利用率达到92%", + 
"note": "", + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "(100 - ((node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes) ) > 92 ", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "系统conntrack需要调整-使用率超过80%", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "node_nf_conntrack_entries / node_nf_conntrack_entries_limit*100 > 80", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "系统出现oom", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "increase(node_vmstat_oom_kill[1m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "网卡入方向丢包", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "rate(node_network_receive_drop_total{device=~\"e.*\"}[1m]) > 3", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + 
"notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "网卡出方向丢包", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "rate(node_network_transmit_drop_total{device=~\"e.*\"}[1m]) > 3", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "计算资源不足-机器loadavg1大于15", + "note": "需要扩容或者升级配置了", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "node_load1>15", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "运行进程数过多-超过3000", + "note": "建议扩容", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "node_procs_running > 3000", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file diff --git a/etc/alerts/zookeeper_by_exporter.json b/etc/alerts/zookeeper_by_exporter.json new file mode 100644 index 00000000..aa7cc9cf --- /dev/null +++ b/etc/alerts/zookeeper_by_exporter.json @@ -0,0 +1,114 @@ +[ + { + "name": "Zookeeper leader 个数大于1", + "note": "", + "severity": 2, + "disabled": 0, + 
"prom_for_duration": 60, + "prom_ql": "sum(zk_server_leader) > 1", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "Zookeeper 实例运行异常", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "zk_ruok == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "Zookeeper 没有 leader 了", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "sum(zk_server_leader) == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + }, + { + "name": "Zookeeper 挂掉了", + "note": "", + "severity": 2, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "zk_up == 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [] + } + ] \ No newline at end of file diff --git a/etc/dashboards/kafka_by_exporter.json 
b/etc/dashboards/kafka_by_exporter.json new file mode 100644 index 00000000..47f4cc1c --- /dev/null +++ b/etc/dashboards/kafka_by_exporter.json @@ -0,0 +1,63 @@ +[ + { + "name": "Kafka - 模板", + "tags": "Kafka Prometheus ", + "configs": "{\"var\":[{\"name\":\"instance\",\"definition\":\"label_values(kafka_brokers, instance)\"},{\"name\":\"job\",\"definition\":\"label_values(kafka_brokers, job)\"}]}", + "chart_groups": [ + { + "name": "overview", + "weight": 0, + "charts": [ + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"count(count by (topic) (kafka_topic_partitions))\"}],\"name\":\"topics\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":50}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":8,\"x\":8,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"kafka_brokers\"}],\"name\":\"brokers\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":50}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":8,\"x\":0,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"sum(kafka_topic_partitions)\"}],\"name\":\"partitions\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":50}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + } + ] + }, + { + "name": "throughput", + "weight": 1, + "charts": [ + { + "configs": "{\"targets\":[{\"expr\":\"sum(rate(kafka_topic_partition_current_offset{instance=\\\"$instance\\\"}[1m])) by (topic)\"}],\"name\":\"Message in per 
second\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"sum(kafka_consumer_lag_millis{instance=\\\"$instance\\\"}) by (consumergroup, topic) \",\"legend\":\"{{consumergroup}} (topic: {{topic}})\"}],\"name\":\"Latency by Consumer Group\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"humantimeMilliseconds\"},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":2,\"i\":\"1\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"sum(rate(kafka_consumergroup_current_offset{instance=\\\"$instance\\\"}[1m])) by (topic)\"}],\"name\":\"Message consume per second\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"sum(kafka_topic_partition_current_offset{instance=\\\"$instance\\\"}) by (topic) - sum(kafka_consumergroup_current_offset{instance=\\\"$instance\\\"}) by (topic) \",\"legend\":\"{{consumergroup}} (topic: {{topic}})\"}],\"name\":\"Lag by Consumer 
Group\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":2,\"i\":\"3\"}}", + "weight": 0 + } + ] + }, + { + "name": "patition/replicate", + "weight": 2, + "charts": [ + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"kafka_topic_partitions{instance=\\\"$instance\\\"}\",\"legend\":\"{{topic}}\"}],\"name\":\"Partitions per Topic\",\"custom\":{\"showHeader\":true,\"calc\":\"lastNotNull\",\"displayMode\":\"seriesToRows\"},\"options\":{\"standardOptions\":{}},\"overrides\":[{}],\"version\":\"2.0.0\",\"type\":\"table\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"kafka_topic_partition_under_replicated_partition\",\"legend\":\"{{topic}}-{{partition}}\"}],\"name\":\"Under Replicated\",\"description\":\"副本不同步预案\\n1. Restart the Zookeeper leader.\\n2. 
Restart the broker\\\\brokers that are not replicating some of the partitions.\",\"custom\":{\"showHeader\":true,\"calc\":\"lastNotNull\",\"displayMode\":\"seriesToRows\"},\"options\":{\"standardOptions\":{}},\"overrides\":[{}],\"version\":\"2.0.0\",\"type\":\"table\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + } + ] + } + ] + } + ] \ No newline at end of file diff --git a/etc/dashboards/node_by_exporter.json b/etc/dashboards/node_by_exporter.json new file mode 100644 index 00000000..d3093175 --- /dev/null +++ b/etc/dashboards/node_by_exporter.json @@ -0,0 +1,197 @@ +[ + { + "name": "HOST - 模板", + "tags": "Prometheus Host", + "configs": "{\"var\":[{\"name\":\"node\",\"definition\":\"label_values(node_cpu_seconds_total, instance)\",\"selected\":\"$node\",\"options\":[\"tt-fc-es01.nj:12345\",\"tt-fc-es02.nj:12345\",\"tt-fc-dev01.nj:12345\",\"10.206.0.13:9100\"]}]}", + "chart_groups": [ + { + "name": "整体概况", + "weight": 0, + "charts": [ + { + "configs": "{\"targets\":[{\"expr\":\"(node_memory_MemTotal_bytes{instance=\\\"$node\\\"} - node_memory_MemFree_bytes{instance=\\\"$node\\\"} - (node_memory_Cached_bytes{instance=\\\"$node\\\"} + node_memory_Buffers_bytes{instance=\\\"$node\\\"}))/node_memory_MemTotal_bytes{instance=\\\"$node\\\"}*100\"}],\"name\":\"内存使用率\",\"description\":\"如果内存使用率超过50%,则需要扩容或者升级配置了\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":25}},\"options\":{\"valueMappings\":[{\"type\":\"range\",\"result\":{\"color\":\"#369903\"},\"match\":{\"from\":0,\"to\":50}},{\"type\":\"range\",\"match\":{\"from\":50,\"to\":100},\"result\":{\"color\":\"#e3170d\"}}],\"standardOptions\":{\"util\":\"percent\",\"decimals\":1}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":6,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"(((count(count(node_cpu_seconds_total{instance=\\\"$node\\\"}) by (cpu))) - 
avg(sum by (mode)(rate(node_cpu_seconds_total{mode='idle',instance=\\\"$node\\\"}[1m])))) * 100) / count(count(node_cpu_seconds_total{instance=\\\"$node\\\"}) by (cpu))\"}],\"name\":\"CPU使用率\",\"description\":\"如果cpu使用率超过50%,可以通过top命令查看机器上是否有异常进程,如果没有异常进程,则说明服务需要扩容或者机器需要升级配置了\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":30}},\"options\":{\"valueMappings\":[{\"type\":\"range\",\"result\":{\"color\":\"#369903\"},\"match\":{\"from\":0,\"to\":50}},{\"type\":\"range\",\"match\":{\"special\":50,\"from\":50,\"to\":100},\"result\":{\"color\":\"#b22222\"}}],\"standardOptions\":{\"util\":\"percent\",\"decimals\":1}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":0,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"max(100 - ((node_filesystem_avail_bytes{instance=\\\"$node\\\",} * 100) / node_filesystem_size_bytes{instance=\\\"$node\\\"}))\",\"legend\":\"{{mountpoint}}\"}],\"name\":\"磁盘分区使用率最大值\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"percent\",\"decimals\":1},\"thresholds\":{\"steps\":[{\"value\":90,\"color\":\"#f90101\"}]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":6,\"x\":0,\"y\":1,\"i\":\"2\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_time_seconds{instance=\\\"$node\\\"} - 
node_boot_time_seconds{instance=\\\"$node\\\"}\"}],\"name\":\"启动时长\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"title\":null,\"value\":20}},\"options\":{\"valueMappings\":[],\"standardOptions\":{\"util\":\"humantimeSeconds\",\"decimals\":1}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":3,\"x\":21,\"y\":0,\"i\":\"3\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"(node_memory_SwapTotal_bytes{instance=\\\"$node\\\"} - node_memory_SwapFree_bytes{instance=\\\"$node\\\"})\"}],\"name\":\"SWAP内存使用\",\"description\":\"swap使用过高,会影响系统io性能,如果内存够用但swap使用很高,可以调小swappiness的值\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":30}},\"options\":{\"valueMappings\":[{\"type\":\"range\",\"result\":{\"color\":\"#369903\"},\"match\":{\"from\":0,\"to\":50}},{\"type\":\"range\",\"match\":{\"special\":50,\"from\":50,\"to\":80},\"result\":{\"color\":\"#fb9b2d\"}},{\"type\":\"range\",\"match\":{\"from\":80,\"to\":100000},\"result\":{\"color\":\"#d10000\"}}],\"standardOptions\":{\"util\":\"bytesIEC\",\"decimals\":1}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":3,\"x\":12,\"y\":0,\"i\":\"4\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"rate(node_vmstat_oom_kill{instance=\\\"$node\\\"}[1m])\",\"legend\":\"OOM\"}],\"name\":\"OOM次数\",\"description\":\"大于0,说明有进程内存不够用了,需要考虑扩容或升级配置了\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"decimals\":1},\"thresholds\":{\"steps\":[{\"value\":1,\"color\":\"#f90101\"}]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":6,\"x\":18,\"y\":1,\"i\":\"5\"}}", + "weight": 0 + }, + { + "configs": 
"{\"targets\":[{\"expr\":\"max(rate(node_disk_io_time_seconds_total{instance=\\\"$node\\\"}[5m]) * 100\\n)\",\"legend\":\"{{device}}\"}],\"name\":\"磁盘设备io util最大值\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"percent\",\"decimals\":1},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":6,\"x\":12,\"y\":1,\"i\":\"6\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_filefd_allocated{instance=\\\"$node\\\"}/node_filefd_maximum{instance=\\\"$node\\\"}*100\"}],\"name\":\"FD使用率\",\"description\":\"如果超过80%,建议把文件描述符的最大个数调大,或者扩容\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":25}},\"options\":{\"valueMappings\":[{\"type\":\"range\",\"result\":{\"color\":\"#369903\"},\"match\":{\"from\":0,\"to\":50}},{\"type\":\"range\",\"match\":{\"special\":50,\"from\":50,\"to\":80},\"result\":{\"color\":\"#fb9b2d\"}},{\"type\":\"range\",\"match\":{\"from\":80,\"to\":100},\"result\":{\"color\":\"#d10000\"}}],\"standardOptions\":{\"util\":\"percent\",\"decimals\":1}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":3,\"x\":15,\"y\":0,\"i\":\"7\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"max(100 - ((node_filesystem_files_free{instance=\\\"$node\\\",mountpoint!~\\\"/var/lib/.*\\\",mountpoint!~\\\"/run/user.*\\\"} * 100) / 
node_filesystem_files{instance=\\\"$node\\\",mountpoint!~\\\"/var/lib/.*\\\",mountpoint!~\\\"/run/user.*\\\"}))\",\"legend\":\"{{mountpoint}}\"}],\"name\":\"inode分区使用率最大值\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"percent\",\"decimals\":1},\"thresholds\":{\"steps\":[{\"value\":50,\"color\":\"#f90101\"}]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":1,\"w\":6,\"x\":6,\"y\":1,\"i\":\"8\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"sum(node_filesystem_device_error{instance=\\\"$node\\\",mountpoint!~\\\"/var/lib/.*\\\",mountpoint!~\\\"/run.*\\\"})\",\"legend\":\"{{mountpoint}}\"}],\"name\":\"写文件错误数总和\",\"custom\":{\"textMode\":\"valueAndName\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":30}},\"options\":{\"valueMappings\":[{\"type\":\"range\",\"match\":{\"from\":0,\"to\":0},\"result\":{\"color\":\"#369903\"}},{\"type\":\"range\",\"match\":{\"from\":1,\"to\":10000},\"result\":{\"color\":\"#f0310f\"}}],\"standardOptions\":{\"decimals\":1}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":3,\"x\":18,\"y\":0,\"i\":\"9\"}}", + "weight": 0 + } + ] + }, + { + "name": "系统指标", + "weight": 1, + "charts": [ + { + "configs": "{\"targets\":[{\"expr\":\"node_procs_running{instance=\\\"$node\\\"}\",\"legend\":\"{{mountpoint}}\"}],\"name\":\"进程数\",\"description\":\"进程数超过2000,可以考虑扩容了\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{\"steps\":[{\"value\":2000,\"color\":\"#ff0000\"}]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", + 
"weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_timex_offset_seconds{instance=\\\"$node\\\"}\",\"legend\":\"ntp偏移\"}],\"name\":\"NTP偏移\",\"description\":\"\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{\"steps\":[]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"rate(node_intr_total{instance=\\\"$node\\\"}[1m])\",\"legend\":\"Interrupts\"},{\"expr\":\"irate(node_context_switches_total{instance=\\\"$node\\\"}[1m])\",\"legend\":\"context switches\"}],\"name\":\"上下文切换/中断\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_entropy_available_bits{instance=\\\"$node\\\"}\",\"legend\":\"entropy\"}],\"name\":\"熵池大小\",\"description\":\"熵池太小 ,程序使用随机函数会阻塞,可以安装 rng-tools 工具增加熵池大小,可参考\\nhttps://codeantenna.com/a/Ab6aMd3NSA \",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{\"steps\":[{\"value\":100,\"color\":\"#f70202\"}]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"3\"}}", + "weight": 0 + } + ] + }, + { + "name": "CPU详情", + "weight": 2, + "charts": [ + { + "configs": "{\"targets\":[{\"expr\":\" (avg by 
(mode)(rate(node_cpu_seconds_total{instance=\\\"$node\\\",mode!=\\\"idle\\\"}[1m])))*100\",\"legend\":\"{{mode}}\"}],\"name\":\"CPU使用率详情\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\" (avg by (mode)(rate(node_cpu_seconds_total{instance=\\\"$node\\\",mode=\\\"idle\\\"}[1m])))*100\",\"legend\":\"cpu_idle\"}],\"name\":\"CPU空闲率\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{\"steps\":[{\"value\":10,\"color\":\"#f90101\"}]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_load1{instance=\\\"$node\\\"}\",\"legend\":\"load1\"},{\"expr\":\"node_load5{instance=\\\"$node\\\"}\",\"legend\":\"load5\"},{\"expr\":\"node_load15{instance=\\\"$node\\\"}\",\"legend\":\"load15\"}],\"name\":\"CPU负载\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{\"steps\":[]}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + } + ] + }, + { + "name": "内存详情", + "weight": 3, + "charts": [ + { + "configs": 
"{\"targets\":[{\"expr\":\"node_memory_HugePages_Total{instance=\\\"$node\\\"}\",\"legend\":\"HugePages_Total\"},{\"expr\":\"node_memory_Hugepagesize_bytes{instance=\\\"$node\\\"}\",\"legend\":\"HugePages_Size\"},{\"expr\":\"node_memory_HugePages_Surp{instance=\\\"$node\\\"}\",\"legend\":\"HugePages_Surp \"},{\"expr\":\"node_memory_HugePages_Free{instance=\\\"$node\\\"}\",\"legend\":\"HugePages_Free\"},{\"expr\":\"node_memory_HugePages_Rsvd{instance=\\\"$node\\\"}\",\"legend\":\"HugePages_Rsvd\"},{\"expr\":\"node_memory_AnonHugePages_bytes{instance=\\\"$node\\\"}\",\"legend\":\"AnonHugePages\"},{\"expr\":\"node_memory_Inactive_file_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Inactive_file\"},{\"expr\":\"node_memory_Inactive_anon_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Inactive_anon\"},{\"expr\":\"node_memory_Active_file_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Active_file\"},{\"expr\":\"node_memory_Active_anon_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Active_anon\"},{\"expr\":\"node_memory_Unevictable_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Unevictable\"},{\"expr\":\"node_memory_AnonPages_bytes{instance=\\\"$node\\\"}\",\"legend\":\"AnonPages\"},{\"expr\":\"node_memory_Shmem_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Shmem\"},{\"expr\":\"node_memory_Mapped_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Mapped\"},{\"expr\":\"node_memory_Cached_bytes{instance=\\\"$node\\\"} \",\"legend\":\"Cache\"},{\"expr\":\"node_memory_SwapCached_bytes{instance=\\\"$node\\\"}\",\"legend\":\"SwapCache\"},{\"expr\":\"node_memory_Mlocked_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Mlocked\"},{\"expr\":\"node_memory_Buffers_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Buffers\"}],\"name\":\"用户态内存使用\",\"description\":\"内存指标可参考链接 [/PROC/MEMINFO之谜](http://linuxperf.com/?p=142) 
\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"table\"},\"standardOptions\":{\"util\":\"bytesIEC\"},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.35,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_memory_Slab_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Slab \"},{\"expr\":\"node_memory_SReclaimable_bytes{instance=\\\"$node\\\"}\",\"legend\":\"SReclaimable \"},{\"expr\":\"node_memory_SUnreclaim_bytes{instance=\\\"$node\\\"}\",\"legend\":\"SUnreclaim \"},{\"expr\":\"node_memory_VmallocUsed_bytes{instance=\\\"$node\\\"}\",\"legend\":\"VmallocUsed\"},{\"expr\":\"node_memory_VmallocChunk_bytes{instance=\\\"$node\\\"}\",\"legend\":\"VmallocChunk\"},{\"expr\":\"node_memory_KernelStack_bytes{instance=\\\"$node\\\"}\",\"legend\":\"KernelStack\"},{\"expr\":\"node_memory_Bounce_bytes{instance=\\\"$node\\\"}\",\"legend\":\"Bounce \"}],\"name\":\"内核态内存使用\",\"description\":\"内存指标可参考链接 [/PROC/MEMINFO之谜](http://linuxperf.com/?p=142) \",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"bytesIEC\"},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "configs": 
"{\"targets\":[{\"expr\":\"node_memory_DirectMap1G_bytes{instance=\\\"$node\\\"}\",\"legend\":\"DirectMap1G\"},{\"expr\":\"node_memory_DirectMap2M_bytes{instance=\\\"$node\\\"}\",\"legend\":\"DirectMap2M\"},{\"expr\":\"node_memory_DirectMap4k_bytes{instance=\\\"$node\\\"}\",\"legend\":\"DirectMap4K\"}],\"name\":\"TLB效率\",\"description\":\"/proc/meminfo中的DirectMap所统计的不是关于内存的使用,而是一个反映TLB效率的指标。TLB(Translation Lookaside Buffer)是位于CPU上的缓存,用于将内存的虚拟地址翻译成物理地址,由于TLB的大小有限,不能缓存的地址就需要访问内存里的page table来进行翻译,速度慢很多。为了尽可能地将地址放进TLB缓存,新的CPU硬件支持比4k更大的页面从而达到减少地址数量的目的, 比如2MB,4MB,甚至1GB的内存页,视不同的硬件而定。”DirectMap4k”表示映射为4kB的内存数量, “DirectMap2M”表示映射为2MB的内存数量,以此类推。所以DirectMap其实是一个反映TLB效率的指标\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":2,\"i\":\"2\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_memory_NFS_Unstable_bytes{instance=\\\"$node\\\"}\",\"legend\":\"NFS Unstable\"},{\"expr\":\"node_memory_Writeback_bytes{instance=\\\"$node\\\"}\",\"legend\":\"memory_Writeback\"},{\"expr\":\"node_memory_Dirty_bytes{instance=\\\"$node\\\"}\",\"legend\":\"memory_Dirty\"}],\"name\":\"dirty page\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":2,\"i\":\"3\"}}", + "weight": 0 + } + ] + }, + { + "name": "磁盘详情", + "weight": 4, + "charts": [ + { + "configs": "{\"targets\":[{\"expr\":\"node_filesystem_avail_bytes{instance=\\\"$node\\\",device!~'rootfs', 
device!~\\\"tmpfs\\\",mountpoint!~\\\"/var/lib.*\\\"}\",\"legend\":\"{{mountpoint}} - Available\"},{\"expr\":\"node_filesystem_free_bytes{instance=\\\"$node\\\",device!~'rootfs',device!~\\\"tmpfs\\\",mountpoint!~\\\"/var/lib.*\\\"}\",\"legend\":\"{{mountpoint}} - Free\"},{\"expr\":\"node_filesystem_size_bytes{instance=\\\"$node\\\",device!~'rootfs',device!~\\\"tmpfs\\\",mountpoint!~\\\"/var/lib.*\\\"}\",\"legend\":\"{{mountpoint}} - Total\"}],\"name\":\"磁盘空间\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"bytesIEC\"},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_filesystem_files{instance=\\\"$node\\\",device!~'rootfs',device!~\\\"tmpfs\\\",mountpoint!~\\\"/var/lib.*\\\"}\",\"legend\":\"{{mountpoint}} - total\"},{\"expr\":\"node_filesystem_files_free{instance=\\\"$node\\\",device!~'rootfs',device!~\\\"tmpfs\\\",mountpoint!~\\\"/var/lib.*\\\"}\",\"legend\":\"{{mountpoint}} - free\"}],\"name\":\"inode\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_filefd_maximum{instance=\\\"$node\\\"}\",\"legend\":\"Max open files\"},{\"expr\":\"node_filefd_allocated{instance=\\\"$node\\\"}\",\"legend\":\"Open 
files\"}],\"name\":\"fd使用\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_filesystem_readonly{instance=\\\"$node\\\",device!~'rootfs',device!~\\\"tmpfs\\\",mountpoint!~\\\"/var/lib.*\\\"}\",\"legend\":\"{{mountpoint}} - ReadOnly\"},{\"expr\":\"node_filesystem_device_error{instance=\\\"$node\\\",device!~'rootfs',device!~\\\"tmpfs\\\",mountpoint!~\\\"/var/lib.*\\\"}\",\"legend\":\"{{mountpoint}} - Device error\"}],\"name\":\"读写错误\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"rate(node_disk_reads_completed_total{instance=\\\"$node\\\"}[1m])\",\"legend\":\"{{device}}-reads\"},{\"expr\":\"rate(node_disk_writes_completed_total{instance=\\\"$node\\\"}[1m])\",\"legend\":\"{{device}} - Writes\"},{\"expr\":\"rate(node_disk_reads_merged_total{instance=\\\"$node\\\"}[1m])\",\"legend\":\"{{device}} - Read merged\"},{\"expr\":\"rate(node_disk_writes_merged_total{instance=\\\"$node\\\"}[1m])\",\"legend\":\"{{device}} - Write 
merged\"}],\"name\":\"IO/Merged次数\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":2,\"i\":\"4\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"rate(node_disk_read_bytes_total{instance=\\\"$node\\\"}[1m])\",\"legend\":\"{{device}}-Read bytes\"},{\"expr\":\"rate(node_disk_written_bytes_total{instance=\\\"$node\\\"}[1m])\",\"legend\":\"{{device}} - Written bytes\"}],\"name\":\"读写数据大小\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"bytesIEC\"},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":2,\"i\":\"5\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"rate(node_disk_io_time_seconds_total{instance=\\\"$node\\\"}[1m])\",\"legend\":\"{{device}}\"}],\"name\":\"io util\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":2,\"i\":\"6\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"rate(node_disk_read_time_seconds_total{instance=\\\"$node\\\"}[5m]) / rate(node_disk_reads_completed_total{instance=\\\"$node\\\"}[1m])+rate(node_disk_write_time_seconds_total{instance=\\\"$node\\\"}[5m]) / rate(node_disk_writes_completed_total{instance=\\\"$node\\\"}[1m])\",\"legend\":\"{{device}}\"}],\"name\":\"io 
await\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.64,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":2,\"i\":\"7\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"(rate(node_disk_read_bytes_total{instance=\\\"$node\\\"}[1m]) + rate(node_disk_written_bytes_total{instance=\\\"$node\\\"}[1m]))\\n/\\n(rate(node_disk_reads_completed_total{instance=\\\"$node\\\"}[1m]) + rate(node_disk_writes_completed_total{instance=\\\"$node\\\"}[1m]))\",\"legend\":\"avgrq-sz\"}],\"name\":\"avgrq-sz\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":4,\"i\":\"8\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"rate(node_disk_io_time_weighted_seconds_total{instance=\\\"$node\\\"}[1m])\\n\",\"legend\":\"{{device}}\"}],\"name\":\"avgqu-sz\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":4,\"i\":\"9\"}}", + "weight": 0 + } + ] + }, + { + "name": "网络详情", + "weight": 5, + "charts": [ + { + "configs": "{\"targets\":[{\"expr\":\"rate(node_network_receive_bytes_total{instance=\\\"$node\\\",device=~\\\"e.*\\\"}[1m])*8\",\"legend\":\"{{device}} - 
in\"},{\"expr\":\"rate(node_network_transmit_bytes_total{instance=\\\"$node\\\",device=~\\\"e.*\\\"}[1m])*8\",\"legend\":\"{{device}} - out\"}],\"name\":\"出入流量大小\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"rate(node_network_receive_packets_total{instance=\\\"$node\\\",device=~\\\"e.*\\\"}[1m])\",\"legend\":\"{{device}} - in\"},{\"expr\":\"rate(node_network_transmit_packets_total{instance=\\\"$node\\\",device=~\\\"e.*\\\"}[1m])\",\"legend\":\"{{device}} - out\"}],\"name\":\"packets\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"rate(node_network_receive_errs_total{instance=\\\"$node\\\",device=~\\\"e.*\\\"}[1m])\",\"legend\":\"{{device}} - in\"},{\"expr\":\"rate(node_network_transmit_errs_total{instance=\\\"$node\\\",device=~\\\"e.*\\\"}[1m])\",\"legend\":\"{{device}} - out\"}],\"name\":\"error\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + }, + { + "configs": 
"{\"targets\":[{\"expr\":\"rate(node_network_receive_drop_total{instance=\\\"$node\\\",device=~\\\"e.*\\\"}[1m])\",\"legend\":\"{{device}} - in\"},{\"expr\":\"rate(node_network_transmit_drop_total{instance=\\\"$node\\\",device=~\\\"e.*\\\"}[1m])\",\"legend\":\"{{device}} - out\"}],\"name\":\"drop\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_nf_conntrack_entries{instance=\\\"$node\\\"}\",\"legend\":\"NF conntrack entries\"},{\"expr\":\"node_nf_conntrack_entries_limit{instance=\\\"$node\\\"}\",\"legend\":\"NF conntrack limit\"}],\"name\":\"nf_conntrack\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":2,\"i\":\"4\"}}", + "weight": 0 + }, + { + "configs": 
"{\"targets\":[{\"expr\":\"node_sockstat_TCP_alloc{instance=\\\"$node\\\"}\",\"legend\":\"TCP_alloc\"},{\"expr\":\"node_sockstat_TCP_inuse{instance=\\\"$node\\\"}\",\"legend\":\"TCP_inuse\"},{\"expr\":\"node_sockstat_TCP_orphan{instance=\\\"$node\\\"}\",\"legend\":\"TCP_orphan\"},{\"expr\":\"node_sockstat_TCP_tw{instance=\\\"$node\\\"}\",\"legend\":\"TCP_tw\"},{\"expr\":\"node_netstat_Tcp_CurrEstab{instance=\\\"$node\\\"}\",\"legend\":\"TCP_CurrEstab\"}],\"name\":\"tcp\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.27,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":2,\"i\":\"5\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"expr\":\"node_sockstat_sockets_used{instance=\\\"$node\\\"}\",\"legend\":\"Sockets_used\"}],\"name\":\"socket\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":2,\"i\":\"6\"}}", + "weight": 0 + } + ] + } + ] + } + ] \ No newline at end of file diff --git a/etc/dashboards/zookeeper_by_exporter.json b/etc/dashboards/zookeeper_by_exporter.json new file mode 100644 index 00000000..842f1f15 --- /dev/null +++ b/etc/dashboards/zookeeper_by_exporter.json @@ -0,0 +1,55 @@ +[ + { + "name": "Zookeeper - 模板", + "tags": "", + "configs": "{\"var\":[{\"name\":\"job\",\"definition\":\"label_values(zk_up,job)\"},{\"definition\":\"label_values(zk_up,instance)\",\"name\":\"instance\"}]}", + "chart_groups": [ + { + "name": "overview", + "weight": 0, + "charts": [ + { + "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"zk_up{job=\\\"$job\\\", instance=\\\"$instance\\\"}\",\"legend\":\"up\"}],\"name\":\"up\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":40}},\"options\":{\"valueMappings\":[{\"type\":\"special\",\"match\":{\"special\":1},\"result\":{\"color\":\"#3d950e\",\"text\":\"UP\"}},{\"type\":\"special\",\"match\":{\"special\":0},\"result\":{\"color\":\"#f01414\",\"text\":\"DOWN\"}}],\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"zk_znode_count{job=~\\\"$job\\\", instance=~\\\"$instance\\\"}\",\"legend\":\"{{instance}}\"}],\"name\":\"zk_znode_count\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":50}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"zk_watch_count{job=~\\\"$job\\\", instance=~\\\"$instance\\\"}\",\"legend\":\"{{instance}}\"}],\"name\":\"zk_watch_count\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":50}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"zk_ephemerals_count{job=~\\\"$job\\\", 
instance=~\\\"$instance\\\"}\",\"legend\":\"zk_ephemerals_count\"}],\"name\":\"zk_ephemerals_count\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":50}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"rate(zk_packets_sent{job=~\\\"$job\\\", instance=~\\\"$instance\\\"}[5m])\",\"legend\":\"{{instance}}-sent\"},{\"expr\":\"rate(zk_packets_received{job=~\\\"$job\\\", instance=~\\\"$instance\\\"}[5m])\",\"refId\":\"B\",\"legend\":\"{{instance}}-received\"}],\"name\":\"Pakages\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":1,\"i\":\"4\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"zk_num_alive_connections{job=~\\\"$job\\\", instance=~\\\"$instance\\\"}\",\"legend\":\"{{instance}}\"}],\"name\":\"alive_connections\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":3,\"i\":\"5\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"zk_open_file_descriptor_count{job=~\\\"$job\\\", instance=~\\\"$instance\\\"}\",\"legend\":\"{{instance}}-open\"},{\"expr\":\"zk_max_file_descriptor_count{job=~\\\"$job\\\", 
instance=~\\\"$instance\\\"}\",\"refId\":\"B\",\"legend\":\"{{instance}}-max\"}],\"name\":\"file_descriptor\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":3,\"i\":\"6\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"zk_avg_latency{job=~\\\"$job\\\", instance=~\\\"$instance\\\"}\",\"legend\":\"{{instance}}-avg\"},{\"expr\":\"zk_min_latency{job=~\\\"$job\\\", instance=~\\\"$instance\\\"}\",\"refId\":\"B\",\"legend\":\"{{instance}}-min\"},{\"expr\":\"zk_max_latency{job=~\\\"$job\\\", instance=~\\\"$instance\\\"}\",\"refId\":\"C\",\"legend\":\"{{instance}}-max\"}],\"name\":\"latency(ms)\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":3,\"i\":\"7\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"zk_outstanding_requests{job=~\\\"$job\\\", instance=~\\\"$instance\\\"}\",\"legend\":\"{{instance}}\"}],\"name\":\"outstanding_requests\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":3,\"i\":\"8\"}}", + "weight": 0 + }, + { + "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"zk_approximate_data_size{job=~\\\"$job\\\", 
instance=~\\\"$instance\\\"}\",\"legend\":\"{{instance}}\"}],\"name\":\"approximate_data_size\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":1,\"i\":\"9\"}}", + "weight": 0 + } + ] + } + ] + } + ] \ No newline at end of file diff --git a/etc/metrics.yaml b/etc/metrics.yaml index d0959d20..ac97fb43 100644 --- a/etc/metrics.yaml +++ b/etc/metrics.yaml @@ -350,3 +350,145 @@ windows_system_system_calls_total: Total number of system calls (WMI source is P windows_system_system_up_time: System boot time (WMI source is PerfOS_System.SystemUpTime)(gauge) windows_system_threads: Current number of threads (WMI source is PerfOS_System.Threads)(gauge) +# [node_exporter] +# SYSTEM +# CPU context switch 次数 +node_context_switches_total: context_switches +# Interrupts 次数 +node_intr_total: Interrupts +# 运行的进程数 +node_procs_running: Processes in runnable state +# 熵池大小 +node_entropy_available_bits: Entropy available to random number generators +node_time_seconds: System time in seconds since epoch (1970) +node_boot_time_seconds: Node boot time, in unixtime +# CPU +node_cpu_seconds_total: Seconds the CPUs spent in each mode +node_load1: cpu load 1m +node_load5: cpu load 5m +node_load15: cpu load 15m + +# MEM +# 内核态 +# 用户追踪已从交换区获取但尚未修改的页面的内存 +node_memory_SwapCached_bytes: Memory that keeps track of pages that have been fetched from swap but not yet been modified +# 内核用于缓存数据结构供自己使用的内存 +node_memory_Slab_bytes: Memory used by the kernel to cache data structures for its own use +# slab中可回收的部分 +node_memory_SReclaimable_bytes: SReclaimable - Part of Slab, that might be reclaimed, such as caches +# slab中不可回收的部分 +node_memory_SUnreclaim_bytes: Part of Slab, that cannot be reclaimed on memory pressure +# 
Vmalloc内存区的大小 +node_memory_VmallocTotal_bytes: Total size of vmalloc memory area +# vmalloc已分配的内存,虚拟地址空间上的连续的内存 +node_memory_VmallocUsed_bytes: Amount of vmalloc area which is used +# vmalloc区可用的连续最大块的大小,通过此指标可以知道vmalloc可分配连续内存的最大值 +node_memory_VmallocChunk_bytes: Largest contiguous block of vmalloc area which is free +# 内存的硬件故障删除掉的内存页的总大小 +node_memory_HardwareCorrupted_bytes: Amount of RAM that the kernel identified as corrupted / not working +# 用于在虚拟和物理内存地址之间映射的内存 +node_memory_PageTables_bytes: Memory used to map between virtual and physical memory addresses (gauge) +# 内核栈内存,常驻内存,不可回收 +node_memory_KernelStack_bytes: Kernel memory stack. This is not reclaimable +# 用来访问高端内存,复制高端内存的临时buffer,称为“bounce buffering”,会降低I/O 性能 +node_memory_Bounce_bytes: Memory used for block device bounce buffers +#用户态 +# 单个巨页大小 +node_memory_Hugepagesize_bytes: Huge Page size +# 系统分配的常驻巨页数 +node_memory_HugePages_Total: Total size of the pool of huge pages +# 系统空闲的巨页数 +node_memory_HugePages_Free: Huge pages in the pool that are not yet allocated +# 进程已申请但未使用的巨页数 +node_memory_HugePages_Rsvd: Huge pages for which a commitment to allocate from the pool has been made, but no allocation +# 超过系统设定的常驻HugePages数量的个数 +node_memory_HugePages_Surp: Huge pages in the pool above the value in /proc/sys/vm/nr_hugepages +# 透明巨页 Transparent HugePages (THP) +node_memory_AnonHugePages_bytes: Memory in anonymous huge pages +# inactivelist中的File-backed内存 +node_memory_Inactive_file_bytes: File-backed memory on inactive LRU list +# inactivelist中的Anonymous内存 +node_memory_Inactive_anon_bytes: Anonymous and swap cache on inactive LRU list, including tmpfs (shmem) +# activelist中的File-backed内存 +node_memory_Active_file_bytes: File-backed memory on active LRU list +# activelist中的Anonymous内存 +node_memory_Active_anon_bytes: Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs +# 禁止换出的页,对应 Unevictable 链表 +node_memory_Unevictable_bytes: Amount of unevictable memory that can't be swapped out for
a variety of reasons +# 共享内存 +node_memory_Shmem_bytes: Used shared memory (shared between several processes, thus including RAM disks) +# 匿名页内存大小 +node_memory_AnonPages_bytes: Memory in user pages not backed by files +# 被关联的内存页大小 +node_memory_Mapped_bytes: Used memory in mapped pages files which have been mmaped, such as libraries +# file-backed内存页缓存大小 +node_memory_Cached_bytes: Parked file data (file content) cache +# 系统中有多少匿名页曾经被swap-out、现在又被swap-in并且swap-in之后页面中的内容一直没发生变化 +node_memory_SwapCached_bytes: Memory that keeps track of pages that have been fetched from swap but not yet been modified +# 被mlock()系统调用锁定的内存大小 +node_memory_Mlocked_bytes: Size of pages locked to memory using the mlock() system call +# 块设备(block device)所占用的缓存页 +node_memory_Buffers_bytes: Block device (e.g. harddisk) cache +node_memory_SwapTotal_bytes: Memory information field SwapTotal_bytes +node_memory_SwapFree_bytes: Memory information field SwapFree_bytes + +# DISK +node_filesystem_avail_bytes: Filesystem space available to non-root users in bytes +node_filesystem_free_bytes: Filesystem free space in bytes +node_filesystem_size_bytes: Filesystem size in bytes +node_filesystem_files_free: Filesystem total free file nodes +node_filesystem_files: Filesystem total file nodes +node_filefd_maximum: Max open files +node_filefd_allocated: Open files +node_filesystem_readonly: Filesystem read-only status +node_filesystem_device_error: Whether an error occurred while getting statistics for the given device +node_disk_reads_completed_total: The total number of reads completed successfully +node_disk_writes_completed_total: The total number of writes completed successfully +node_disk_reads_merged_total: The number of reads merged +node_disk_writes_merged_total: The number of writes merged +node_disk_read_bytes_total: The total number of bytes read successfully +node_disk_written_bytes_total: The total number of bytes written successfully +node_disk_io_time_seconds_total: Total seconds spent doing
I/Os +node_disk_read_time_seconds_total: The total number of seconds spent by all reads +node_disk_write_time_seconds_total: The total number of seconds spent by all writes +node_disk_io_time_weighted_seconds_total: The weighted number of seconds spent doing I/Os + +# NET +node_network_receive_bytes_total: Network device statistic receive_bytes (counter) +node_network_transmit_bytes_total: Network device statistic transmit_bytes (counter) +node_network_receive_packets_total: Network device statistic receive_packets +node_network_transmit_packets_total: Network device statistic transmit_packets +node_network_receive_errs_total: Network device statistic receive_errs +node_network_transmit_errs_total: Network device statistic transmit_errs +node_network_receive_drop_total: Network device statistic receive_drop +node_network_transmit_drop_total: Network device statistic transmit_drop +node_nf_conntrack_entries: Number of currently allocated flow entries for connection tracking +node_sockstat_TCP_alloc: Number of TCP sockets in state alloc +node_sockstat_TCP_inuse: Number of TCP sockets in state inuse +node_sockstat_TCP_orphan: Number of TCP sockets in state orphan +node_sockstat_TCP_tw: Number of TCP sockets in state tw +node_netstat_Tcp_CurrEstab: Statistic TcpCurrEstab +node_sockstat_sockets_used: Number of IPv4 sockets in use + +# [kafka_exporter] +kafka_brokers: count of kafka_brokers (gauge) +kafka_topic_partitions: Number of partitions for this Topic (gauge) +kafka_topic_partition_current_offset: Current Offset of a Broker at Topic/Partition (gauge) +kafka_consumergroup_current_offset: Current Offset of a ConsumerGroup at Topic/Partition (gauge) +kafka_consumer_lag_millis: Current approximation of consumer lag for a ConsumerGroup at Topic/Partition (gauge) +kafka_topic_partition_under_replicated_partition: 1 if Topic/Partition is under Replicated + +# [zookeeper_exporter] +zk_znode_count: The total count of znodes stored +zk_ephemerals_count: The number of Ephemerals nodes
+zk_watch_count: The number of watchers setup over Zookeeper nodes. +zk_approximate_data_size: Size of data in bytes that a zookeeper server has in its data tree +zk_outstanding_requests: Number of currently executing requests +zk_packets_sent: Count of the number of zookeeper packets sent from a server +zk_packets_received: Count of the number of zookeeper packets received by a server +zk_num_alive_connections: Number of active clients connected to a zookeeper server +zk_open_file_descriptor_count: Number of file descriptors that a zookeeper server has open +zk_max_file_descriptor_count: Maximum number of file descriptors that a zookeeper server can open +zk_avg_latency: Average time in milliseconds for requests to be processed +zk_min_latency: Minimum time in milliseconds for a request to be processed +zk_max_latency: Maximum time in milliseconds for a request to be processed \ No newline at end of file