diff --git a/etc/alerts/kafka_by_exporter.json b/etc/alerts/kafka_by_exporter.json index 1a52b418..cb5db43b 100644 --- a/etc/alerts/kafka_by_exporter.json +++ b/etc/alerts/kafka_by_exporter.json @@ -1,58 +1,72 @@ [ - { - "name": "数据有丢失风险-同步副本数小于3", - "note": "", - "severity": 2, - "disabled": 0, - "prom_for_duration": 60, - "prom_ql": "sum(kafka_topic_partition_in_sync_replica) by (topic) < 3", - "prom_eval_interval": 15, - "enable_stime": "00:00", - "enable_etime": "23:59", - "enable_days_of_week": [ - "1", - "2", - "3", - "4", - "5", - "6", - "0" - ], - "enable_in_bg": 0, - "notify_recovered": 1, - "notify_channels": [], - "notify_repeat_step": 60, - "recover_duration": 0, - "callbacks": [], - "runbook_url": "", - "append_tags": [] - }, - { - "name": "消费能力不足-积压消息数超过50条", - "note": "", - "severity": 2, - "disabled": 0, - "prom_for_duration": 60, - "prom_ql": "sum(kafka_topic_partition_current_offset{instance=\"$instance\"}) by (topic) - sum(kafka_consumergroup_current_offset{instance=\"$instance\"}) by (topic) ", - "prom_eval_interval": 15, - "enable_stime": "00:00", - "enable_etime": "23:59", - "enable_days_of_week": [ - "1", - "2", - "3", - "4", - "5", - "6", - "0" - ], - "enable_in_bg": 0, - "notify_recovered": 1, - "notify_channels": [], - "notify_repeat_step": 60, - "recover_duration": 0, - "callbacks": [], - "runbook_url": "", - "append_tags": [] - } - ] \ No newline at end of file + { + "name": "数据有丢失风险-副本数小于3", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 2, + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "sum(kafka_topic_partition_in_sync_replica) by (topic) < 3", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + 
"runbook_url": "", + "append_tags": [ + "service=kafka" + ] + }, + { + "name": "消费能力不足-延迟超过5分钟", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 2, + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "kafka_consumer_lag_millis / 1000 > 300", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "service=kafka" + ] + } +] \ No newline at end of file diff --git a/etc/dashboards/kafka_by_exporter.json b/etc/dashboards/kafka_by_exporter.json index 47f4cc1c..b2666468 100644 --- a/etc/dashboards/kafka_by_exporter.json +++ b/etc/dashboards/kafka_by_exporter.json @@ -1,63 +1,362 @@ -[ - { - "name": "Kafka - 模板", - "tags": "Kafka Prometheus ", - "configs": "{\"var\":[{\"name\":\"instance\",\"definition\":\"label_values(kafka_brokers, instance)\"},{\"name\":\"job\",\"definition\":\"label_values(kafka_brokers, job)\"}]}", - "chart_groups": [ - { - "name": "overview", - "weight": 0, - "charts": [ +{ + "name": "Kafka - 模板", + "tags": "Kafka Prometheus", + "configs": { + "var": [ { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"count(count by (topic) (kafka_topic_partitions))\"}],\"name\":\"topics\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":50}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":8,\"x\":8,\"y\":0,\"i\":\"0\"}}", - "weight": 0 - }, - { - "configs": 
"{\"targets\":[{\"refId\":\"A\",\"expr\":\"kafka_brokers\"}],\"name\":\"brokers\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":50}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":8,\"x\":0,\"y\":0,\"i\":\"1\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"sum(kafka_topic_partitions)\"}],\"name\":\"partitions\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{\"value\":50}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}", - "weight": 0 + "name": "cluster", + "definition": "label_values(kafka_brokers, cluster)", + "type": "query" } - ] - }, - { - "name": "throughput", - "weight": 1, - "charts": [ + ], + "version": "2.0.0", + "panels": [ { - "configs": "{\"targets\":[{\"expr\":\"sum(rate(kafka_topic_partition_current_offset{instance=\\\"$instance\\\"}[1m])) by (topic)\"}],\"name\":\"Message in per second\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":0,\"i\":\"0\"}}", - "weight": 0 + "id": "51502c3a-dd6f-41c7-b8f1-87b88826c96e", + "type": "row", + "name": "overview", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 0, + "i": "51502c3a-dd6f-41c7-b8f1-87b88826c96e", + "isResizable": false + }, + "collapsed": true }, { - "configs": "{\"targets\":[{\"expr\":\"sum(kafka_consumer_lag_millis{instance=\\\"$instance\\\"}) by (consumergroup, topic) \",\"legend\":\"{{consumergroup}} (topic: {{topic}})\"}],\"name\":\"Latency by Consumer 
Group\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"humantimeMilliseconds\"},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":2,\"i\":\"1\"}}", - "weight": 0 + "targets": [ + { + "refId": "A", + "expr": "kafka_brokers{cluster=\"$cluster\"}" + } + ], + "name": "brokers", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 50 + } + }, + "options": { + "standardOptions": {} + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 0, + "y": 1, + "i": "e2c1d271-ec43-4821-aa19-451e856af755", + "isResizable": true + }, + "id": "e2c1d271-ec43-4821-aa19-451e856af755" }, { - "configs": "{\"targets\":[{\"expr\":\"sum(rate(kafka_consumergroup_current_offset{instance=\\\"$instance\\\"}[1m])) by (topic)\"}],\"name\":\"Message consume per second\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":0,\"i\":\"2\"}}", - "weight": 0 + "targets": [ + { + "refId": "A", + "expr": "count(count by (topic) (kafka_topic_partitions{cluster=\"$cluster\"}))" + } + ], + "name": "topics", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 50 + } + }, + "options": { + "standardOptions": {} + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 6, + "y": 1, + "i": "fd3a0b9f-fd67-4360-a94c-869fee7b5b98", + "isResizable": true + }, + "id": 
"fd3a0b9f-fd67-4360-a94c-869fee7b5b98" }, { - "configs": "{\"targets\":[{\"expr\":\"sum(kafka_topic_partition_current_offset{instance=\\\"$instance\\\"}) by (topic) - sum(kafka_consumergroup_current_offset{instance=\\\"$instance\\\"}) by (topic) \",\"legend\":\"{{consumergroup}} (topic: {{topic}})\"}],\"name\":\"Lag by Consumer Group\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":2,\"i\":\"3\"}}", - "weight": 0 + "targets": [ + { + "refId": "A", + "expr": "sum(kafka_topic_partitions{cluster=\"$cluster\"})" + } + ], + "name": "partitions", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 50 + } + }, + "options": { + "standardOptions": {} + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 12, + "y": 1, + "i": "e228d857-746b-41b6-8d2d-0152453c46f4", + "isResizable": true + }, + "id": "e228d857-746b-41b6-8d2d-0152453c46f4" + }, + { + "targets": [ + { + "refId": "A", + "expr": "sum(kafka_topic_partition_replicas{cluster=\"$cluster\"})" + } + ], + "name": "Replicas", + "custom": { + "textMode": "valueAndName", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": {} + }, + "options": { + "standardOptions": {} + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 18, + "y": 1, + "i": "85438099-8d6b-4817-b9b9-1d0ed36029cd", + "isResizable": true + }, + "id": "85438099-8d6b-4817-b9b9-1d0ed36029cd" + }, + { + "id": "0db4aac4-86cf-44cd-950e-6c6a99be8ff4", + "type": "row", + "name": "throughput", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 4, + "i": "0db4aac4-86cf-44cd-950e-6c6a99be8ff4", + "isResizable": 
false + }, + "collapsed": true + }, + { + "targets": [ + { + "expr": "sum(rate(kafka_topic_partition_current_offset{cluster=\"$cluster\"}[1m])) by (topic)" + } + ], + "name": "Messages produced per second", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 0, + "y": 5, + "i": "c2ec4036-3081-45cc-b672-024c6df93833", + "isResizable": true + }, + "id": "c2ec4036-3081-45cc-b672-024c6df93833" + }, + { + "targets": [ + { + "expr": "sum(rate(kafka_consumergroup_current_offset{cluster=\"$cluster\"}[1m])) by (topic)" + } + ], + "name": "Messages consumed per second", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 8, + "y": 5, + "i": "7ad651a6-c12c-4d46-8d01-749fa776faef", + "isResizable": true + }, + "id": "7ad651a6-c12c-4d46-8d01-749fa776faef" + }, + { + "targets": [ + { + "expr": "sum(kafka_consumer_lag_millis{cluster=\"$cluster\"}) by (consumergroup, topic)", + "legend": "{{consumergroup}} (topic: {{topic}})" + } + ], + "name": "Latency by Consumer Group", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "humantimeMilliseconds" + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 
16, + "y": 5, + "i": "855aa8f5-0c51-42d4-b9a4-5460b7cd0f5a", + "isResizable": true + }, + "id": "855aa8f5-0c51-42d4-b9a4-5460b7cd0f5a" + }, + { + "id": "20166830-7f85-4665-8f39-bf904267af29", + "type": "row", + "name": "partition/replicate", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 12, + "i": "20166830-7f85-4665-8f39-bf904267af29", + "isResizable": false + }, + "collapsed": true + }, + { + "targets": [ + { + "refId": "A", + "expr": "kafka_topic_partitions{cluster=\"$cluster\"}", + "legend": "{{topic}}" + } + ], + "name": "Partitions per Topic", + "custom": { + "showHeader": true, + "colorMode": "value", + "calc": "lastNotNull", + "displayMode": "seriesToRows" + }, + "options": { + "standardOptions": {} + }, + "overrides": [ + {} + ], + "version": "2.0.0", + "type": "table", + "layout": { + "h": 7, + "w": 12, + "x": 0, + "y": 13, + "i": "8837a52e-c9eb-4afa-acc1-c3a5dac72d3b", + "isResizable": true + }, + "id": "8837a52e-c9eb-4afa-acc1-c3a5dac72d3b" + }, + { + "targets": [ + { + "refId": "A", + "expr": "kafka_topic_partition_under_replicated_partition{cluster=\"$cluster\"}", + "legend": "{{topic}}-{{partition}}" + } + ], + "name": "Partitions Under Replicated", + "description": "副本不同步预案\n1. Restart the Zookeeper leader.\n2. 
Restart the broker\\brokers that are not replicating some of the partitions.", + "custom": { + "showHeader": true, + "colorMode": "value", + "calc": "lastNotNull", + "displayMode": "seriesToRows" + }, + "options": { + "standardOptions": {} + }, + "overrides": [ + {} + ], + "version": "2.0.0", + "type": "table", + "layout": { + "h": 7, + "w": 12, + "x": 12, + "y": 13, + "i": "dd615767-dda7-4da6-b37f-0d484553aac6", + "isResizable": true + }, + "id": "dd615767-dda7-4da6-b37f-0d484553aac6" } - ] - }, - { - "name": "patition/replicate", - "weight": 2, - "charts": [ - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"kafka_topic_partitions{instance=\\\"$instance\\\"}\",\"legend\":\"{{topic}}\"}],\"name\":\"Partitions per Topic\",\"custom\":{\"showHeader\":true,\"calc\":\"lastNotNull\",\"displayMode\":\"seriesToRows\"},\"options\":{\"standardOptions\":{}},\"overrides\":[{}],\"version\":\"2.0.0\",\"type\":\"table\",\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":0,\"i\":\"0\"}}", - "weight": 0 - }, - { - "configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"kafka_topic_partition_under_replicated_partition\",\"legend\":\"{{topic}}-{{partition}}\"}],\"name\":\"Under Replicated\",\"description\":\"副本不同步预案\\n1. Restart the Zookeeper leader.\\n2. Restart the broker\\\\brokers that are not replicating some of the partitions.\",\"custom\":{\"showHeader\":true,\"calc\":\"lastNotNull\",\"displayMode\":\"seriesToRows\"},\"options\":{\"standardOptions\":{}},\"overrides\":[{}],\"version\":\"2.0.0\",\"type\":\"table\",\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":0,\"i\":\"1\"}}", - "weight": 0 - } - ] - } - ] + ] } - ] \ No newline at end of file +} \ No newline at end of file