采集zookeeper监控数据
This commit is contained in:
parent
80a68edd1a
commit
ff879d7d75
|
@ -41,6 +41,7 @@ import (
|
|||
_ "flashcat.cloud/categraf/inputs/switch_legacy"
|
||||
_ "flashcat.cloud/categraf/inputs/system"
|
||||
_ "flashcat.cloud/categraf/inputs/tomcat"
|
||||
_ "flashcat.cloud/categraf/inputs/zookeeper"
|
||||
)
|
||||
|
||||
const inputFilePrefix = "input."
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# [[instances]]
|
||||
# address = "127.0.0.1:2181"
|
||||
# timeout = 10
|
||||
|
||||
# important! use global unique string to specify instance
|
||||
# labels = { instance="n9e-10.2.3.4:2181" }
|
||||
|
||||
## Optional TLS Config
|
||||
# use_tls = false
|
||||
# tls_min_version = "1.2"
|
||||
# tls_ca = "/etc/categraf/ca.pem"
|
||||
# tls_cert = "/etc/categraf/cert.pem"
|
||||
# tls_key = "/etc/categraf/key.pem"
|
||||
## Use TLS but skip chain & host verification
|
||||
# insecure_skip_verify = true
|
|
@ -0,0 +1,41 @@
|
|||
# zookeeper
|
||||
|
||||
移植于 [dabealu/zookeeper-exporter](https://github.com/dabealu/zookeeper-exporter),原理就是利用 Zookeeper 提供的四字命令(The Four Letter Words)获取监控信息。
|
||||
|
||||
需要注意的是,在 zookeeper v3.4.10 以后添加了四字命令白名单,需要在 zookeeper 的配置文件 `zoo.cfg` 中新增白名单配置:
|
||||
```
|
||||
4lw.commands.whitelist=mntr,ruok
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
zookeeper 插件的配置在 `conf/input.zookeeper/zookeeper.toml`,最简单的配置如下:
|
||||
|
||||
```toml
|
||||
[[instances]]
|
||||
address = "127.0.0.1:2181"
|
||||
labels = { instance="n9e-10.23.25.2:2181" }
|
||||
```
|
||||
|
||||
如果要监控多个 zookeeper 实例,就增加 instances 即可:
|
||||
|
||||
```toml
|
||||
[[instances]]
|
||||
address = "10.23.25.2:2181"
|
||||
username = ""
|
||||
password = ""
|
||||
labels = { instance="n9e-10.23.25.2:2181" }
|
||||
|
||||
[[instances]]
|
||||
address = "10.23.25.3:2181"
|
||||
username = ""
|
||||
password = ""
|
||||
labels = { instance="n9e-10.23.25.3:2181" }
|
||||
```
|
||||
|
||||
建议通过 labels 配置附加一个 instance 标签,便于后面复用监控大盘。
|
||||
|
||||
## 监控大盘和告警规则
|
||||
|
||||
该 README 的同级目录下,提供了 dashboard.json 就是监控大盘的配置,alerts.json 是告警规则,可以导入夜莺使用。
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
[
|
||||
{
|
||||
"name": "【Zookeeper】Zookeeper Down",
|
||||
"note": "",
|
||||
"prod": "",
|
||||
"algorithm": "",
|
||||
"algo_params": null,
|
||||
"delay": 0,
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 0,
|
||||
"prom_ql": "zk_up == 0",
|
||||
"prom_eval_interval": 15,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"0"
|
||||
],
|
||||
"enable_in_bg": 0,
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": [],
|
||||
"notify_users_obj": [],
|
||||
"notify_repeat_step": 60,
|
||||
"recover_duration": 0,
|
||||
"callbacks": [],
|
||||
"runbook_url": "",
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "【Zookeeper】Zookeeper instance is not ok",
|
||||
"note": "",
|
||||
"prod": "",
|
||||
"algorithm": "",
|
||||
"algo_params": null,
|
||||
"delay": 0,
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 120,
|
||||
"prom_ql": "zk_ruok == 0",
|
||||
"prom_eval_interval": 15,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"0"
|
||||
],
|
||||
"enable_in_bg": 0,
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": [],
|
||||
"notify_users_obj": [],
|
||||
"notify_repeat_step": 60,
|
||||
"recover_duration": 0,
|
||||
"callbacks": [],
|
||||
"runbook_url": "",
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "【Zookeeper】Zookeeper 集群不存在 Leader",
|
||||
"note": "",
|
||||
"prod": "",
|
||||
"algorithm": "",
|
||||
"algo_params": null,
|
||||
"delay": 0,
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 0,
|
||||
"prom_ql": "sum(zk_server_leader) == 0",
|
||||
"prom_eval_interval": 15,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"0"
|
||||
],
|
||||
"enable_in_bg": 0,
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": [],
|
||||
"notify_users_obj": [],
|
||||
"notify_repeat_step": 60,
|
||||
"recover_duration": 0,
|
||||
"callbacks": [],
|
||||
"runbook_url": "",
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "【Zookeeper】Zookeeper 集群存在多个 Leader",
|
||||
"note": "",
|
||||
"prod": "",
|
||||
"algorithm": "",
|
||||
"algo_params": null,
|
||||
"delay": 0,
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 0,
|
||||
"prom_ql": "sum(zk_server_leader) > 1",
|
||||
"prom_eval_interval": 15,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"0"
|
||||
],
|
||||
"enable_in_bg": 0,
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": [],
|
||||
"notify_users_obj": [],
|
||||
"notify_repeat_step": 60,
|
||||
"recover_duration": 0,
|
||||
"callbacks": [],
|
||||
"runbook_url": "",
|
||||
"append_tags": []
|
||||
}
|
||||
]
|
|
@ -0,0 +1,596 @@
|
|||
{
|
||||
"name": "Zookeeper",
|
||||
"tags": "",
|
||||
"configs": {
|
||||
"var": [
|
||||
{
|
||||
"definition": "label_values(zk_up,instance)",
|
||||
"name": "instance"
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"type": "row",
|
||||
"id": "204ed80c-88a7-4075-90bf-0dce6f319caa",
|
||||
"name": "分组",
|
||||
"collapsed": true,
|
||||
"layout": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0,
|
||||
"i": "204ed80c-88a7-4075-90bf-0dce6f319caa",
|
||||
"isResizable": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_up{instance=~\"$instance\"}"
|
||||
}
|
||||
],
|
||||
"name": "Status",
|
||||
"custom": {
|
||||
"textMode": "value",
|
||||
"colorMode": "value",
|
||||
"calc": "lastNotNull",
|
||||
"colSpan": 1,
|
||||
"textSize": {}
|
||||
},
|
||||
"options": {
|
||||
"valueMappings": [
|
||||
{
|
||||
"type": "special",
|
||||
"match": {
|
||||
"special": 1
|
||||
},
|
||||
"result": {
|
||||
"text": "Up",
|
||||
"color": "#7ed321"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "special",
|
||||
"match": {
|
||||
"special": 0
|
||||
},
|
||||
"result": {
|
||||
"text": "Down",
|
||||
"color": "#d0021b"
|
||||
}
|
||||
}
|
||||
],
|
||||
"standardOptions": {}
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "stat",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 1,
|
||||
"i": "ef4804c3-5eb2-43b6-af4f-35cad5114e7e",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "ef4804c3-5eb2-43b6-af4f-35cad5114e7e"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_server_leader{instance=~\"$instance\"}"
|
||||
}
|
||||
],
|
||||
"name": "Is Leader",
|
||||
"custom": {
|
||||
"textMode": "value",
|
||||
"colorMode": "value",
|
||||
"calc": "lastNotNull",
|
||||
"colSpan": 1,
|
||||
"textSize": {}
|
||||
},
|
||||
"options": {
|
||||
"valueMappings": [
|
||||
{
|
||||
"type": "special",
|
||||
"match": {
|
||||
"special": 1
|
||||
},
|
||||
"result": {
|
||||
"text": "Yes",
|
||||
"color": "#7ed321"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "special",
|
||||
"match": {
|
||||
"special": 0
|
||||
},
|
||||
"result": {
|
||||
"text": "No",
|
||||
"color": "#d0021b"
|
||||
}
|
||||
}
|
||||
],
|
||||
"standardOptions": {}
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "stat",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 4,
|
||||
"y": 1,
|
||||
"i": "3f1d1548-fc5b-4188-bf72-d5fea7c682ca",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "f73b13a6-62d2-4b7d-9448-b0c4cb0d5144"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_ephemerals_count{instance=~\"$instance\"}"
|
||||
}
|
||||
],
|
||||
"name": "Ephemerals Count",
|
||||
"custom": {
|
||||
"textMode": "value",
|
||||
"colorMode": "value",
|
||||
"calc": "lastNotNull",
|
||||
"colSpan": 1,
|
||||
"textSize": {}
|
||||
},
|
||||
"options": {
|
||||
"valueMappings": [],
|
||||
"standardOptions": {}
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "stat",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 8,
|
||||
"y": 1,
|
||||
"i": "78ff9d1f-8d3c-440e-9fd7-4040575eddf9",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "d55c0555-b3fa-466d-a380-4a2a98af3431"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_znode_count{instance=~\"$instance\"}"
|
||||
}
|
||||
],
|
||||
"name": "Znode Count",
|
||||
"custom": {
|
||||
"textMode": "value",
|
||||
"colorMode": "value",
|
||||
"calc": "lastNotNull",
|
||||
"colSpan": 1,
|
||||
"textSize": {}
|
||||
},
|
||||
"options": {
|
||||
"valueMappings": [],
|
||||
"standardOptions": {}
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "stat",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 12,
|
||||
"y": 1,
|
||||
"i": "612b69b8-346a-419e-bb22-1d372535bac8",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "15c3ddc5-a30f-4e32-904f-4590494ee11b"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_watch_count{instance=~\"$instance\"}"
|
||||
}
|
||||
],
|
||||
"name": "Watch Count",
|
||||
"custom": {
|
||||
"textMode": "value",
|
||||
"colorMode": "value",
|
||||
"calc": "lastNotNull",
|
||||
"colSpan": 1,
|
||||
"textSize": {}
|
||||
},
|
||||
"options": {
|
||||
"valueMappings": [],
|
||||
"standardOptions": {}
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "stat",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 16,
|
||||
"y": 1,
|
||||
"i": "a2417ea6-e3d7-41cd-9985-d6ea5db43217",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "3b903da1-c9a4-4a87-b0a0-afd3defe4c15"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_version{instance=~\"$instance\"}"
|
||||
}
|
||||
],
|
||||
"name": "Version",
|
||||
"custom": {
|
||||
"showHeader": true,
|
||||
"calc": "lastNotNull",
|
||||
"displayMode": "labelsOfSeriesToRows",
|
||||
"columns": [
|
||||
"zk_host",
|
||||
"version"
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"valueMappings": [],
|
||||
"standardOptions": {}
|
||||
},
|
||||
"overrides": [
|
||||
{}
|
||||
],
|
||||
"version": "2.0.0",
|
||||
"type": "table",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 20,
|
||||
"y": 1,
|
||||
"i": "4cc8bad9-b441-4d10-abb3-7d50bb624967",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "8a3cf9d0-bb26-4b67-8fe6-c9d76e5eb618"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "rate(zk_packets_sent{instance=~\"$instance\"}[5m])",
|
||||
"legend": "{{ instance }}"
|
||||
}
|
||||
],
|
||||
"name": "Packets Sent",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "none"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 5,
|
||||
"i": "9dcf936f-72c1-434b-af93-369c9c991bb2",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "9dcf936f-72c1-434b-af93-369c9c991bb2"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "rate(zk_packets_received{instance=~\"$instance\"}[5m])",
|
||||
"legend": "{{ instance }}"
|
||||
}
|
||||
],
|
||||
"name": "Packets Received",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "none"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 5,
|
||||
"i": "bf87aba7-3d5b-427c-a0cf-426c65fbecae",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "e085001a-f693-4723-958a-b910843e0339"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_num_alive_connections{instance=~\"$instance\"}",
|
||||
"legend": "{{ instance }}"
|
||||
}
|
||||
],
|
||||
"name": "Alive Connections",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "none"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 9,
|
||||
"i": "f7043b0a-a853-4e4c-aec3-46e2dcf52586",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "5e2183f9-6277-43f9-b9be-6dbaa35cb582"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_open_file_descriptor_count{instance=~\"$instance\"}",
|
||||
"legend": "{{ instance }}"
|
||||
}
|
||||
],
|
||||
"name": "File Descriptors",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "none"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 9,
|
||||
"i": "d298a41a-26e9-46d7-b9e4-7497d1f9ef7d",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "ba3aebdc-5982-4b89-82be-c28d03776c0f"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_avg_latency{instance=~\"$instance\"}",
|
||||
"legend": "{{ instance }}"
|
||||
}
|
||||
],
|
||||
"name": "Avg Latency",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "none"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {
|
||||
"util": "seconds"
|
||||
},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 13,
|
||||
"i": "97b6b491-6d71-4e54-8ac5-9c1214f5b42e",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "a2e8a9bc-6b09-40d9-80c1-1dc0f0cbd5e2"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_min_latency{instance=~\"$instance\"}",
|
||||
"legend": "{{ instance }}"
|
||||
}
|
||||
],
|
||||
"name": "Min Latency(seconds)",
|
||||
"custom": {
|
||||
"textMode": "value",
|
||||
"colorMode": "value",
|
||||
"calc": "lastNotNull",
|
||||
"colSpan": 1,
|
||||
"textSize": {}
|
||||
},
|
||||
"options": {
|
||||
"standardOptions": {}
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "stat",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 13,
|
||||
"i": "6abdaef3-9985-4325-a563-929f515ddbbd",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "7eb7f2f2-7319-42b2-9fa1-2868fa490eaf"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_max_latency{instance=~\"$instance\"}",
|
||||
"legend": "{{ instance }}"
|
||||
}
|
||||
],
|
||||
"name": "Max Latency(seconds)",
|
||||
"custom": {
|
||||
"textMode": "value",
|
||||
"colorMode": "value",
|
||||
"calc": "lastNotNull",
|
||||
"colSpan": 1,
|
||||
"textSize": {}
|
||||
},
|
||||
"options": {
|
||||
"standardOptions": {}
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "stat",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 13,
|
||||
"i": "b2c368fa-f61c-4ddd-a7d8-d214ec67182d",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "524ca86b-2854-4ed4-a3bc-a506ae7763eb"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_outstanding_requests{instance=~\"$instance\"}",
|
||||
"legend": "{{ instance }}"
|
||||
}
|
||||
],
|
||||
"name": "Outstanding Requests",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "none"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {
|
||||
"util": "seconds"
|
||||
},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 17,
|
||||
"i": "eb83dd45-98e7-4d76-94c3-24681b4957a8",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "e24f9e2b-ce0a-4cf2-bf0d-bead1df222b2"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "zk_approximate_data_size{instance=~\"$instance\"}",
|
||||
"legend": "{{ instance }}"
|
||||
}
|
||||
],
|
||||
"name": "Approx Data Size",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "none"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {
|
||||
"util": "bytesIEC"
|
||||
},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 17,
|
||||
"i": "a1ea51d8-9b26-4eb8-8be4-97acf92f7ca2",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "c94ba892-2af9-4a31-9f0f-d7e7786c7530"
|
||||
}
|
||||
],
|
||||
"version": "2.0.0"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,219 @@
|
|||
package zookeeper
|
||||
|
||||
import (
|
||||
crypto_tls "crypto/tls"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"flashcat.cloud/categraf/config"
|
||||
"flashcat.cloud/categraf/inputs"
|
||||
"flashcat.cloud/categraf/pkg/tls"
|
||||
"flashcat.cloud/categraf/types"
|
||||
"github.com/toolkits/pkg/container/list"
|
||||
)
|
||||
|
||||
// inputName is the name under which this plugin registers itself.
const inputName = "zookeeper"

// Fixed strings used to recognise, or report on, replies to ZooKeeper
// four-letter-word commands.
const (
	// commandNotAllowedTmpl is the log format used when a command is rejected;
	// its two %q verbs receive the command name and the instance address.
	commandNotAllowedTmpl = "warning: %q command isn't allowed at %q, see '4lw.commands.whitelist' ZK config parameter"

	// instanceNotServingMessage is compared against the first line of a
	// "mntr" reply to detect an instance that is not serving requests.
	instanceNotServingMessage = "This ZooKeeper instance is not currently serving requests"

	// cmdNotExecutedSffx is searched for in a reply to detect a command that
	// was refused because it is missing from the whitelist.
	cmdNotExecutedSffx = "is not executed because it is not in the whitelist."
)

// versionRE captures the leading "major.minor.patch" part of a version string
// and matches (and therefore discards on replace) everything that follows it.
var versionRE = regexp.MustCompile(`^([0-9]+\.[0-9]+\.[0-9]+).*$`)

// metricNameReplacer rewrites '-' and '.' to '_' in metric names.
var metricNameReplacer = strings.NewReplacer("-", "_", ".", "_")
|
||||
|
||||
type Instance struct {
|
||||
Address string `toml:"address"`
|
||||
Timeout int `toml:"timeout"`
|
||||
Labels map[string]string `toml:"labels"`
|
||||
tls.ClientConfig
|
||||
}
|
||||
|
||||
func (i *Instance) ZkConnect() (net.Conn, error) {
|
||||
dialer := net.Dialer{Timeout: time.Duration(i.Timeout) * time.Second}
|
||||
tcpaddr, err := net.ResolveTCPAddr("tcp", i.Address)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to resolve zookeeper address: %s: %v", i.Address, err)
|
||||
}
|
||||
|
||||
if !i.UseTLS {
|
||||
return dialer.Dial("tcp", tcpaddr.String())
|
||||
}
|
||||
tlsConfig, err := i.TLSConfig()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init tls config: %v", err)
|
||||
}
|
||||
return crypto_tls.DialWithDialer(&dialer, "tcp", tcpaddr.String(), tlsConfig)
|
||||
}
|
||||
|
||||
type Zookeeper struct {
|
||||
config.Interval
|
||||
Instances []*Instance `toml:"instances"`
|
||||
|
||||
Counter uint64
|
||||
wg sync.WaitGroup
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add(inputName, func() inputs.Input {
|
||||
return &Zookeeper{}
|
||||
})
|
||||
}
|
||||
|
||||
func (z *Zookeeper) Prefix() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (z *Zookeeper) Init() error {
|
||||
if len(z.Instances) == 0 {
|
||||
return types.ErrInstancesEmpty
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Drop is a no-op: the plugin keeps no state that needs to be released.
func (z *Zookeeper) Drop() {
}
|
||||
|
||||
func (z *Zookeeper) Gather(slist *list.SafeList) {
|
||||
atomic.AddUint64(&z.Counter, 1)
|
||||
for i := range z.Instances {
|
||||
ins := z.Instances[i]
|
||||
z.wg.Add(1)
|
||||
go z.gatherOnce(slist, ins)
|
||||
}
|
||||
z.wg.Wait()
|
||||
}
|
||||
|
||||
func (z *Zookeeper) gatherOnce(slist *list.SafeList, ins *Instance) {
|
||||
defer z.wg.Done()
|
||||
|
||||
// metrics labels
|
||||
tags := map[string]string{"address": ins.Address, "zk_host": ins.Address}
|
||||
for k, v := range ins.Labels {
|
||||
tags[k] = v
|
||||
}
|
||||
|
||||
begun := time.Now()
|
||||
|
||||
// scrape use seconds
|
||||
defer func(begun time.Time) {
|
||||
use := time.Since(begun).Seconds()
|
||||
slist.PushFront(inputs.NewSample("zk_scrape_use_seconds", use, tags))
|
||||
}(begun)
|
||||
|
||||
// zk_up
|
||||
conn, err := ins.ZkConnect()
|
||||
if err != nil {
|
||||
slist.PushFront(inputs.NewSample("zk_up", 0, tags))
|
||||
log.Println("E! failed connect to zookeeper:"+ins.Address, "err:", err)
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
z.gatherMntrResult(conn, slist, ins, tags)
|
||||
z.gatherRuokResult(conn, slist, ins, tags)
|
||||
|
||||
}
|
||||
|
||||
func (z *Zookeeper) gatherMntrResult(conn net.Conn, slist *list.SafeList, ins *Instance, globalTags map[string]string) {
|
||||
res := sendZookeeperCmd(conn, ins.Address, "mntr")
|
||||
|
||||
// get slice of strings from response, like 'zk_avg_latency 0'
|
||||
lines := strings.Split(res, "\n")
|
||||
|
||||
// 'mntr' command isn't allowed in zk config, log as warning
|
||||
if strings.Contains(lines[0], cmdNotExecutedSffx) {
|
||||
slist.PushFront(inputs.NewSample("zk_up", 0, globalTags))
|
||||
log.Printf(commandNotAllowedTmpl, "mntr", ins.Address)
|
||||
return
|
||||
}
|
||||
|
||||
slist.PushFront(inputs.NewSample("zk_up", 1, globalTags))
|
||||
|
||||
// skip instance if it in a leader only state and doesnt serving client requests
|
||||
if lines[0] == instanceNotServingMessage {
|
||||
slist.PushFront(inputs.NewSample("zk_server_leader", 1, globalTags))
|
||||
return
|
||||
}
|
||||
|
||||
// split each line into key-value pair
|
||||
for _, l := range lines {
|
||||
if l == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
kv := strings.Split(strings.Replace(l, "\t", " ", -1), " ")
|
||||
key := kv[0]
|
||||
value := kv[1]
|
||||
|
||||
switch key {
|
||||
case "zk_server_state":
|
||||
if value == "leader" {
|
||||
slist.PushFront(inputs.NewSample("zk_server_leader", 1, globalTags))
|
||||
} else {
|
||||
slist.PushFront(inputs.NewSample("zk_server_leader", 0, globalTags))
|
||||
}
|
||||
|
||||
case "zk_version":
|
||||
version := versionRE.ReplaceAllString(value, "$1")
|
||||
slist.PushFront(inputs.NewSample("zk_version", 1, globalTags, map[string]string{"version": version}))
|
||||
|
||||
case "zk_peer_state":
|
||||
slist.PushFront(inputs.NewSample("zk_peer_state", 1, globalTags, map[string]string{"state": value}))
|
||||
|
||||
default:
|
||||
var k string
|
||||
k = metricNameReplacer.Replace(key)
|
||||
if !isDigit(value) {
|
||||
log.Printf("warning: skipping metric %q which holds not-digit value: %q", key, value)
|
||||
continue
|
||||
}
|
||||
slist.PushFront(inputs.NewSample(k, value, globalTags))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (z *Zookeeper) gatherRuokResult(conn net.Conn, slist *list.SafeList, ins *Instance, globalTags map[string]string) {
|
||||
res := sendZookeeperCmd(conn, ins.Address, "ruok")
|
||||
if res == "imok" {
|
||||
slist.PushFront(inputs.NewSample("zk_ruok", 1, globalTags))
|
||||
} else {
|
||||
if strings.Contains(res, cmdNotExecutedSffx) {
|
||||
log.Printf(commandNotAllowedTmpl, "ruok", ins.Address)
|
||||
}
|
||||
slist.PushFront(inputs.NewSample("zk_ruok", 0, globalTags))
|
||||
}
|
||||
}
|
||||
|
||||
// sendZookeeperCmd writes cmd to conn and returns everything the peer sends
// back until it closes the connection (or a read error occurs).
//
// host is used only in log messages. Errors are logged rather than returned:
// a failed write yields the empty string without attempting to read, since no
// reply can follow a command that was never sent; a failed read yields
// whatever bytes were received before the error.
func sendZookeeperCmd(conn net.Conn, host, cmd string) string {
	if _, err := conn.Write([]byte(cmd)); err != nil {
		log.Printf("E! failed to exec Zookeeper command: '%s' on '%s': %s", cmd, host, err)
		return ""
	}

	res, err := ioutil.ReadAll(conn)
	if err != nil {
		log.Printf("E! failed read Zookeeper command: '%s' response from '%s': %s", cmd, host, err)
	}
	return string(res)
}
|
||||
|
||||
// isDigit reports whether in parses as a number: first as an int, and if
// that fails, as a 64-bit float.
func isDigit(in string) bool {
	if _, intErr := strconv.Atoi(in); intErr == nil {
		return true
	}
	_, floatErr := strconv.ParseFloat(in, 64)
	return floatErr == nil
}
|
Loading…
Reference in New Issue