add apiserver&coredns dashboards

This commit is contained in:
kongfei 2022-08-08 12:25:36 +08:00
parent e330ae6695
commit 3a77b8d354
2 changed files with 1046 additions and 0 deletions

504
prometheus/coredns.json Normal file
View File

@ -0,0 +1,504 @@
{
"name": "Kubernetes / CoreDNS",
"tags": "Categraf",
"configs": {
"version": "2.0.0",
"links": [],
"var": [
{
"name": "instance",
"definition": "label_values(up{job=\"coredns\"}, instance)",
"multi": true,
"allOption": true
},
{
"name": "protocol",
"definition": "label_values(coredns_dns_requests_total, proto)",
"allOption": false,
"multi": false
}
],
"panels": [
{
"targets": [
{
"refId": "A",
"expr": "up{job=\"coredns\", instance=~\"$instance\"}",
"legend": "{{ instance }}"
}
],
"name": "CoreDNS - Health Status",
"links": [],
"description": "core dns健康状态",
"custom": {
"textMode": "value",
"colorMode": "value",
"calc": "lastNotNull",
"colSpan": 1,
"textSize": {}
},
"options": {
"valueMappings": [
{
"options": {
"0": {
"text": "DOWN"
},
"1": {
"text": "UP"
}
},
"type": "value"
}
],
"standardOptions": {
"util": "none"
}
},
"version": "2.0.0",
"type": "stat",
"layout": {
"h": 3,
"w": 24,
"x": 0,
"y": 0,
"i": "5d6560c5-6137-4632-bb88-ff8c9cf42e9d"
},
"id": "5d6560c5-6137-4632-bb88-ff8c9cf42e9d"
},
{
"targets": [
{
"refId": "A",
"expr": "rate(process_cpu_seconds_total{job=\"coredns\", instance=~\"$instance\"}[5m])",
"legend": "{{ instance }}"
}
],
"name": "CoreDNS - CPU Usage by instance",
"links": [],
"description": "coredns cpu使用率",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "percentUnit",
"decimals": 2
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 3,
"i": "9c187c1c-f5cd-4aab-af81-09169948ab82"
},
"id": "9c187c1c-f5cd-4aab-af81-09169948ab82"
},
{
"targets": [
{
"refId": "A",
"expr": "process_resident_memory_bytes{job=\"coredns\", instance=~\"$instance\"}",
"legend": "{{ instance }}"
}
],
"name": "CoreDNS - Memory Usage by instance",
"links": [],
"description": "coredns 内存使用量",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "bytesIEC"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 3,
"i": "8cff2618-b2d4-4fb4-bfc2-d1d4c4f1b35c"
},
"id": "8cff2618-b2d4-4fb4-bfc2-d1d4c4f1b35c"
},
{
"targets": [
{
"refId": "A",
"expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\",proto=\"$protocol\"}[5m]))",
"legend": "total $protocol requests"
}
],
"name": "CoreDNS - Total DNS Requests ($protocol)",
"links": [],
"description": "coredns请求量",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 11,
"i": "69690063-d044-4547-9f5f-126e5f8bf55a"
},
"id": "69690063-d044-4547-9f5f-126e5f8bf55a"
},
{
"targets": [
{
"refId": "A",
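"expr": "sum(rate(coredns_dns_request_size_bytes_sum{instance=~\"$instance\",proto=\"$protocol\"}[5m])) by (proto) / sum(rate(coredns_dns_request_size_bytes_count{instance=~\"$instance\",proto=\"$protocol\"}[5m])) by (proto)",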
"legend": "average $protocol packet size"
}
],
"name": "CoreDNS - Average Packet Size ($protocol)",
"links": [],
"description": "coredns 平均包大小",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {
"util": "bytesIEC"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 11,
"i": "0c8ac9ee-2a3e-4e7e-b338-748d79f6cbb6"
},
"id": "0c8ac9ee-2a3e-4e7e-b338-748d79f6cbb6"
},
{
"targets": [
{
"refId": "A",
"expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\"}[5m])) by (type)",
"legend": "{{ type }}"
}
],
"name": "CoreDNS - Requests by type",
"links": [],
"description": "coredns 请求量,按请求类型统计",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 19,
"i": "a6928b49-cf0a-443e-a8fd-b999685df0be"
},
"id": "a6928b49-cf0a-443e-a8fd-b999685df0be"
},
{
"targets": [
{
"refId": "A",
"expr": "sum(rate(coredns_dns_responses_total{instance=~\"$instance\"}[5m])) by (rcode)",
"legend": "{{ rcode }}"
}
],
"name": "CoreDNS - Requests by return code",
"links": [],
"description": "coredns 请求量 按返回码统计",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 19,
"i": "94713dc3-acb7-43b5-ae2f-399b2da61763"
},
"id": "94713dc3-acb7-43b5-ae2f-399b2da61763"
},
{
"targets": [
{
"refId": "A",
"expr": "sum(rate(coredns_forward_requests_total{instance=~\"$instance\"}[5m]))",
"legend": "total forward requests"
}
],
"name": "CoreDNS - Total Forward Requests",
"links": [],
"description": "coredns转发请求量",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 27,
"i": "b15af6b1-107d-4246-9eec-06fd370d4d35"
},
"id": "b15af6b1-107d-4246-9eec-06fd370d4d35"
},
{
"targets": [
{
"refId": "A",
"expr": "sum(rate(coredns_forward_responses_total{instance=~\"$instance\",rcode=~\"SERVFAIL|REFUSED\"}[5m])) by (rcode)",
"legend": "{{ rcode }}"
}
],
"name": "CoreDNS - DNS Errors",
"links": [],
"description": "coredns error",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 27,
"i": "6a656941-722f-4815-901a-438f677812cd"
},
"id": "6a656941-722f-4815-901a-438f677812cd"
},
{
"targets": [
{
"refId": "A",
"expr": "sum(rate(coredns_cache_hits_total{instance=~\"$instance\"}[5m])) by (type)",
"legend": "hits {{ type }}"
},
{
"refId": "B",
"expr": "sum(rate(coredns_cache_misses_total{instance=~\"$instance\"}[5m])) by (type)",
"legend": "misses"
}
],
"name": "CoreDNS - Cache Hits / Misses",
"links": [],
"description": "coredns缓存命中/缺失次数",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 35,
"i": "bccb6071-786c-44ed-8e5e-8b47a8c5349e"
},
"id": "bccb6071-786c-44ed-8e5e-8b47a8c5349e"
},
{
"targets": [
{
"refId": "A",
"expr": "sum(coredns_cache_entries{instance=~\"$instance\"}) by (type)",
"legend": "{{ type }}"
}
],
"name": "CoreDNS - Cache Size",
"links": [],
"description": "coredns cache 条目数",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 35,
"i": "331d24a5-65e5-4f89-ab92-38d0ede3cb61"
},
"id": "331d24a5-65e5-4f89-ab92-38d0ede3cb61"
}
]
}
}

View File

@ -0,0 +1,542 @@
{
"name": "Kubernetes / API Server",
"tags": "Categraf",
"configs": {
"version": "2.0.0",
"links": [],
"var": [],
"panels": [
{
"targets": [
{
"refId": "A",
"expr": "up{job=\"apiserver\"}",
"legend": "{{ instance }}"
}
],
"name": "API Server - Health Status",
"links": [],
"description": "apiserver的实例健康状态,0表示down,1表示up",
"custom": {
"textMode": "value",
"colorMode": "value",
"calc": "lastNotNull",
"colSpan": 1,
"textSize": {}
},
"options": {
"valueMappings": [
{
"options": {
"0": {
"text": "DOWN"
},
"1": {
"text": "UP"
}
},
"type": "value"
}
],
"standardOptions": {
"util": "none"
}
},
"version": "2.0.0",
"type": "stat",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 0,
"i": "98f46bc1-c078-40f2-915c-f0836957bf2f",
"isResizable": true
},
"id": "98f46bc1-c078-40f2-915c-f0836957bf2f"
},
{
"targets": [
{
"refId": "A",
"expr": "apiserver_requested_deprecated_apis",
"legend": ""
}
],
"name": "Deprecated Kubernetes Resources",
"links": [],
"description": "当前版本apiserver使用未来版本中要移除的资源",
"custom": {
"showHeader": true,
"colorMode": "value",
"calc": "lastNotNull",
"displayMode": "labelsOfSeriesToRows",
"columns": [
"group",
"version",
"resource",
"removed_release"
],
"sortOrder": "ascend"
},
"options": {
"standardOptions": {}
},
"overrides": [
{}
],
"version": "2.0.0",
"type": "table",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 0,
"i": "73beb13a-bd10-4a68-bb9e-5b9ab63da154",
"isResizable": true
},
"id": "73beb13a-bd10-4a68-bb9e-5b9ab63da154"
},
{
"targets": [
{
"refId": "A",
"expr": "sum by (code) (rate(apiserver_request_total[5m]))",
"legend": "{{ code }}"
}
],
"name": "API Server - HTTP Requests by code",
"links": [],
"description": "按照返回码分类统计apiserver请求数",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 8,
"i": "1cfa42b1-9dcf-471c-90ff-8ffe656d4b11",
"isResizable": true
},
"id": "1cfa42b1-9dcf-471c-90ff-8ffe656d4b11"
},
{
"targets": [
{
"refId": "A",
"expr": "sum by (verb) (rate(apiserver_request_total[5m]))",
"legend": "{{ verb }}"
}
],
"name": "API Server - HTTP Requests by verb",
"links": [],
"description": "按照请求动作分类统计apiserver的请求数",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 8,
"i": "94def0cb-0b86-42f7-a4b2-dde714bbb918",
"isResizable": true
},
"id": "94def0cb-0b86-42f7-a4b2-dde714bbb918"
},
{
"targets": [
{
"refId": "A",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"apiserver\"}[5m])) by (instance, le)) * 1000",
"legend": "{{ instance }}"
}
],
"name": "API Server - HTTP Requests Latency by instance",
"links": [],
"description": "apiserver的P99响应延迟",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "milliseconds"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 16,
"i": "1e775704-9ee4-45ce-9d24-b49af89fb5c7",
"isResizable": true
},
"id": "1e775704-9ee4-45ce-9d24-b49af89fb5c7"
},
{
"targets": [
{
"refId": "A",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"apiserver\"}[5m])) by (verb, le)) * 1000",
"legend": "{{ verb }}"
}
],
"name": "API Server - HTTP Requests Latency by verb",
"links": [],
"description": "apiserver的P99响应延迟,按请求动作分类统计",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "milliseconds"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 16,
"i": "045dca2d-d69b-47a7-b25e-656adb357e11",
"isResizable": true
},
"id": "045dca2d-d69b-47a7-b25e-656adb357e11"
},
{
"targets": [
{
"refId": "A",
"expr": "sum by(instance) (rate(apiserver_request_total{code=~\"5..\", job=\"apiserver\"}[5m]))\n / sum by(instance) (rate(apiserver_request_total{job=\"apiserver\"}[5m]))",
"legend": "{{ instance }}"
}
],
"name": "API Server - Errors by Instance",
"links": [],
"description": "apiserver的5xx 错误率(5xx请求数/总请求数)",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 24,
"i": "92a209a1-7d30-4627-9ae1-55ded5095ed7",
"isResizable": true
},
"id": "92a209a1-7d30-4627-9ae1-55ded5095ed7"
},
{
"targets": [
{
"refId": "A",
"expr": "sum by(verb) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\"}[5m]))\n / sum by(verb) (rate(apiserver_request_total{job=\"apiserver\"}[5m]))",
"legend": "{{ verb }}"
}
],
"name": "API Server - Errors by verb",
"links": [],
"description": "apiserver的5xx错误率,按请求动作分类统计",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 24,
"i": "1ca62e0b-72df-47d1-93ba-048ed49e9cb5",
"isResizable": true
},
"id": "1ca62e0b-72df-47d1-93ba-048ed49e9cb5"
},
{
"targets": [
{
"refId": "A",
"expr": "sum(rate(apiserver_request_total[5m])) by (instance)",
"legend": "{{ instance }}"
}
],
"name": "API Server - Stacked HTTP Requests by instance",
"links": [],
"description": "5分钟内apiserver的请求数统计",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "normal"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 32,
"i": "3e9f9df7-d9fb-4791-b3b2-2c52678f060f",
"isResizable": true
},
"id": "3e9f9df7-d9fb-4791-b3b2-2c52678f060f"
},
{
"targets": [
{
"refId": "A",
"expr": "sum(workqueue_depth{job=\"apiserver\"}) by (instance)",
"legend": "{{ instance }}"
}
],
"name": "API Server - Work Queue by instance",
"links": [],
"description": "apiserver工作队列深度,越接近0越好",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 32,
"i": "83f22cf4-9c65-4ad3-900b-fa6fc914dd88",
"isResizable": true
},
"id": "83f22cf4-9c65-4ad3-900b-fa6fc914dd88"
},
{
"targets": [
{
"refId": "A",
"expr": "rate(process_cpu_seconds_total{job=\"apiserver\"}[5m])",
"legend": "{{ instance }}"
}
],
"name": "API Server - CPU Usage by instance",
"links": [],
"description": "apiserver的cpu使用率",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "percentUnit",
"decimals": 2
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 0,
"y": 40,
"i": "3d5c1ae5-e640-4986-9202-78258169bffb",
"isResizable": true
},
"id": "3d5c1ae5-e640-4986-9202-78258169bffb"
},
{
"targets": [
{
"refId": "A",
"expr": "process_resident_memory_bytes{job=\"apiserver\"}",
"legend": "{{ instance }}"
}
],
"name": "API Server - Memory Usage by instance",
"links": [],
"description": "apiserver的内存使用量",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "bytesIEC"
},
"thresholds": {
"steps": []
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.25,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 8,
"w": 12,
"x": 12,
"y": 40,
"i": "1550a2d5-c808-4174-865a-a41b2c16b486",
"isResizable": true
},
"id": "1550a2d5-c808-4174-865a-a41b2c16b486"
}
]
}
}