This commit is contained in:
StarsL.cn 2022-06-04 17:36:18 +08:00
parent b488b01be3
commit 230843e95a
12 changed files with 193 additions and 5 deletions

View File

@ -39,3 +39,110 @@ def ecs_config(services_list,ostype_list):
"""
configs = configs + config_str
return {'code': 20000,'configs': configs }
def get_rules():
    """Return the built-in Prometheus rule file for node-exporter targets.

    The rule file contains three groups:

    * ``node_usage_record_rules`` - recording rules for CPU and memory usage.
    * ``node-exporter`` - alerts for memory, CPU, load, disk, disk I/O and
      network throughput.
    * ``Itself`` - an alert that fires when a scrape target reports ``up == 0``.

    Returns:
        dict: ``{"code": 20000, "rules": <rule file text>}``.
    """
    # NOTE: ``\\n`` is written with a double backslash on purpose so that the
    # YAML text contains the two characters ``\n`` inside the double-quoted
    # descriptions instead of a raw line break.
    # The CPU alert selects mode="idle" (like the cpu:usage:rate1m recording
    # rule); averaging over every mode yields a near-constant value.
    rules = """
groups:
- name: node_usage_record_rules
  interval: 1m
  rules:
  - record: cpu:usage:rate1m
    expr: (1 - avg(rate(node_cpu_seconds_total{mode="idle"}[1m])) by (instance,vendor,account,group,name)) * 100
  - record: mem:usage:rate1m
    expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100
- name: node-exporter
  rules:
  - alert: 内存使用率
    expr: 100 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 > 90
    for: 5m
    labels:
      alertype: system
      severity: warning
    annotations:
      description: "{{ $labels.name }}:内存使用率{{ $value | humanize }}%\\n> {{ $labels.group }}-{{ $labels.instance }}"
  - alert: CPU使用率
    expr: 100 - (avg by(instance,name,group,account) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 92
    for: 5m
    labels:
      alertype: system
      severity: warning
    annotations:
      description: "{{ $labels.name }}CPU使用率{{ $value | humanize }}%\\n> {{ $labels.group }}-{{ $labels.instance }}"
  - alert: 系统负载
    expr: node_load5 / on (instance,name,group,account) sum(count(node_cpu_seconds_total{mode='system'}) by (cpu,instance,name,group,account)) by(instance,name,group,account) > 1.7
    for: 10m
    labels:
      alertype: system
      severity: warning
    annotations:
      description: "{{ $labels.name }}:系统负载{{ $value | humanize }}倍\\n> {{ $labels.group }}-{{ $labels.instance }}"
  - alert: 磁盘使用率
    expr: |
      100 - (node_filesystem_avail_bytes/node_filesystem_size_bytes{fstype=~"ext.?|xfs",mountpoint!~".*pods.*|/var/lib/docker/devicemapper/mnt/.*"} * 100) > 85
    for: 5m
    labels:
      alertype: system
      severity: warning
    annotations:
      description: "{{ $labels.name }}_{{ $labels.mountpoint }}:磁盘使用率{{ $value | humanize }}%\\n> {{ $labels.group }}-{{ $labels.instance }}"
  - alert: K8S节点POD磁盘使用率
    expr: 100 - (node_filesystem_avail_bytes/node_filesystem_size_bytes{mountpoint=~"/var/lib/docker/devicemapper/mnt/.*"} * 100) > 85
    for: 5m
    labels:
      alertype: system
      severity: warning
    annotations:
      description: "{{ $labels.name }}_{{ $labels.mountpoint }}:磁盘使用率{{ $value | humanize }}%\\n> {{ $labels.group }}-{{ $labels.instance }}"
  - alert: NFS磁盘使用率
    expr: 100 - (node_filesystem_avail_bytes/node_filesystem_size_bytes{fstype="nfs"} * 100) > 90
    for: 5m
    labels:
      alertype: system
      severity: warning
    annotations:
      description: "{{ $labels.name }}_{{ $labels.mountpoint }}:磁盘使用率{{ $value | humanize }}%\\n> {{ $labels.group }}-{{ $labels.instance }}"
  - alert: 磁盘读写容量
    expr: (irate(node_disk_read_bytes_total[5m]) ) /1024 /1024 > 80 or (irate(node_disk_written_bytes_total[5m]) ) /1024 /1024 > 80
    for: 8m
    labels:
      alertype: disk
      severity: warning
    annotations:
      description: "{{ $labels.name }}_{{ $labels.device }}当前IO为{{ $value | humanize }}MB/s\\n> {{ $labels.group }}-{{ $labels.instance }}"
  - alert: 网络流入下载数据过多
    expr: sum by(device,instance, name, group, account) (irate(node_network_receive_bytes_total{device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo.*|cni.*'}[5m])) / 1024 / 1024 > 70
    for: 5m
    labels:
      alertype: network
      severity: warning
    annotations:
      description: "{{ $labels.name }}:流入数据为{{ $value | humanize }}MB/s\\n> {{ $labels.group }}-{{ $labels.instance }}"
  - alert: 网络流出上传数据过多
    expr: sum by(device,instance, name, group, account) (irate(node_network_transmit_bytes_total{device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo.*|cni.*'}[5m])) / 1024 / 1024 > 70
    for: 5m
    labels:
      alertype: network
      severity: warning
    annotations:
      description: "{{ $labels.name }}:流出数据为{{ $value | humanize }}MB/s\\n> {{ $labels.group }}-{{ $labels.instance }}"
- name: Itself
  rules:
  - alert: Exporter状态
    expr: up == 0
    for: 3m
    labels:
      alertype: itself
      severity: critical
    annotations:
      description: "{{ $labels.job }}:异常\\n> {{ $labels.group }}-{{ $labels.name }}-{{ $labels.instance }}"
"""
    return {"code": 20000, "rules": rules}

View File

@ -39,6 +39,8 @@ def get_service():
return {'code': 50000, 'data': f'{response.status_code}:{response.text}'}
def add_service(vendor,account,region,group,name,ip,port,os):
if port is None or name is None:
return {"code": 50000, "data": f"名称或IP不能为空"}
sid = f"{vendor}/{account}/{region}/{group}@{name}"
instance = f'{ip}:{port}'
if '//' in sid or sid.startswith('/') or sid.endswith('/'):

View File

@ -2,13 +2,18 @@ from flask_httpauth import HTTPTokenAuth
from itsdangerous import TimedJSONWebSignatureSerializer
from units import consul_kv
# Signing key for login tokens, read through consul_kv.get_value.
secret_key = consul_kv.get_value('ConsulManager/assets/secret/skey')['sk']
# Single serializer instance; tokens are valid for 28800 seconds (8 hours).
# The earlier construction without expires_in was overwritten immediately
# and has been dropped.
s = TimedJSONWebSignatureSerializer(secret_key,expires_in=28800)
# Token authentication handler; verify_token is registered on it below.
auth = HTTPTokenAuth()
@auth.verify_token
def verify_token(token):
    """Check the signed login token supplied with a request.

    Registered on ``auth`` as its token verification callback.

    Args:
        token: Serialized token string to validate with the module-level
            serializer ``s``.

    Returns:
        bool: True when ``s.loads`` accepts the token, False for a tampered,
        malformed or expired token or any other decoding failure.
    """
    # Imported locally: the exception classes are not imported in the visible
    # module header, so naming them in an ``except`` clause would otherwise
    # raise NameError as soon as a token fails to load.
    from itsdangerous import BadSignature

    try:
        s.loads(token)
    except BadSignature:
        # SignatureExpired derives from BadSignature, so expired tokens are
        # rejected here too. A falsy return is used instead of a dict because
        # any truthy value would count as a successful verification.
        return False
    except Exception:
        # Any other failure while decoding is treated as "not authenticated"
        # rather than surfacing as a server error.
        return False
    return True

View File

@ -6,7 +6,7 @@ sys.path.append("..")
from config import admin_passwd
from units import token_auth, consul_kv
# Signing key for login tokens, read through consul_kv.get_value.
secret_key = consul_kv.get_value('ConsulManager/assets/secret/skey')['sk']
# Single serializer instance; tokens are valid for 28800 seconds (8 hours).
# The earlier construction without expires_in was overwritten immediately
# and has been dropped.
s = TimedJSONWebSignatureSerializer(secret_key,expires_in=28800)
# Flask blueprint named 'login' and the Api object bound to it.
blueprint = Blueprint('login',__name__)
api = Api(blueprint)

View File

@ -38,7 +38,8 @@ class Nodes(Resource):
serivces = i.split("/")
services_list.append(f'{serivces[0]}_{serivces[1]}_{serivces[2]}')
return {'code': 20000,'services_list': sorted(set(services_list))}
elif stype == 'rules':
return gen_config.get_rules()
def post(self, stype):
if stype == 'config':
args = parser.parse_args()

View File

@ -31,6 +31,7 @@ class SelfnodeApi(Resource):
return selfnode_manager.get_service()
def post(self):
    """Parse the request arguments and pass them to selfnode_manager.add_service.

    Returns:
        Whatever ``selfnode_manager.add_service`` returns for the parsed
        vendor/account/region/group/name/ip/port/os values.
    """
    import logging

    args = parser.parse_args()
    # Debug-level log instead of an unconditional, flushed print on every
    # request; enable DEBUG logging to see the parsed arguments.
    logging.getLogger(__name__).debug('add_service args: %s', args)
    return selfnode_manager.add_service(
        args['vendor'], args['account'], args['region'], args['group'],
        args['name'], args['ip'], args['port'], args['os'])
def put(self):

View File

@ -59,3 +59,9 @@ export function getConfig(services_dict) {
data: { services_dict }
})
}
/**
 * Request the alert rule text from the backend.
 *
 * Issues a GET to `/api/nodes/rules` through the shared `request` helper
 * and returns whatever that helper returns.
 */
export function getRules() {
  const options = { url: '/api/nodes/rules', method: 'get' }
  return request(options)
}

View File

@ -113,6 +113,12 @@ export const constantRoutes = [
component: () => import('@/views/node-exporter/pconfig'),
meta: { title: 'Prometheus 配置', icon: 'el-icon-set-up' }
},
{
path: 'rules',
name: '告警规则',
component: () => import('@/views/node-exporter/rules'),
meta: { title: '告警规则', icon: 'el-icon-bell' }
},
{
path: 'grafana',
name: 'Grafana 看板',

View File

@ -115,7 +115,7 @@
<el-autocomplete v-model="temp.env" :fetch-suggestions="Sugg_env" placeholder="优先选择" clearable class="filter-item" />
</el-form-item>
<el-form-item label="名称" prop="name">
<el-input v-model="temp.name" placeholder="请输入" clearable class="filter-item" />
<el-input v-model="temp.name" placeholder="请输入" clearable class="filter-item" /><font size="2" color="#ff0000">前5个字段组合后需唯一重复会覆盖已有监控项</font>
</el-form-item>
<el-form-item prop="instance">
<span slot="label">

View File

@ -4,6 +4,15 @@
<el-link :underline="false" type="primary" icon="el-icon-star-on" href="https://github.com/starsliao/ConsulManager" target="_blank" class="dashboard-text">StarsL.cn</el-link>
</el-badge>
<el-timeline>
<el-timeline-item timestamp="2022/6/3" placement="top">
<el-card>
<h4>v0.5.2</h4>
<p>增加了node-exporter的告警规则</p>
<p>修正了一个新增自建主机监控项的bug</p>
<p>新增站点监控的描述做了优化</p>
<p>登录过期时间修改为8小时</p>
</el-card>
</el-timeline-item>
<el-timeline-item timestamp="2022/5/8" placement="top">
<el-card>
<h4>v0.5.1</h4>

View File

@ -46,7 +46,7 @@
</el-form>
<div align="center" class="title-container">
<span style="font-size:10px" class="title">v0.5.1</span>
<span style="font-size:12px" class="title">v0.5.2</span>
</div>
</div>
</template>

View File

@ -0,0 +1,51 @@
<template>
<div class="app-container">
<!-- Copies the current rule text to the clipboard; onCopy / onError show the result message. -->
<el-button v-clipboard:copy="rules" v-clipboard:success="onCopy" v-clipboard:error="onError" class="filter-item" type="warning" icon="el-icon-document-copy">
复制配置
</el-button>
<!-- Rule text rendered through the v-highlightjs directive, marked as YAML. -->
<pre v-highlightjs="rules" style="line-height:120%"><code class="yaml yamlcode" /></pre>
</div>
</template>
<script>
import { getRules } from '@/api/node-exporter'
// Page component that loads the alert rule text once on creation and lets the
// user view and copy it.
export default {
  data() {
    return {
      // True while the rules request is in flight.
      listLoading: false,
      // Rule text returned by getRules(); bound to the copy button and the code view.
      rules: ''
    }
  },
  created() {
    this.fetchRules()
  },
  methods: {
    // Clipboard success callback.
    onCopy() {
      this.$message({
        message: '复制成功!',
        type: 'success'
      })
    },
    // Clipboard failure callback.
    onError() {
      this.$message.error('复制失败!')
    },
    // Load the rule text; the loading flag is cleared on success and on
    // failure so it cannot stay stuck at true after a failed request.
    fetchRules() {
      this.listLoading = true
      getRules()
        .then(response => {
          this.rules = response.rules
        })
        .finally(() => {
          this.listLoading = false
        })
    }
  }
}
</script>
<!-- Scoped so the bare `pre` selector only affects this view, not every <pre> in the app. -->
<style scoped>
/* Code font for the YAML view; falls back to a generic monospace face when Consolas is not installed. */
.yamlcode {
  font-family: Consolas, monospace;
}
/* Keep long rule text inside a bounded, scrollable box and wrap long lines. */
pre {
  max-height: 640px;
  white-space: pre-wrap;
  overflow: auto;
}
</style>