diff --git a/etc/script/notify.py b/etc/script/notify.py index 73b246ec..eba03b0b 100755 --- a/etc/script/notify.py +++ b/etc/script/notify.py @@ -8,6 +8,7 @@ import time import requests from email.mime.text import MIMEText from email.header import Header +from bottle import template reload(sys) # reload 才能调用 setdefaultencoding 方法 sys.setdefaultencoding('utf-8') # 设置 'utf-8' @@ -18,19 +19,9 @@ sys.setdefaultencoding('utf-8') # 设置 'utf-8' # 3. 调用SMTP服务器发送告警,微信、钉钉、飞书、slack、jira、短信、电话等等留给社区实现 # 脚本二开指南 -# 1. 可以根据下面的TEST_ALERT_JSON 中的结构修改脚本发送逻辑,定制化告警格式格式如下 -""" -[告警类型:prometheus] -[规则名称:a] -[是否已恢复:已触发] -[告警级别:1] -[触发时间:2021-07-02 16:05:14] -[可读表达式:go_goroutines>0] -[当前值:[vector={__name__="go_goroutines", instance="localhost:9090", job="prometheus"}]: [value=33.000000]] -[标签组:instance=localhost:9090 job=prometheus] -""" -# 2. 每个告警会以json文件的格式存储在LOCAL_EVENT_FILE_DIR 下面,文件名为 filename = '%d_%d_%d' % (rule_id, event_id, trigger_time) -# 3. 告警通道需要自行定义Send类中的send_xxx同名方法,反射调用:举例 event.notify_channels = [qq dingding] 则需要Send类中 有 send_qq send_dingding方法 +# 1. 可以根据下面的 TEST_ALERT_JSON 中的结构修改脚本发送逻辑,定制化告警格式见tpl下的模块文件 +# 2. 每个告警会以json文件的格式存储在 LOCAL_EVENT_FILE_DIR 下面,文件名为 filename = '%d_%d_%d' % (rule_id, event_id, trigger_time) +# 3. 告警通道需要自行定义Send类中的send_xxx的方法,反射调用:举例 event.notify_channels = [邮件] 则需要在NOTIFY_CHANNEL_DICT配置 邮件=email,在Send类中实现 send_email 方法 # 4. im发群信息,比如钉钉发群信息需要群的webhook机器人 token,这个信息可以在user的contacts map中,各个send_方法处理即可 # 5. 用户创建一个虚拟的用户保存上述im群 的机器人token信息 user的contacts map中 @@ -40,12 +31,6 @@ mail_user = "ulricqin" mail_pass = "password" mail_from = "ulricqin@163.com" -# just for test -mail_body = """ -

邮件发送测试

-

baidu

-""" - # 本地告警event json存储目录 LOCAL_EVENT_FILE_DIR = ".alerts" NOTIFY_CHANNELS_SPLIT_STR = " " @@ -57,12 +42,20 @@ DINGTALK_API = "https://oapi.dingtalk.com/robot/send" WECOM_ROBOT_TOKEN_NAME = "wecom_robot_token" WECOM_API = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send" +NOTIFY_CHANNEL_DICT = { + "email":"email", + "sms":"sms", + "voice":"voice", + "dingtalk":"dingtalk", + "wecom":"wecom" +} + # stdin 告警json实例 TEST_ALERT_JSON = { "event": { "alert_duration": 10, "notify_channels": "dingtalk", - "res_classpaths": "", + "res_classpaths": "all", "id": 4, "notify_group_objs": None, "rule_note": "", @@ -91,8 +84,8 @@ TEST_ALERT_JSON = { "status": 0, "tags": "instance=localhost:9090 job=prometheus", "trigger_time": 1625213114, - "res_ident": "", - "rule_name": "a", + "res_ident": "ident1", + "rule_name": "alert_test", "is_prome_pull": 1, "notify_users": "1", "notify_groups": "", @@ -100,7 +93,7 @@ TEST_ALERT_JSON = { "values": "[vector={__name__=\"go_goroutines\", instance=\"localhost:9090\", job=\"prometheus\"}]: [value=33.000000]", "readable_expression": "go_goroutines>0", "notify_user_objs": None, - "is_recovery": 0, + "is_recovery": 1, "rule_id": 1 }, "rule": { @@ -166,10 +159,9 @@ def main(): # 持久化到本地json文件 persist(payload, rule_id, event_id, trigger_time) # 生成告警内容 - alert_content = content_gen(payload) - + alert_content = sms_content_gen(values_gen(payload)) for ch in notify_channels: - send_func_name = "send_{}".format(ch.strip()) + send_func_name = "send_{}".format(NOTIFY_CHANNEL_DICT.get(ch.strip())) has_func = hasattr(Send, send_func_name) if not has_func: @@ -179,44 +171,47 @@ def main(): send_func = getattr(Send, send_func_name) send_func(alert_content, payload) - -def content_gen(payload): - # 生成格式化告警内容 - text = "" +def values_gen(payload): event_obj = payload.get("event") + values = { + "IsAlert": event_obj.get("is_recovery") == 0, + "IsMachineDep": event_obj.get("res_classpaths") != "", + "Status": status_gen(event_obj.get("priority"),event_obj.get("is_recovery")), + "Sname": event_obj.get("rule_name"), + "Ident": event_obj.get("res_ident"), + "Classpath": event_obj.get("res_classpaths"), + "Metric": metric_gen(event_obj.get("history_points")), + "Tags": event_obj.get("tags"), + "Value": event_obj.get("values"), + "ReadableExpression": event_obj.get("readable_expression"), + "TriggerTime": time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(event_obj.get("trigger_time"))), + "Elink": "http://n9e.didiyun.com/strategy/edit/{}".format(event_obj.get("rule_id")), + "Slink": "http://n9e.didiyun.com/event/{}".format(event_obj.get("id")) + } - rule_type = event_obj.get("is_prome_pull") - type_str_m = {1: "prometheus", 0: "n9e"} - rule_type = type_str_m.get(rule_type) + return values - text += "[告警类型:{}]\n".format(rule_type) +def email_content_gen(values): + return template('etc/script/tpl/mail.tpl', values) - rule_name = event_obj.get("rule_name") - text += "[规则名称:{}]\n".format(rule_name) +def sms_content_gen(values): + return template('etc/script/tpl/sms.tpl', values) - is_recovery = event_obj.get("is_recovery") - is_recovery_str_m = {1: "已恢复", 0: "已触发"} - is_recovery = is_recovery_str_m.get(is_recovery) - text += "[是否已恢复:{}]\n".format(is_recovery) +def status_gen(priority,is_recovery): + is_recovery_str_m = {1: "恢复", 0: "告警"} + status = "P{} {}".format(priority, is_recovery_str_m.get(is_recovery)) + return status - priority = event_obj.get("priority") - text += "[告警级别:{}]\n".format(priority) - - trigger_time = event_obj.get("trigger_time") - text += "[触发时间:{}]\n".format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(trigger_time)))) - - readable_expression = event_obj.get("readable_expression") - text += "[可读表达式:{}]\n".format(readable_expression) - - values = event_obj.get("values") - text += "[当前值:{}]\n".format(values) - - tags = event_obj.get("tags") - text += "[标签组:{}]\n".format(tags) - - print(text) - return text +def subject_gen(priority,is_recovery,rule_name): + is_recovery_str_m = {1: "恢复", 0: "告警"} + subject = "P{} {} {}".format(priority, is_recovery_str_m.get(is_recovery), rule_name) + return subject +def metric_gen(history_points): + metrics = [] + for item in history_points: + metrics.append(item.get("metric")) + return ",".join(metrics) def persist(payload, rule_id, event_id, trigger_time): if not os.path.exists(LOCAL_EVENT_FILE_DIR): @@ -237,11 +232,11 @@ class Send(object): return recipients = emails - - message = MIMEText(alert_content, 'html', 'utf-8') + mail_body = email_content_gen(values_gen(payload)) + message = MIMEText(mail_body, 'html', 'utf-8') message['From'] = mail_from message['To'] = ", ".join(recipients) - message["Subject"] = "n9e alert" + message["Subject"] = subject_gen(payload.get("event").get("priority"),payload.get("event").get("is_recovery"),payload.get("event").get("rule_name")) smtp = smtplib.SMTP_SSL(mail_host, mail_port) smtp.login(mail_user, mail_pass) @@ -320,10 +315,12 @@ def mail_test(): recipients = ["ulricqin@qq.com", "ulric@163.com"] + payload = json.loads(json.dumps(TEST_ALERT_JSON)) + mail_body = email_content_gen(values_gen(payload)) message = MIMEText(mail_body, 'html', 'utf-8') message['From'] = mail_from message['To'] = ", ".join(recipients) - message["Subject"] = "n9e alert" + message["Subject"] = subject_gen(payload.get("event").get("priority"),payload.get("event").get("is_recovery"),payload.get("event").get("rule_name")) smtp = smtplib.SMTP_SSL(mail_host, mail_port) smtp.login(mail_user, mail_pass) @@ -332,7 +329,6 @@ def mail_test(): print("mail_test_done") - if __name__ == "__main__": if len(sys.argv) == 1: main() @@ -340,3 +336,4 @@ if __name__ == "__main__": mail_test() else: print("I am confused") + diff --git a/etc/script/tpl/mail.tpl b/etc/script/tpl/mail.tpl new file mode 100644 index 00000000..bb734301 --- /dev/null +++ b/etc/script/tpl/mail.tpl @@ -0,0 +1,216 @@ + + + + + + 夜莺告警通知 + + + +
+
+
+

{{Sname}}

+

+
+ +
+ +
+ + + % if IsAlert: + + + + + % else: + + + + + % end + + % if IsMachineDep: + + + + + + + + + % end + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
级别状态:{{Status}}
级别状态:{{Status}}
告警设备:{{Ident}}
所属分组: + {{Classpath}}
+
监控指标:{{Metric}}
tags:{{Tags}}
当前值:{{Value}}
报警说明: + {{ReadableExpression}} +
触发时间: + {{TriggerTime}} +
报警详情:{{Elink}}
报警策略:{{Slink}}
+ +
+ + +
+
+
+ + diff --git a/etc/script/tpl/sms.tpl b/etc/script/tpl/sms.tpl new file mode 100644 index 00000000..cfd70448 --- /dev/null +++ b/etc/script/tpl/sms.tpl @@ -0,0 +1,14 @@ +级别状态:{{Status}} +策略名称:{{Sname}} +% if IsMachineDep: +告警设备:{{Ident}} +挂载节点:{{Classpath}} +% end +监控指标:{{Metric}} +指标标签:{{Tags}} +当前值:{{!Value}} +报警说明:{{!ReadableExpression}} +触发时间:{{TriggerTime}} +报警详情:{{Elink}} +报警策略:{{Slink}} +