feat: support history alert events store (#760)
This commit is contained in:
parent
c38d595cb8
commit
4eb79fb017
|
@ -66,10 +66,7 @@ func consume(events []interface{}, sema *semaphore.Semaphore) {
|
|||
event.MarkMuted()
|
||||
|
||||
if config.Config.Alert.MutedAlertPersist {
|
||||
err := event.Add()
|
||||
if err != nil {
|
||||
logger.Warningf("event_consume: insert muted event err:%v, event:%+v", err, event)
|
||||
}
|
||||
persist(event)
|
||||
}
|
||||
|
||||
continue
|
||||
|
@ -165,6 +162,11 @@ func persist(event *models.AlertEvent) {
|
|||
logger.Warningf("event_consume: insert alert event err:%v, event:%+v", err, event)
|
||||
}
|
||||
}
|
||||
obj := ToHistoryAlertEvent(event)
|
||||
err := obj.Add()
|
||||
if err != nil {
|
||||
logger.Warningf("event_consume: insert history alert event err:%v, event:%+v", err, event)
|
||||
}
|
||||
}
|
||||
|
||||
type AlertMsg struct {
|
||||
|
@ -295,3 +297,29 @@ func enrichTag(event *models.AlertEvent, alertRule *models.AlertRule) {
|
|||
sort.Strings(tagList)
|
||||
event.Tags = strings.Join(tagList, " ")
|
||||
}
|
||||
|
||||
func ToHistoryAlertEvent(ae *models.AlertEvent) *models.HistoryAlertEvent {
|
||||
var obj models.HistoryAlertEvent
|
||||
obj.RuleId = ae.RuleId
|
||||
obj.RuleName = ae.RuleName
|
||||
obj.RuleNote = ae.RuleNote
|
||||
obj.HashId = ae.HashId
|
||||
obj.IsPromePull = ae.IsPromePull
|
||||
obj.ResClasspaths = ae.ResClasspaths
|
||||
obj.ResIdent = ae.ResIdent
|
||||
obj.Priority = ae.Priority
|
||||
obj.Status = ae.Status
|
||||
obj.IsRecovery = ae.IsRecovery
|
||||
obj.HistoryPoints = ae.HistoryPoints
|
||||
obj.TriggerTime = ae.TriggerTime
|
||||
obj.Values = ae.Values
|
||||
obj.NotifyChannels = ae.NotifyChannels
|
||||
obj.NotifyGroups = ae.NotifyGroups
|
||||
obj.NotifyUsers = ae.NotifyUsers
|
||||
obj.RunbookUrl = ae.RunbookUrl
|
||||
obj.ReadableExpression = ae.ReadableExpression
|
||||
obj.Tags = ae.Tags
|
||||
obj.NotifyGroupObjs = ae.NotifyGroupObjs
|
||||
obj.NotifyUserObjs = ae.NotifyUserObjs
|
||||
return &obj
|
||||
}
|
||||
|
|
|
@ -374,6 +374,17 @@ func AlertEvent(id int64) *models.AlertEvent {
|
|||
return obj
|
||||
}
|
||||
|
||||
func HistoryAlertEvent(id int64) *models.HistoryAlertEvent {
|
||||
obj, err := models.HistoryAlertEventGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such alert all event")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func CollectRule(id int64) *models.CollectRule {
|
||||
obj, err := models.CollectRuleGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
|
|
@ -155,6 +155,9 @@ func configRoutes(r *gin.Engine) {
|
|||
pages.GET("/alert-event/:id", login(), alertEventGet)
|
||||
pages.DELETE("/alert-event/:id", login(), alertEventDel)
|
||||
|
||||
pages.GET("/history-alert-events", login(), historyAlertEventGets)
|
||||
pages.GET("/history-alert-event/:id", login(), historyAlertEventGet)
|
||||
|
||||
pages.GET("/classpath/:id/collect-rules", login(), collectRuleGets)
|
||||
pages.POST("/collect-rules", login(), collectRuleAdd)
|
||||
pages.DELETE("/collect-rules", login(), collectRuleDel)
|
||||
|
@ -283,6 +286,9 @@ func configRoutes(r *gin.Engine) {
|
|||
v1.GET("/alert-event/:id", login(), alertEventGet)
|
||||
v1.DELETE("/alert-event/:id", login(), alertEventDel)
|
||||
|
||||
v1.GET("/history-alert-events", login(), historyAlertEventGets)
|
||||
v1.GET("/history-alert-event/:id", login(), historyAlertEventGet)
|
||||
|
||||
v1.POST("/collect-rules", login(), collectRuleAdd)
|
||||
v1.DELETE("/collect-rules", login(), collectRuleDel)
|
||||
v1.PUT("/collect-rule/:id", login(), collectRulePut)
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func historyAlertEventGets(c *gin.Context) {
|
||||
stime := queryInt64(c, "stime", 0)
|
||||
etime := queryInt64(c, "etime", 0)
|
||||
hours := queryInt64(c, "hours", 0)
|
||||
now := time.Now().Unix()
|
||||
if hours != 0 {
|
||||
stime = now - 3600*hours
|
||||
etime = now + 3600*24
|
||||
}
|
||||
|
||||
if stime != 0 && etime == 0 {
|
||||
etime = now + 3600*24
|
||||
}
|
||||
|
||||
query := queryStr(c, "query", "")
|
||||
priority := queryInt(c, "priority", -1)
|
||||
status := queryInt(c, "status", -1)
|
||||
isRecovery := queryInt(c, "is_recovery", -1)
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
|
||||
total, err := models.HistoryAlertEventsTotal(stime, etime, query, status, isRecovery, priority)
|
||||
dangerous(err)
|
||||
|
||||
list, err := models.HistoryAlertEventGets(stime, etime, query, status, isRecovery, priority, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
for i := 0; i < len(list); i++ {
|
||||
dangerous(list[i].FillObjs())
|
||||
}
|
||||
|
||||
if len(list) == 0 {
|
||||
renderZeroPage(c)
|
||||
return
|
||||
}
|
||||
|
||||
renderData(c, map[string]interface{}{
|
||||
"total": total,
|
||||
"list": list,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func historyAlertEventGet(c *gin.Context) {
|
||||
ae := HistoryAlertEvent(urlParamInt64(c, "id"))
|
||||
dangerous(ae.FillObjs())
|
||||
renderData(c, ae, nil)
|
||||
}
|
|
@ -0,0 +1,185 @@
|
|||
package models
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
|
||||
//"github.com/didi/nightingale/v5/vos"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"xorm.io/builder"
|
||||
)
|
||||
|
||||
type HistoryAlertEvent struct {
|
||||
Id int64 `json:"id"`
|
||||
RuleId int64 `json:"rule_id"`
|
||||
RuleName string `json:"rule_name"`
|
||||
RuleNote string `json:"rule_note"`
|
||||
HashId string `json:"hash_id"` // 唯一标识
|
||||
IsPromePull int `json:"is_prome_pull"` // 代表是否是prometheus pull告警,为1时前端使用 ReadableExpression 拉取最近1小时数据
|
||||
ResClasspaths string `json:"res_classpaths"`
|
||||
ResIdent string `json:"res_ident" xorm:"-"` // res_ident会出现在tags字段,就不用单独写入数据库了,但是各块逻辑中有个单独的res_ident字段更便于处理,所以struct里还留有这个字段;前端不用展示这个字段
|
||||
Priority int `json:"priority"`
|
||||
Status int `json:"status"` // 标识是否 被屏蔽
|
||||
IsRecovery int `json:"is_recovery"` // 0: alert, 1: recovery
|
||||
HistoryPoints json.RawMessage `json:"history_points"` // HistoryPoints{}
|
||||
TriggerTime int64 `json:"trigger_time"`
|
||||
Values string `json:"values" xorm:"-"` // e.g. cpu.idle: 23.3; load.1min: 32
|
||||
NotifyChannels string `json:"notify_channels"`
|
||||
NotifyGroups string `json:"notify_groups"`
|
||||
NotifyUsers string `json:"notify_users"`
|
||||
RunbookUrl string `json:"runbook_url"`
|
||||
ReadableExpression string `json:"readable_expression"` // e.g. mem.bytes.used.percent(all,60s) > 0
|
||||
Tags string `json:"tags"` // merge data_tags rule_tags and res_tags
|
||||
NotifyGroupObjs []UserGroup `json:"notify_group_objs" xorm:"-"`
|
||||
NotifyUserObjs []User `json:"notify_user_objs" xorm:"-"`
|
||||
}
|
||||
|
||||
// IsAlert 语法糖,避免直接拿IsRecovery字段做比对不直观易出错
|
||||
func (hae *HistoryAlertEvent) IsAlert() bool {
|
||||
return hae.IsRecovery != 1
|
||||
}
|
||||
|
||||
// IsRecov 语法糖,避免直接拿IsRecovery字段做比对不直观易出错
|
||||
func (hae *HistoryAlertEvent) IsRecov() bool {
|
||||
return hae.IsRecovery == 1
|
||||
}
|
||||
|
||||
// MarkAlert 语法糖,标记为告警状态
|
||||
func (hae *HistoryAlertEvent) MarkAlert() {
|
||||
hae.IsRecovery = 0
|
||||
}
|
||||
|
||||
// MarkRecov 语法糖,标记为恢复状态
|
||||
func (hae *HistoryAlertEvent) MarkRecov() {
|
||||
hae.IsRecovery = 1
|
||||
}
|
||||
|
||||
// MarkMuted 语法糖,标记为屏蔽状态
|
||||
func (hae *HistoryAlertEvent) MarkMuted() {
|
||||
hae.Status = 1
|
||||
}
|
||||
|
||||
func (hae *HistoryAlertEvent) FillObjs() error {
|
||||
userGroupIds := strings.Fields(hae.NotifyGroups)
|
||||
if len(userGroupIds) > 0 {
|
||||
groups, err := UserGroupGetsByIdsStr(userGroupIds)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hae.NotifyGroupObjs = groups
|
||||
}
|
||||
|
||||
userIds := strings.Fields(hae.NotifyUsers)
|
||||
if len(userIds) > 0 {
|
||||
users, err := UserGetsByIdsStr(userIds)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hae.NotifyUserObjs = users
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hae *HistoryAlertEvent) Add() error {
|
||||
return DBInsertOne(hae)
|
||||
}
|
||||
|
||||
func HistoryAlertEventsTotal(stime, etime int64, query string, status, isRecovery, priority int) (num int64, err error) {
|
||||
cond := builder.NewCond()
|
||||
if stime != 0 && etime != 0 {
|
||||
cond = cond.And(builder.Between{Col: "trigger_time", LessVal: stime, MoreVal: etime})
|
||||
}
|
||||
|
||||
if status != -1 {
|
||||
cond = cond.And(builder.Eq{"status": status})
|
||||
}
|
||||
|
||||
if isRecovery != -1 {
|
||||
cond = cond.And(builder.Eq{"is_recovery": isRecovery})
|
||||
}
|
||||
|
||||
if priority != -1 {
|
||||
cond = cond.And(builder.Eq{"priority": priority})
|
||||
}
|
||||
|
||||
if query != "" {
|
||||
arr := strings.Fields(query)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
qarg := "%" + arr[i] + "%"
|
||||
innerCond := builder.NewCond()
|
||||
innerCond = innerCond.Or(builder.Like{"res_classpaths", qarg})
|
||||
innerCond = innerCond.Or(builder.Like{"rule_name", qarg})
|
||||
innerCond = innerCond.Or(builder.Like{"tags", qarg})
|
||||
cond = cond.And(innerCond)
|
||||
}
|
||||
}
|
||||
|
||||
num, err = DB.Where(cond).Count(new(HistoryAlertEvent))
|
||||
if err != nil {
|
||||
logger.Errorf("mysql.error: count history_alert_event fail: %v", err)
|
||||
return 0, internalServerError
|
||||
}
|
||||
|
||||
return num, nil
|
||||
}
|
||||
|
||||
func HistoryAlertEventGets(stime, etime int64, query string, status, isRecovery, priority int, limit, offset int) ([]HistoryAlertEvent, error) {
|
||||
cond := builder.NewCond()
|
||||
if stime != 0 && etime != 0 {
|
||||
cond = cond.And(builder.Between{Col: "trigger_time", LessVal: stime, MoreVal: etime})
|
||||
}
|
||||
|
||||
if status != -1 {
|
||||
cond = cond.And(builder.Eq{"status": status})
|
||||
}
|
||||
|
||||
if isRecovery != -1 {
|
||||
cond = cond.And(builder.Eq{"is_recovery": isRecovery})
|
||||
}
|
||||
|
||||
if priority != -1 {
|
||||
cond = cond.And(builder.Eq{"priority": priority})
|
||||
}
|
||||
|
||||
if query != "" {
|
||||
arr := strings.Fields(query)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
qarg := "%" + arr[i] + "%"
|
||||
innerCond := builder.NewCond()
|
||||
innerCond = innerCond.Or(builder.Like{"res_classpaths", qarg})
|
||||
innerCond = innerCond.Or(builder.Like{"rule_name", qarg})
|
||||
innerCond = innerCond.Or(builder.Like{"tags", qarg})
|
||||
cond = cond.And(innerCond)
|
||||
}
|
||||
}
|
||||
|
||||
var objs []HistoryAlertEvent
|
||||
err := DB.Where(cond).Desc("trigger_time").Limit(limit, offset).Find(&objs)
|
||||
if err != nil {
|
||||
logger.Errorf("mysql.error: query history_alert_event fail: %v", err)
|
||||
return objs, internalServerError
|
||||
}
|
||||
|
||||
if len(objs) == 0 {
|
||||
return []HistoryAlertEvent{}, nil
|
||||
}
|
||||
|
||||
return objs, nil
|
||||
}
|
||||
|
||||
func HistoryAlertEventGet(where string, args ...interface{}) (*HistoryAlertEvent, error) {
|
||||
var obj HistoryAlertEvent
|
||||
has, err := DB.Where(where, args...).Get(&obj)
|
||||
if err != nil {
|
||||
logger.Errorf("mysql.error: query history_alert_event(%s)%+v fail: %s", where, args, err)
|
||||
return nil, internalServerError
|
||||
}
|
||||
|
||||
if !has {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return &obj, nil
|
||||
}
|
25
sql/n9e.sql
25
sql/n9e.sql
|
@ -313,6 +313,31 @@ CREATE TABLE `alert_event` (
|
|||
KEY (`trigger_time`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
|
||||
-- history_alert_event keeps a record of alert events; columns mirror the
-- fields of the HistoryAlertEvent model.
CREATE TABLE `history_alert_event` (
    `id` bigint unsigned NOT NULL AUTO_INCREMENT,
    `hash_id` varchar(255) NOT NULL COMMENT 'rule_id + point_pk',
    `rule_id` bigint unsigned NOT NULL,
    `rule_name` varchar(255) NOT NULL,
    -- NOTE(review): this default reads like a column description rather than a
    -- real default value; left unchanged, confirm whether COMMENT was intended.
    `rule_note` varchar(512) NOT NULL DEFAULT 'alert rule note',
    `res_classpaths` varchar(1024) NOT NULL DEFAULT '' COMMENT 'belong classpaths',
    `priority` tinyint(1) NOT NULL,
    `status` tinyint(1) NOT NULL,
    `is_prome_pull` tinyint(1) NOT NULL,
    `is_recovery` tinyint(1) NOT NULL,
    `history_points` text COMMENT 'metric, history points',
    `trigger_time` bigint NOT NULL,
    `notify_channels` varchar(255) NOT NULL DEFAULT '',
    `notify_groups` varchar(255) NOT NULL DEFAULT '',
    `notify_users` varchar(255) NOT NULL DEFAULT '',
    `runbook_url` varchar(255) DEFAULT NULL,
    `readable_expression` varchar(1024) NOT NULL COMMENT 'e.g. mem.bytes.used.percent(all,60s) > 0',
    -- The descriptive text was previously the DEFAULT value, so a row inserted
    -- without tags would have stored the description itself as its tags.
    `tags` varchar(1024) NOT NULL DEFAULT '' COMMENT 'merge data_tags rule_tags and res_tags',
    PRIMARY KEY (`id`),
    KEY `hash_id` (`hash_id`),
    KEY `rule_id` (`rule_id`),
    KEY `trigger_time` (`trigger_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
|
||||
CREATE TABLE `metric_description` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`metric` varchar(255) not null default '',
|
||||
|
|
Loading…
Reference in New Issue