nightingale/models/alert_rule.go

430 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package models
import (
"encoding/json"
"fmt"
"regexp"
"strconv"
"strings"
"time"
"github.com/prometheus/prometheus/promql/parser"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
// Rule types, compared against AlertRule.Type (0: nightingale push model,
// 1: prometheus pull model).
const (
	PUSH = 0
	PULL = 1
)

// Rule states; the values match the ones documented on AlertRule.Status
// (0: active, 1: disabled).
const (
	ALERT_RULE_ACTIVE   = 0
	ALERT_RULE_DISABLED = 1
)
// AlertRule is the model of one alert rule; TableName maps it to the
// "alert_rule" table. Fields tagged xorm:"-" are not persisted: they are
// filled in at runtime (PushExpr, PullExpr and FirstMetric by Decode).
type AlertRule struct {
Id int64 `json:"id"`
GroupId int64 `json:"group_id"`
Name string `json:"name"`
Type int `json:"type"` // 0: nightingale (PUSH), 1: prometheus (PULL)
// Expression is the raw JSON rule body; Decode unmarshals it into PushExpr
// or PullExpr depending on Type.
Expression json.RawMessage `json:"expression"`
Status int `json:"status"` // 0: active, 1: disabled
// AppendTags is a whitespace-separated list of key=value items (checked by Validate).
AppendTags string `json:"append_tags"`
EnableStime string `json:"enable_stime"`
EnableEtime string `json:"enable_etime"`
EnableDaysOfWeek string `json:"enable_days_of_week"`
RecoveryNotify int `json:"recovery_notify"`
Priority int `json:"priority"`
NotifyChannels string `json:"notify_channels"`
// NotifyGroups and NotifyUsers are whitespace-separated lists of integer ids
// (checked by Validate).
NotifyGroups string `json:"notify_groups"`
NotifyUsers string `json:"notify_users"`
Callbacks string `json:"callbacks"`
RunbookUrl string `json:"runbook_url"`
Note string `json:"note"`
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
AlertDuration int `json:"alert_duration"` // alert statistics period; the PULL model treats it as the P8S (Prometheus) `for` time
PushExpr PushExpression `xorm:"-" json:"-"`
PullExpr PullExpression `xorm:"-" json:"-"`
FirstMetric string `xorm:"-" json:"-"` // Exps may contain several metrics; only the first one is kept, for later use when building a map
NotifyUsersDetail []*User `xorm:"-" json:"notify_users_detail"`
NotifyGroupsDetail []*UserGroup `xorm:"-" json:"notify_groups_detail"`
}
// PushExpression is the decoded form of AlertRule.Expression for PUSH rules.
type PushExpression struct {
TagFilters []TagFilter `json:"tags_filters"`
ResFilters []ResFilter `json:"res_filters"`
Exps []Exp `json:"trigger_conditions"`
TogetherOrAny int `json:"together_or_any"` // whether all conditions must trigger or a single one is enough: =0 all, =1 any one
}
// PullExpression is the decoded form of AlertRule.Expression for non-PUSH
// (pull) rules.
type PullExpression struct {
PromQl string `json:"promql"` // promql: the final expression
EvaluationInterval int `json:"evaluation_interval"` // promql pull evaluation period; Validate defaults it to 15 when <= 0
}
// ResFilter is one resource filter of a push expression: a function name plus
// its parameters. The supported Func values are listed below.
type ResFilter struct {
Func string `json:"func"`
// * InClasspath -> could keep a large in-memory map host->classpath, then check whether any classpath of the host satisfies the InClasspath condition
// * NotInClasspath
// * InClasspathPrefix -> could keep a large in-memory map host->classpath, then check whether any classpath of the host satisfies the InClasspathPrefix condition
// * NotInClasspathPrefix
// * InResourceList
// * NotInResourceList
// * HasPrefixString
// * NoPrefixString
// * HasSuffixString
// * NoSuffixString
// * ContainsString
// * NotContainsString
// * MatchRegexp
// * NotMatchRegexp
Params []string `json:"params"`
}
// TagFilter is one tag filter of a push expression: the tag key, a function
// name and its parameters. The supported Func values are listed below.
type TagFilter struct {
Key string `json:"key"`
Func string `json:"func"`
// * InList
// * NotInList
// * HasPrefixString
// * NoPrefixString
// * HasSuffixString
// * NoSuffixString
// * ContainsString
// * NotContainsString
// * MatchRegexp
// * NotMatchRegexp
Params []string `json:"params"`
}
// Exp is one trigger condition of a push expression.
type Exp struct {
Optr string `json:"optr"` // comparison operator: >,<,=,!=
Func string `json:"func"` // aggregation function: all,max,min
Metric string `json:"metric"` // metric name
Params []int `json:"params"` // n consecutive seconds
Threshold float64 `json:"threshold"` // threshold value
}
// Decode unmarshals the raw Expression JSON into the typed field that matches
// ar.Type: PushExpr when Type is PUSH, PullExpr for every other type.
//
// For PUSH rules at least one trigger condition is required, and the metric of
// the first condition is cached in FirstMetric. Every failure is logged as a
// warning and returned as a non-nil error.
func (ar *AlertRule) Decode() error {
	if ar.Type != PUSH {
		if err := json.Unmarshal(ar.Expression, &ar.PullExpr); err != nil {
			logger.Warningf("decode alert rule(%d): unmarshal pull expression(%s) error: %v", ar.Id, string(ar.Expression), err)
			return err
		}
		return nil
	}

	if err := json.Unmarshal(ar.Expression, &ar.PushExpr); err != nil {
		logger.Warningf("decode alert rule(%d): unmarshal push expression(%s) error: %v", ar.Id, string(ar.Expression), err)
		return err
	}

	if len(ar.PushExpr.Exps) < 1 {
		logger.Warningf("decode alert rule(%d): exps size is zero", ar.Id)
		// Unmarshal succeeded, so there is no error value to forward here; an
		// explicit error is required, otherwise the caller sees success and
		// FirstMetric is left empty.
		return fmt.Errorf("decode alert rule(%d): exps size is zero", ar.Id)
	}

	ar.FirstMetric = ar.PushExpr.Exps[0].Metric
	return nil
}
// TableName returns the name of the database table that stores alert rules.
func (ar *AlertRule) TableName() string {
	const table = "alert_rule"
	return table
}
// Validate checks the rule before it is stored and normalizes a few fields in
// place.
//
// Checks, in order:
//   - Name must pass str.Dangerous.
//   - Expression must decode (see Decode).
//   - PUSH rules: every resource/tag filter needs at least one parameter, no
//     parameter may be empty or contain whitespace, and parameters of
//     "Regexp" functions must compile.
//   - PULL rules: the promql must be non-empty and parseable.
//   - AppendTags items must each contain exactly one "=".
//   - NotifyGroups and NotifyUsers must be whitespace-separated integers.
//
// Side effects: AlertDuration defaults to 60 when <= 0 (PUSH and PULL),
// PullExpr.EvaluationInterval defaults to 15 when <= 0 (PULL), and AppendTags
// is trimmed.
func (ar *AlertRule) Validate() error {
	if str.Dangerous(ar.Name) {
		return _e("AlertRule name has invalid characters")
	}

	if err := ar.Decode(); err != nil {
		return _e("AlertRule expression is invalid")
	}

	switch ar.Type {
	case PUSH:
		if ar.AlertDuration <= 0 {
			ar.AlertDuration = 60
		}

		for _, filter := range ar.PushExpr.ResFilters {
			if err := validateFilterParams("Resource filter(Func:%s)'s param invalid", filter.Func, filter.Params); err != nil {
				return err
			}
		}

		for _, filter := range ar.PushExpr.TagFilters {
			if err := validateFilterParams("Tags filter(Func:%s)'s param invalid", filter.Func, filter.Params); err != nil {
				return err
			}
		}
	case PULL:
		if ar.AlertDuration <= 0 {
			ar.AlertDuration = 60
		}

		if ar.PullExpr.PromQl == "" {
			return _e("promql empty")
		}

		if _, err := parser.ParseExpr(ar.PullExpr.PromQl); err != nil {
			return _e("promql parse error:%s", err.Error())
		}

		if ar.PullExpr.EvaluationInterval <= 0 {
			ar.PullExpr.EvaluationInterval = 15
		}
	}

	// If append tags are present, each one must look like key=value, i.e.
	// contain exactly one "=".
	ar.AppendTags = strings.TrimSpace(ar.AppendTags)
	for _, tag := range strings.Fields(ar.AppendTags) {
		if strings.Count(tag, "=") != 1 {
			return _e("AppendTags(%s) invalid", tag)
		}
	}

	// notifyGroups / notifyUsers check. A non-numeric id here can only come
	// from malicious traffic, so these messages are not passed through i18n.
	for _, gid := range strings.Fields(ar.NotifyGroups) {
		if _, err := strconv.ParseInt(gid, 10, 64); err != nil {
			return fmt.Errorf("NotifyGroups(%s) invalid", ar.NotifyGroups)
		}
	}

	for _, uid := range strings.Fields(ar.NotifyUsers) {
		if _, err := strconv.ParseInt(uid, 10, 64); err != nil {
			return fmt.Errorf("NotifyUsers(%s) invalid", ar.NotifyUsers)
		}
	}

	return nil
}

// validateFilterParams applies the parameter rules shared by resource and tag
// filters. invalidFmt is the complete message format (taking the function name
// as its only argument) reported for an empty parameter list or a parameter
// that is empty or contains whitespace; it is passed in whole so the message
// text handed to _e stays exactly as before for each filter kind.
func validateFilterParams(invalidFmt, fn string, params []string) error {
	// The parameter list must not be empty: at least one parameter is required.
	if len(params) == 0 {
		return _e(invalidFmt, fn)
	}

	// No single parameter may be empty or contain whitespace.
	for _, p := range params {
		if p == "" || strings.ContainsAny(p, " \r\n\t") {
			return _e(invalidFmt, fn)
		}
	}

	// Regexp-based functions additionally need every parameter to compile.
	if strings.Contains(fn, "Regexp") {
		for _, p := range params {
			if _, err := regexp.Compile(p); err != nil {
				return _e("Regexp: %s cannot be compiled", p)
			}
		}
	}

	return nil
}
// AlertRuleCount returns how many alert rules match the given where clause and
// its arguments. A database failure is logged and reported as
// internalServerError.
func AlertRuleCount(where string, args ...interface{}) (num int64, err error) {
	if num, err = DB.Where(where, args...).Count(new(AlertRule)); err != nil {
		logger.Errorf("mysql.error: count alert_rule fail: %v", err)
		return num, internalServerError
	}

	return num, nil
}
// Add validates the rule, rejects it when a rule with the same name already
// exists in the same group, stamps CreateAt/UpdateAt with the current Unix
// time and inserts it.
func (ar *AlertRule) Add() error {
	err := ar.Validate()
	if err != nil {
		return err
	}

	existing, err := AlertRuleCount("group_id=? and name=?", ar.GroupId, ar.Name)
	if err != nil {
		return err
	}

	if existing > 0 {
		return _e("Alert rule %s already exists", ar.Name)
	}

	ts := time.Now().Unix()
	ar.CreateAt, ar.UpdateAt = ts, ts

	return DBInsertOne(ar)
}
// Update validates the rule and then writes the listed columns of the row
// identified by ar.Id. A database failure is logged and reported as
// internalServerError.
func (ar *AlertRule) Update(cols ...string) error {
	if invalid := ar.Validate(); invalid != nil {
		return invalid
	}

	if _, err := DB.Where("id=?", ar.Id).Cols(cols...).Update(ar); err != nil {
		logger.Errorf("mysql.error: update alert_rule(id=%d) fail: %v", ar.Id, err)
		return internalServerError
	}

	return nil
}
// AlertRuleUpdateStatus sets status on every alert rule whose id is in ids.
//
// An empty ids slice is rejected up front, matching AlertRulesDel; otherwise
// the statement would presumably end in "id in ()" (assuming str.IdsString
// yields an empty string for no ids), which is not valid SQL. A database
// failure is logged and reported as internalServerError, like the other
// mutating helpers in this file.
func AlertRuleUpdateStatus(ids []int64, status int) error {
	if len(ids) == 0 {
		return fmt.Errorf("param ids is empty")
	}

	_, err := DB.Exec("UPDATE alert_rule SET status=? WHERE id in ("+str.IdsString(ids)+")", status)
	if err != nil {
		logger.Errorf("mysql.error: update alert_rule(%v) status fail: %v", ids, err)
		return internalServerError
	}

	return nil
}
// AlertRuleUpdateNotifyGroups overwrites notify_groups and notify_users on
// every alert rule whose id is in ids.
//
// An empty ids slice is rejected up front, matching AlertRulesDel; otherwise
// the statement would presumably end in "id in ()" (assuming str.IdsString
// yields an empty string for no ids), which is not valid SQL. A database
// failure is logged and reported as internalServerError, like the other
// mutating helpers in this file.
func AlertRuleUpdateNotifyGroups(ids []int64, notifyGroups string, notifyUsers string) error {
	if len(ids) == 0 {
		return fmt.Errorf("param ids is empty")
	}

	_, err := DB.Exec("UPDATE alert_rule SET notify_groups = ? , notify_users = ? where id in ("+str.IdsString(ids)+")", notifyGroups, notifyUsers)
	if err != nil {
		logger.Errorf("mysql.error: update alert_rule(%v) notify_groups/notify_users fail: %v", ids, err)
		return internalServerError
	}

	return nil
}
// AlertRuleUpdateNotifyChannels overwrites notify_channels on every alert rule
// whose id is in ids.
//
// An empty ids slice is rejected up front, matching AlertRulesDel; otherwise
// the statement would presumably end in "id in ()" (assuming str.IdsString
// yields an empty string for no ids), which is not valid SQL. A database
// failure is logged and reported as internalServerError, like the other
// mutating helpers in this file.
func AlertRuleUpdateNotifyChannels(ids []int64, notifyChannels string) error {
	if len(ids) == 0 {
		return fmt.Errorf("param ids is empty")
	}

	_, err := DB.Exec("UPDATE alert_rule SET notify_channels = ? where id in ("+str.IdsString(ids)+")", notifyChannels)
	if err != nil {
		logger.Errorf("mysql.error: update alert_rule(%v) notify_channels fail: %v", ids, err)
		return internalServerError
	}

	return nil
}
// AlertRuleUpdateAppendTags overwrites append_tags on every alert rule whose
// id is in ids.
//
// An empty ids slice is rejected up front, matching AlertRulesDel; otherwise
// the statement would presumably end in "id in ()" (assuming str.IdsString
// yields an empty string for no ids), which is not valid SQL. A database
// failure is logged and reported as internalServerError, like the other
// mutating helpers in this file.
func AlertRuleUpdateAppendTags(ids []int64, appendTags string) error {
	if len(ids) == 0 {
		return fmt.Errorf("param ids is empty")
	}

	_, err := DB.Exec("UPDATE alert_rule SET append_tags = ? where id in ("+str.IdsString(ids)+")", appendTags)
	if err != nil {
		logger.Errorf("mysql.error: update alert_rule(%v) append_tags fail: %v", ids, err)
		return internalServerError
	}

	return nil
}
// AlertRuleTotal counts alert rules. With a non-empty query only rules whose
// name contains query (SQL "like %query%") are counted; otherwise all rules
// are. A database failure is logged and reported as (0, internalServerError).
func AlertRuleTotal(query string) (num int64, err error) {
	if query == "" {
		num, err = DB.Count(new(AlertRule))
	} else {
		num, err = DB.Where("name like ?", "%"+query+"%").Count(new(AlertRule))
	}

	if err != nil {
		logger.Errorf("mysql.error: count alert_rule fail: %v", err)
		return 0, internalServerError
	}

	return num, nil
}
// AlertRuleGets returns one page of alert rules ordered by name. With a
// non-empty query only rules whose name contains query (SQL "like %query%")
// are returned. A database failure is logged and reported as
// internalServerError.
func AlertRuleGets(query string, limit, offset int) ([]AlertRule, error) {
	var rules []AlertRule

	sess := DB.Limit(limit, offset).OrderBy("name")
	if query != "" {
		sess = sess.Where("name like ?", "%"+query+"%")
	}

	if err := sess.Find(&rules); err != nil {
		logger.Errorf("mysql.error: query alert_rule fail: %v", err)
		return rules, internalServerError
	}

	return rules, nil
}
// AlertRulesOfGroup returns the alert rules of one group ordered by name. When
// the group has no rules an empty, non-nil slice is returned. A database
// failure is logged and reported as internalServerError.
func AlertRulesOfGroup(groupId int64) ([]AlertRule, error) {
	var rules []AlertRule

	if err := DB.Where("group_id=?", groupId).OrderBy("name").Find(&rules); err != nil {
		logger.Errorf("mysql.error: query alert_rule of group(id=%d) fail: %v", groupId, err)
		return rules, internalServerError
	}

	if len(rules) > 0 {
		return rules, nil
	}

	// Hand back an explicit empty slice rather than whatever Find left behind.
	return []AlertRule{}, nil
}
// AlertRuleGet fetches a single alert rule matching the where clause and its
// arguments. It returns (nil, nil) when no row matches and
// (nil, internalServerError) after logging when the query fails.
func AlertRuleGet(where string, args ...interface{}) (*AlertRule, error) {
	rule := new(AlertRule)

	found, err := DB.Where(where, args...).Get(rule)
	switch {
	case err != nil:
		logger.Errorf("mysql.error: query alert_rule(%s)%+v fail: %s", where, args, err)
		return nil, internalServerError
	case !found:
		return nil, nil
	}

	return rule, nil
}
// Del deletes the row identified by ar.Id. A database failure is logged and
// reported as internalServerError.
func (ar *AlertRule) Del() error {
	if _, err := DB.Where("id=?", ar.Id).Delete(new(AlertRule)); err != nil {
		logger.Errorf("mysql.error: delete alert_rule fail: %v", err)
		return internalServerError
	}

	return nil
}
func AlertRulesDel(ids []int64) error {
if len(ids) == 0 {
return fmt.Errorf("param ids is empty")
}
_, err := DB.Exec("DELETE FROM alert_rule where id in (" + str.IdsString(ids) + ")")
if err != nil {
logger.Errorf("mysql.error: delete alert_rule(%v) fail: %v", ids, err)
return internalServerError
}
return nil
}
func AlertRuleUpdateGroup(alertRuleIds []int64, groupId int64) error {
if len(alertRuleIds) == 0 {
return fmt.Errorf("param alertRuleIds is empty")
}
_, err := DB.Exec("UPDATE alert_rule SET group_id = ? where id in ("+str.IdsString(alertRuleIds)+")", groupId)
if err != nil {
logger.Errorf("mysql.error: update alert_rule(group_id=%d) fail: %v", groupId, err)
return internalServerError
}
return nil
}
// AllAlertRules loads every alert rule. Unlike most helpers in this file, the
// database error is handed back unchanged and is not logged here.
func AllAlertRules() ([]*AlertRule, error) {
	var rules []*AlertRule

	if err := DB.Find(&rules); err != nil {
		return rules, err
	}

	return rules, nil
}
// AlertRuleStatistic is the summary of the alert_rule table that
// GetAlertRuleStatistic selects.
type AlertRuleStatistic struct {
Count int64 `json:"count"` // selected as count(1)
MaxUpdateAt int64 `json:"max_update_at"` // selected as max(update_at)
}
// GetAlertRuleStatistic selects the row count and the largest update_at of the
// alert_rule table. The database error, if any, is returned unchanged together
// with whatever was scanned into the result.
func GetAlertRuleStatistic() (AlertRuleStatistic, error) {
	const query = "select count(1) as count, max(update_at) as max_update_at from alert_rule"

	stat := AlertRuleStatistic{}
	_, err := DB.SQL(query).Get(&stat)

	return stat, err
}