diff --git a/docker/initsql/a-n9e.sql b/docker/initsql/a-n9e.sql index 89dff8cc..6926e425 100644 --- a/docker/initsql/a-n9e.sql +++ b/docker/initsql/a-n9e.sql @@ -212,6 +212,7 @@ CREATE TABLE `alert_rule` ( `notify_channels` varchar(255) not null default '' comment 'split by space: sms voice email dingtalk wecom', `notify_groups` varchar(255) not null default '' comment 'split by space: 233 43', `notify_repeat_step` int not null default 0 comment 'unit: min', + `recover_duration` int not null default 0 comment 'unit: s', `callbacks` varchar(255) not null default '' comment 'split by space: http://a.com/api/x http://a.com/api/y', `runbook_url` varchar(255), `append_tags` varchar(255) not null default '' comment 'split by space: service=n9e mod=api', diff --git a/src/models/alert_rule.go b/src/models/alert_rule.go index 24768f5b..68cebe0d 100644 --- a/src/models/alert_rule.go +++ b/src/models/alert_rule.go @@ -36,6 +36,7 @@ type AlertRule struct { NotifyGroupsObj []UserGroup `json:"notify_groups_obj" gorm:"-"` // for fe NotifyGroupsJSON []string `json:"notify_groups" gorm:"-"` // for fe NotifyRepeatStep int `json:"notify_repeat_step"` // notify repeat interval, unit: min + RecoverDuration int64 `json:"recover_duration"` // recovery observation period, unit: s Callbacks string `json:"-"` // split by space: http://a.com/api/x http://a.com/api/y' CallbacksJSON []string `json:"callbacks" gorm:"-"` // for fe RunbookUrl string `json:"runbook_url"` // sop url diff --git a/src/server/engine/worker.go b/src/server/engine/worker.go index 0cbc811a..f67ec7b0 100644 --- a/src/server/engine/worker.go +++ b/src/server/engine/worker.go @@ -343,6 +343,11 @@ func (r RuleEval) recoverRule(alertingKeys map[string]struct{}, now int64) { continue } + // If a recovery observation period (RecoverDuration) is configured, the event must not recover immediately + if r.rule.RecoverDuration > 0 && now-event.LastEvalTime <= r.rule.RecoverDuration { + continue + } + // 没查到触发阈值的vector,姑且就认为这个vector的值恢复了 // 我确实无法分辨,是prom中有值但是未满足阈值所以没返回,还是prom中确实丢了一些点导致没有数据可以返回,尴尬 delete(r.fires, hash)