add feature: recover_duration

This commit is contained in:
Ulric Qin 2022-01-26 08:59:30 +08:00
parent e5d7612af9
commit 598ae07fc2
3 changed files with 7 additions and 0 deletions

View File

@ -212,6 +212,7 @@ CREATE TABLE `alert_rule` (
`notify_channels` varchar(255) not null default '' comment 'split by space: sms voice email dingtalk wecom',
`notify_groups` varchar(255) not null default '' comment 'split by space: 233 43',
`notify_repeat_step` int not null default 0 comment 'unit: min',
`recover_duration` int not null default 0 comment 'unit: s',
`callbacks` varchar(255) not null default '' comment 'split by space: http://a.com/api/x http://a.com/api/y',
`runbook_url` varchar(255),
`append_tags` varchar(255) not null default '' comment 'split by space: service=n9e mod=api',

View File

@ -36,6 +36,7 @@ type AlertRule struct {
NotifyGroupsObj []UserGroup `json:"notify_groups_obj" gorm:"-"` // for fe
NotifyGroupsJSON []string `json:"notify_groups" gorm:"-"` // for fe
NotifyRepeatStep int `json:"notify_repeat_step"` // notify repeat interval, unit: min
RecoverDuration int64 `json:"recover_duration"` // unit: s
Callbacks string `json:"-"` // split by space: http://a.com/api/x http://a.com/api/y'
CallbacksJSON []string `json:"callbacks" gorm:"-"` // for fe
RunbookUrl string `json:"runbook_url"` // sop url

View File

@ -343,6 +343,11 @@ func (r RuleEval) recoverRule(alertingKeys map[string]struct{}, now int64) {
continue
}
// 如果配置了留观时长,就不能立马恢复了
if r.rule.RecoverDuration > 0 && now-event.LastEvalTime <= r.rule.RecoverDuration {
continue
}
// 没查到触发阈值的vector姑且就认为这个vector的值恢复了
// 我确实无法分辨是prom中有值但是未满足阈值所以没返回还是prom中确实丢了一些点导致没有数据可以返回尴尬
delete(r.fires, hash)