refactor repeater

This commit is contained in:
Ulric Qin 2021-12-23 22:19:49 +08:00
parent 3428b11ea8
commit 837cfab1bd
3 changed files with 49 additions and 33 deletions

View File

@ -58,36 +58,7 @@ func persist(event *models.AlertCurEvent) {
his := event.ToHis()
if has {
// 数据库里有这个事件,说明之前触发过了
if event.IsRecovered {
// 本次恢复了,把未恢复的事件删除,在全量告警里添加记录
err := models.AlertCurEventDelByHash(event.Hash)
if err != nil {
logger.Errorf("event_del_cur_fail: %v hash=%s", err, event.Hash)
}
if err := his.Add(); err != nil {
logger.Errorf(
"event_persist_his_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s",
err,
event.RuleId,
event.Hash,
event.TagsJSON,
event.TriggerTime,
event.TriggerValue,
)
}
}
return
}
if event.IsRecovered {
// alert_cur_event表里没有数据,表示之前没告警,结果现在报了恢复,神奇....理论上不应该出现的
return
}
// 本次是告警,alert_cur_event表里也没有数据
// 不管是告警还是恢复,全量告警里都要记录
if err := his.Add(); err != nil {
logger.Errorf(
"event_persist_his_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s",
@ -100,6 +71,41 @@ func persist(event *models.AlertCurEvent) {
)
}
if has {
// 活跃告警表中有记录,删之
err = models.AlertCurEventDelByHash(event.Hash)
if err != nil {
logger.Errorf("event_del_cur_fail: %v hash=%s", err, event.Hash)
return
}
if !event.IsRecovered {
// 恢复事件,从活跃告警列表彻底删掉;告警事件,要重新加进来新的event
// use his id as cur id
event.Id = his.Id
if event.Id > 0 {
if err := event.Add(); err != nil {
logger.Errorf(
"event_persist_cur_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s",
err,
event.RuleId,
event.Hash,
event.TagsJSON,
event.TriggerTime,
event.TriggerValue,
)
}
}
}
return
}
if event.IsRecovered {
// alert_cur_event表里没有数据,表示之前没告警,结果现在报了恢复,神奇....理论上不应该出现的
return
}
// use his id as cur id
event.Id = his.Id
if event.Id > 0 {

View File

@ -21,7 +21,7 @@ func Start(ctx context.Context) error {
go loopFilterRules(ctx)
// repeat notifier
go loopRepeat(ctx)
// go loopRepeat(ctx)
go reportQueueSize()

View File

@ -288,8 +288,18 @@ func labelMapToArr(m map[string]string) []string {
}
func (r RuleEval) handleNewEvent(event *models.AlertCurEvent) {
if _, has := r.fires[event.Hash]; has {
// fired before, nothing to do
if fired, has := r.fires[event.Hash]; has {
if r.rule.NotifyRepeatStep == 0 {
// 说明不想重复通知,那就直接返回了,nothing to do
return
}
// 之前发送过告警了,这次是否要继续发送,要看是否过了通道静默时间
if event.LastEvalTime > fired.LastEvalTime+int64(r.rule.NotifyRepeatStep) {
r.fires[event.Hash] = event
pushEventToQueue(event)
}
return
}