diff --git a/src/server/engine/consume.go b/src/server/engine/consume.go index 2388f5be..c2eeaa62 100644 --- a/src/server/engine/consume.go +++ b/src/server/engine/consume.go @@ -58,36 +58,7 @@ func persist(event *models.AlertCurEvent) { his := event.ToHis() - if has { - // 数据库里有这个事件,说明之前触发过了 - if event.IsRecovered { - // 本次恢复了,把未恢复的事件删除,在全量告警里添加记录 - err := models.AlertCurEventDelByHash(event.Hash) - if err != nil { - logger.Errorf("event_del_cur_fail: %v hash=%s", err, event.Hash) - } - - if err := his.Add(); err != nil { - logger.Errorf( - "event_persist_his_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s", - err, - event.RuleId, - event.Hash, - event.TagsJSON, - event.TriggerTime, - event.TriggerValue, - ) - } - } - return - } - - if event.IsRecovered { - // alert_cur_event表里没有数据,表示之前没告警,结果现在报了恢复,神奇....理论上不应该出现的 - return - } - - // 本次是告警,alert_cur_event表里也没有数据 + // 不管是告警还是恢复,全量告警里都要记录 if err := his.Add(); err != nil { logger.Errorf( "event_persist_his_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s", @@ -100,6 +71,41 @@ func persist(event *models.AlertCurEvent) { ) } + if has { + // 活跃告警表中有记录,删之 + err = models.AlertCurEventDelByHash(event.Hash) + if err != nil { + logger.Errorf("event_del_cur_fail: %v hash=%s", err, event.Hash) + return + } + + if !event.IsRecovered { + // 恢复事件,从活跃告警列表彻底删掉,告警事件,要重新加进来新的event + // use his id as cur id + event.Id = his.Id + if event.Id > 0 { + if err := event.Add(); err != nil { + logger.Errorf( + "event_persist_cur_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s", + err, + event.RuleId, + event.Hash, + event.TagsJSON, + event.TriggerTime, + event.TriggerValue, + ) + } + } + } + + return + } + + if event.IsRecovered { + // alert_cur_event表里没有数据,表示之前没告警,结果现在报了恢复,神奇....理论上不应该出现的 + return + } + // use his id as cur id event.Id = his.Id if event.Id > 0 { diff --git a/src/server/engine/engine.go b/src/server/engine/engine.go index c092f7ed..1bbd24ab 100644 --- a/src/server/engine/engine.go +++ b/src/server/engine/engine.go @@ -21,7 +21,7 @@ func Start(ctx context.Context) error { go loopFilterRules(ctx) // repeat notifier - go loopRepeat(ctx) + // go loopRepeat(ctx) go reportQueueSize() diff --git a/src/server/engine/worker.go b/src/server/engine/worker.go index 46283d07..34b55500 100644 --- a/src/server/engine/worker.go +++ b/src/server/engine/worker.go @@ -288,8 +288,18 @@ func labelMapToArr(m map[string]string) []string { } func (r RuleEval) handleNewEvent(event *models.AlertCurEvent) { - if _, has := r.fires[event.Hash]; has { - // fired before, nothing to do + if fired, has := r.fires[event.Hash]; has { + if r.rule.NotifyRepeatStep == 0 { + // 说明不想重复通知,那就直接返回了,nothing to do + return + } + + // 之前发送过告警了,这次是否要继续发送,要看是否过了通道静默时间 + if event.LastEvalTime > fired.LastEvalTime+int64(r.rule.NotifyRepeatStep) { + r.fires[event.Hash] = event + pushEventToQueue(event) + } + return }