judge refactor
This commit is contained in:
parent
94eb306692
commit
a8f7f6a04e
|
@ -4,10 +4,12 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/didi/nightingale/src/common/dataobj"
|
"github.com/didi/nightingale/src/common/dataobj"
|
||||||
|
"github.com/didi/nightingale/src/models"
|
||||||
"github.com/didi/nightingale/src/modules/judge/cache"
|
"github.com/didi/nightingale/src/modules/judge/cache"
|
||||||
"github.com/didi/nightingale/src/toolkits/stats"
|
"github.com/didi/nightingale/src/toolkits/stats"
|
||||||
"github.com/didi/nightingale/src/toolkits/str"
|
"github.com/didi/nightingale/src/toolkits/str"
|
||||||
|
@ -23,17 +25,18 @@ var (
|
||||||
|
|
||||||
// 执行Query操作
|
// 执行Query操作
|
||||||
// 默认不重试, 如果要做重试, 在这里完成
|
// 默认不重试, 如果要做重试, 在这里完成
|
||||||
func Query(reqs []*dataobj.QueryData, sid int64, expFunc string) []*dataobj.TsdbQueryResponse {
|
func Query(reqs []*dataobj.QueryData, stra *models.Stra, expFunc string) []*dataobj.TsdbQueryResponse {
|
||||||
stats.Counter.Set("query.data", 1)
|
stats.Counter.Set("query.data", 1)
|
||||||
var resp *dataobj.QueryDataResp
|
var resp *dataobj.QueryDataResp
|
||||||
var respData []*dataobj.TsdbQueryResponse
|
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
respData, reqs = QueryFromMem(reqs, sid)
|
filterMap := make(map[string]struct{})
|
||||||
if len(reqs) > 0 {
|
|
||||||
|
respData, newReqs := QueryFromMem(reqs, stra)
|
||||||
|
if len(newReqs) > 0 {
|
||||||
stats.Counter.Set("query.data.by.transfer", 1)
|
stats.Counter.Set("query.data.by.transfer", 1)
|
||||||
for i := 0; i < 3; i++ {
|
for i := 0; i < 3; i++ {
|
||||||
err = TransferConnPools.Call("", "Transfer.Query", reqs, &resp)
|
err = TransferConnPools.Call("", "Transfer.Query", newReqs, &resp)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -41,16 +44,73 @@ func Query(reqs []*dataobj.QueryData, sid int64, expFunc string) []*dataobj.Tsdb
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
stats.Counter.Set("query.data.transfer.err", 1)
|
stats.Counter.Set("query.data.transfer.err", 1)
|
||||||
logger.Warning("get data err:%v msg:%+v, query data from mem", err, resp)
|
logger.Warningf("get data err:%v", err)
|
||||||
} else {
|
} else {
|
||||||
respData = append(respData, resp.Data...)
|
for i := 0; i < len(resp.Data); i++ {
|
||||||
|
var values dataobj.RRDValues
|
||||||
|
count := len(resp.Data[i].Values)
|
||||||
|
//裁剪掉多余的点
|
||||||
|
for j := count - 1; j > 0; j-- {
|
||||||
|
if resp.Data[i].Values[count-1].Timestamp-resp.Data[i].Values[j].Timestamp > int64(stra.AlertDur) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
values = append(values, resp.Data[i].Values[j])
|
||||||
|
}
|
||||||
|
sort.Sort(values)
|
||||||
|
|
||||||
|
resp.Data[i].Values = values
|
||||||
|
respData = append(respData, resp.Data[i])
|
||||||
|
key := resp.Data[i].Endpoint + "/" + resp.Data[i].Nid + "/" + resp.Data[i].Counter
|
||||||
|
filterMap[key] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//补全查询数据丢失的曲线
|
||||||
|
for _, req := range newReqs {
|
||||||
|
if len(req.Endpoints) > 0 {
|
||||||
|
for _, endpoint := range req.Endpoints {
|
||||||
|
for _, counter := range req.Counters {
|
||||||
|
key := endpoint + "//" + counter
|
||||||
|
if _, exists := filterMap[key]; exists {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
data := &dataobj.TsdbQueryResponse{
|
||||||
|
Start: req.Start,
|
||||||
|
End: req.End,
|
||||||
|
Endpoint: endpoint,
|
||||||
|
Counter: counter,
|
||||||
|
Step: req.Step,
|
||||||
|
}
|
||||||
|
respData = append(respData, data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(req.Nids) > 0 {
|
||||||
|
for _, nid := range req.Nids {
|
||||||
|
for _, counter := range req.Counters {
|
||||||
|
key := "/" + nid + "/" + counter
|
||||||
|
if _, exists := filterMap[key]; exists {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
data := &dataobj.TsdbQueryResponse{
|
||||||
|
Start: req.Start,
|
||||||
|
End: req.End,
|
||||||
|
Nid: nid,
|
||||||
|
Counter: counter,
|
||||||
|
Step: req.Step,
|
||||||
|
}
|
||||||
|
respData = append(respData, data)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return respData
|
return respData
|
||||||
}
|
}
|
||||||
|
|
||||||
func QueryFromMem(reqs []*dataobj.QueryData, sid int64) ([]*dataobj.TsdbQueryResponse, []*dataobj.QueryData) {
|
func QueryFromMem(reqs []*dataobj.QueryData, stra *models.Stra) ([]*dataobj.TsdbQueryResponse, []*dataobj.QueryData) {
|
||||||
stats.Counter.Set("query.data.by.mem", 1)
|
stats.Counter.Set("query.data.by.mem", 1)
|
||||||
|
|
||||||
var resps []*dataobj.TsdbQueryResponse
|
var resps []*dataobj.TsdbQueryResponse
|
||||||
|
@ -79,7 +139,7 @@ func QueryFromMem(reqs []*dataobj.QueryData, sid int64) ([]*dataobj.TsdbQueryRes
|
||||||
Nid: nid,
|
Nid: nid,
|
||||||
Metric: metric,
|
Metric: metric,
|
||||||
TagsMap: tagsMap,
|
TagsMap: tagsMap,
|
||||||
Sid: sid,
|
Sid: stra.Id,
|
||||||
}
|
}
|
||||||
|
|
||||||
pk := item.MD5()
|
pk := item.MD5()
|
||||||
|
@ -88,7 +148,7 @@ func QueryFromMem(reqs []*dataobj.QueryData, sid int64) ([]*dataobj.TsdbQueryRes
|
||||||
historyData := linkedList.QueryDataByTS(req.Start, req.End)
|
historyData := linkedList.QueryDataByTS(req.Start, req.End)
|
||||||
resp.Values = dataobj.HistoryData2RRDData(historyData)
|
resp.Values = dataobj.HistoryData2RRDData(historyData)
|
||||||
}
|
}
|
||||||
if len(resp.Values) > 0 {
|
if len(resp.Values) > 0 && resp.Values[len(resp.Values)-1].Timestamp-resp.Values[0].Timestamp >= int64(stra.AlertDur) {
|
||||||
resps = append(resps, resp)
|
resps = append(resps, resp)
|
||||||
} else {
|
} else {
|
||||||
newReq.Nids = append(newReq.Nids, nid)
|
newReq.Nids = append(newReq.Nids, nid)
|
||||||
|
@ -112,7 +172,7 @@ func QueryFromMem(reqs []*dataobj.QueryData, sid int64) ([]*dataobj.TsdbQueryRes
|
||||||
Endpoint: endpoint,
|
Endpoint: endpoint,
|
||||||
Metric: metric,
|
Metric: metric,
|
||||||
TagsMap: tagsMap,
|
TagsMap: tagsMap,
|
||||||
Sid: sid,
|
Sid: stra.Id,
|
||||||
}
|
}
|
||||||
|
|
||||||
pk := item.MD5()
|
pk := item.MD5()
|
||||||
|
@ -121,7 +181,7 @@ func QueryFromMem(reqs []*dataobj.QueryData, sid int64) ([]*dataobj.TsdbQueryRes
|
||||||
historyData := linkedList.QueryDataByTS(req.Start, req.End)
|
historyData := linkedList.QueryDataByTS(req.Start, req.End)
|
||||||
resp.Values = dataobj.HistoryData2RRDData(historyData)
|
resp.Values = dataobj.HistoryData2RRDData(historyData)
|
||||||
}
|
}
|
||||||
if len(resp.Values) > 0 {
|
if len(resp.Values) > 0 && resp.Values[len(resp.Values)-1].Timestamp-resp.Values[0].Timestamp >= int64(stra.AlertDur) {
|
||||||
resps = append(resps, resp)
|
resps = append(resps, resp)
|
||||||
} else {
|
} else {
|
||||||
newReq.Endpoints = append(newReq.Endpoints, endpoint)
|
newReq.Endpoints = append(newReq.Endpoints, endpoint)
|
||||||
|
|
|
@ -86,7 +86,7 @@ func ToJudge(historyMap *cache.JudgeItemMap, key string, val *dataobj.JudgeItem,
|
||||||
}
|
}
|
||||||
} else { //与条件
|
} else { //与条件
|
||||||
for _, expr := range stra.Exprs {
|
for _, expr := range stra.Exprs {
|
||||||
respData, err := GetData(stra, expr, val, now, true)
|
respData, err := GetData(stra, expr, val, now)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("stra:%+v get query data err:%v", stra, err)
|
logger.Errorf("stra:%+v get query data err:%v", stra, err)
|
||||||
return
|
return
|
||||||
|
@ -130,7 +130,7 @@ func ToJudge(historyMap *cache.JudgeItemMap, key string, val *dataobj.JudgeItem,
|
||||||
Hashid: getHashId(stra.Id, val),
|
Hashid: getHashId(stra.Id, val),
|
||||||
}
|
}
|
||||||
|
|
||||||
sendEventIfNeed(historyData, statusArr, event, stra)
|
sendEventIfNeed(statusArr, event, stra)
|
||||||
}
|
}
|
||||||
|
|
||||||
func Judge(stra *models.Stra, exp models.Exp, historyData []*dataobj.HistoryData, firstItem *dataobj.JudgeItem, now int64) (history dataobj.History, info string, lastValue string, status bool) {
|
func Judge(stra *models.Stra, exp models.Exp, historyData []*dataobj.HistoryData, firstItem *dataobj.JudgeItem, now int64) (history dataobj.History, info string, lastValue string, status bool) {
|
||||||
|
@ -188,7 +188,7 @@ func judgeItemWithStrategy(stra *models.Stra, historyData []*dataobj.HistoryData
|
||||||
stra.AlertDur = 7 * firstItem.Step
|
stra.AlertDur = 7 * firstItem.Step
|
||||||
}
|
}
|
||||||
|
|
||||||
respItems, err := GetData(stra, exp, firstItem, now-int64(exp.Params[0]), true)
|
respItems, err := GetData(stra, exp, firstItem, now-int64(exp.Params[0]))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("stra:%v %+v get compare data err:%v", stra.Id, exp, err)
|
logger.Errorf("stra:%v %+v get compare data err:%v", stra.Id, exp, err)
|
||||||
return
|
return
|
||||||
|
@ -218,16 +218,16 @@ func judgeItemWithStrategy(stra *models.Stra, historyData []*dataobj.HistoryData
|
||||||
return fn.Compute(historyData)
|
return fn.Compute(historyData)
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetData(stra *models.Stra, exp models.Exp, firstItem *dataobj.JudgeItem, now int64, sameTag bool) ([]*dataobj.TsdbQueryResponse, error) {
|
func GetData(stra *models.Stra, exp models.Exp, firstItem *dataobj.JudgeItem, now int64) ([]*dataobj.TsdbQueryResponse, error) {
|
||||||
var reqs []*dataobj.QueryData
|
var reqs []*dataobj.QueryData
|
||||||
var respData []*dataobj.TsdbQueryResponse
|
var respData []*dataobj.TsdbQueryResponse
|
||||||
var err error
|
var err error
|
||||||
if sameTag { //与条件要求是相同tag的场景,不需要查询索引
|
|
||||||
if firstItem.Tags != "" && len(firstItem.TagsMap) == 0 {
|
if firstItem.Tags != "" && len(firstItem.TagsMap) == 0 {
|
||||||
firstItem.TagsMap = str.DictedTagstring(firstItem.Tags)
|
firstItem.TagsMap = str.DictedTagstring(firstItem.Tags)
|
||||||
}
|
}
|
||||||
//+1 防止由于查询不到最新点,导致点数不够
|
|
||||||
start := now - int64(stra.AlertDur) - int64(firstItem.Step) + 1
|
//多查一些数据,防止由于查询不到最新点,导致点数不够
|
||||||
|
start := now - int64(stra.AlertDur) - int64(firstItem.Step) - 60
|
||||||
|
|
||||||
queryParam, err := query.NewQueryRequest(firstItem.Nid, firstItem.Endpoint, exp.Metric, firstItem.TagsMap, firstItem.Step, start, now)
|
queryParam, err := query.NewQueryRequest(firstItem.Nid, firstItem.Endpoint, exp.Metric, firstItem.TagsMap, firstItem.Step, start, now)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -235,28 +235,13 @@ func GetData(stra *models.Stra, exp models.Exp, firstItem *dataobj.JudgeItem, no
|
||||||
}
|
}
|
||||||
|
|
||||||
reqs = append(reqs, queryParam)
|
reqs = append(reqs, queryParam)
|
||||||
} /*else if firstItem != nil { //点驱动告警策略的场景
|
|
||||||
var nids, endpoints []string
|
|
||||||
if firstItem.Nid != "" {
|
|
||||||
nids = []string{firstItem.Nid}
|
|
||||||
} else if firstItem.Endpoint != "" {
|
|
||||||
endpoints = []string{firstItem.Endpoint}
|
|
||||||
}
|
|
||||||
reqs = GetReqs(stra, exp.Metric, nids, endpoints, now)
|
|
||||||
//} else { //nodata的场景
|
|
||||||
// reqs = GetReqs(stra, exp.Metric, stra.Nids, stra.Endpoints, now)
|
|
||||||
}*/
|
|
||||||
|
|
||||||
if len(reqs) == 0 {
|
if len(reqs) == 0 {
|
||||||
return respData, err
|
return respData, err
|
||||||
}
|
}
|
||||||
|
|
||||||
respData = query.Query(reqs, stra.Id, exp.Func)
|
respData = query.Query(reqs, stra, exp.Func)
|
||||||
|
|
||||||
if len(respData) < 1 {
|
|
||||||
stats.Counter.Set("get.data.null", 1)
|
|
||||||
err = fmt.Errorf("get query data is null")
|
|
||||||
}
|
|
||||||
return respData, err
|
return respData, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,19 +277,6 @@ func GetReqs(stra *models.Stra, metric string, nids, endpoints []string, now int
|
||||||
|
|
||||||
lostSeries := []cache.Series{}
|
lostSeries := []cache.Series{}
|
||||||
for _, index := range indexsData {
|
for _, index := range indexsData {
|
||||||
if index.Step == 0 {
|
|
||||||
//没有查到索引的 endpoint+metric 也要记录,给nodata处理
|
|
||||||
s := cache.Series{
|
|
||||||
Nid: index.Nid,
|
|
||||||
Endpoint: index.Endpoint,
|
|
||||||
Metric: index.Metric,
|
|
||||||
Tag: "",
|
|
||||||
Step: 10,
|
|
||||||
Dstype: "GAUGE",
|
|
||||||
TS: now,
|
|
||||||
}
|
|
||||||
lostSeries = append(lostSeries, s)
|
|
||||||
} else {
|
|
||||||
if len(index.Tags) == 0 {
|
if len(index.Tags) == 0 {
|
||||||
hash := getHash(index, "")
|
hash := getHash(index, "")
|
||||||
s := cache.Series{
|
s := cache.Series{
|
||||||
|
@ -331,8 +303,6 @@ func GetReqs(stra *models.Stra, metric string, nids, endpoints []string, now int
|
||||||
}
|
}
|
||||||
cache.SeriesMap.Set(stra.Id, hash, s)
|
cache.SeriesMap.Set(stra.Id, hash, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -397,7 +367,7 @@ func GetReqs(stra *models.Stra, metric string, nids, endpoints []string, now int
|
||||||
return reqs
|
return reqs
|
||||||
}
|
}
|
||||||
|
|
||||||
func sendEventIfNeed(historyData []*dataobj.HistoryData, status []bool, event *dataobj.Event, stra *models.Stra) {
|
func sendEventIfNeed(status []bool, event *dataobj.Event, stra *models.Stra) {
|
||||||
isTriggered := true
|
isTriggered := true
|
||||||
for _, s := range status {
|
for _, s := range status {
|
||||||
isTriggered = isTriggered && s
|
isTriggered = isTriggered && s
|
||||||
|
|
|
@ -46,46 +46,27 @@ func nodataJudge() {
|
||||||
logger.Debugf("stra:%+v endpoints or nids is null", stra)
|
logger.Debugf("stra:%+v endpoints or nids is null", stra)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if len(stra.Exprs) == 0 {
|
||||||
|
logger.Debugf("stra:%+v exp or nids is null", stra)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
now := time.Now().Unix()
|
now := time.Now().Unix()
|
||||||
respData, err := GetData(stra, stra.Exprs[0], nil, now, false)
|
reqs := GetReqs(stra, stra.Exprs[0].Metric, stra.Nids, stra.Endpoints, now)
|
||||||
if err != nil {
|
if len(reqs) == 0 {
|
||||||
logger.Errorf("stra:%+v get query data err:%v", stra, err)
|
logger.Errorf("stra:%+v get query data err:req is null", stra)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, data := range respData {
|
items := getJudgeItems(reqs)
|
||||||
var metric, tag string
|
for _, item := range items {
|
||||||
// 兼容格式disk.bytes.free/mount=/data/docker/overlay2/xxx/merged
|
|
||||||
arr := strings.SplitN(data.Counter, "/", 2)
|
|
||||||
if len(arr) == 2 {
|
|
||||||
metric = arr[0]
|
|
||||||
tag = arr[1]
|
|
||||||
} else {
|
|
||||||
metric = data.Counter
|
|
||||||
}
|
|
||||||
|
|
||||||
if data.Endpoint == "" && data.Nid == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
judgeItem := &dataobj.JudgeItem{
|
|
||||||
Nid: data.Nid,
|
|
||||||
Endpoint: data.Endpoint,
|
|
||||||
Metric: metric,
|
|
||||||
Tags: tag,
|
|
||||||
TagsMap: dataobj.DictedTagstring(tag),
|
|
||||||
DsType: data.DsType,
|
|
||||||
Step: data.Step,
|
|
||||||
}
|
|
||||||
|
|
||||||
nodataJob.Acquire()
|
nodataJob.Acquire()
|
||||||
go AsyncJudge(nodataJob, stra, stra.Exprs, dataobj.RRDData2HistoryData(data.Values), judgeItem, now)
|
go AsyncJudge(nodataJob, stra, stra.Exprs, item, now)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func AsyncJudge(sema *semaphore.Semaphore, stra *models.Stra, exps []models.Exp, historyData []*dataobj.HistoryData, firstItem *dataobj.JudgeItem, now int64) {
|
func AsyncJudge(sema *semaphore.Semaphore, stra *models.Stra, exps []models.Exp, firstItem *dataobj.JudgeItem, now int64) {
|
||||||
defer sema.Release()
|
defer sema.Release()
|
||||||
|
|
||||||
historyArr := []dataobj.History{}
|
historyArr := []dataobj.History{}
|
||||||
|
@ -94,7 +75,7 @@ func AsyncJudge(sema *semaphore.Semaphore, stra *models.Stra, exps []models.Exp,
|
||||||
value := ""
|
value := ""
|
||||||
|
|
||||||
for _, expr := range exps {
|
for _, expr := range exps {
|
||||||
respData, err := GetData(stra, expr, firstItem, now, true)
|
respData, err := GetData(stra, expr, firstItem, now)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("stra:%+v get query data err:%v", stra, err)
|
logger.Errorf("stra:%+v get query data err:%v", stra, err)
|
||||||
return
|
return
|
||||||
|
@ -136,5 +117,50 @@ func AsyncJudge(sema *semaphore.Semaphore, stra *models.Stra, exps []models.Exp,
|
||||||
Hashid: getHashId(stra.Id, firstItem),
|
Hashid: getHashId(stra.Id, firstItem),
|
||||||
}
|
}
|
||||||
|
|
||||||
sendEventIfNeed(historyData, statusArr, event, stra)
|
sendEventIfNeed(statusArr, event, stra)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getJudgeItems(reqs []*dataobj.QueryData) []*dataobj.JudgeItem {
|
||||||
|
var items []*dataobj.JudgeItem
|
||||||
|
for _, req := range reqs {
|
||||||
|
for _, counter := range req.Counters {
|
||||||
|
var metric, tag string
|
||||||
|
// 兼容格式disk.bytes.free/mount=/data/docker/overlay2/xxx/merged
|
||||||
|
arr := strings.SplitN(counter, "/", 2)
|
||||||
|
if len(arr) == 2 {
|
||||||
|
metric = arr[0]
|
||||||
|
tag = arr[1]
|
||||||
|
} else {
|
||||||
|
metric = counter
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(req.Nids) != 0 {
|
||||||
|
for _, nid := range req.Nids {
|
||||||
|
judgeItem := &dataobj.JudgeItem{
|
||||||
|
Nid: nid,
|
||||||
|
Endpoint: "",
|
||||||
|
Metric: metric,
|
||||||
|
Tags: tag,
|
||||||
|
TagsMap: dataobj.DictedTagstring(tag),
|
||||||
|
DsType: req.DsType,
|
||||||
|
Step: req.Step,
|
||||||
|
}
|
||||||
|
items = append(items, judgeItem)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for _, endpoint := range req.Endpoints {
|
||||||
|
judgeItem := &dataobj.JudgeItem{
|
||||||
|
Endpoint: endpoint,
|
||||||
|
Metric: metric,
|
||||||
|
Tags: tag,
|
||||||
|
TagsMap: dataobj.DictedTagstring(tag),
|
||||||
|
DsType: req.DsType,
|
||||||
|
Step: req.Step,
|
||||||
|
}
|
||||||
|
items = append(items, judgeItem)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return items
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue