1. Move the default ql to the configuration (#764)

2. Add slowLogRecordSecond to log slow queries
3. Create a slice with a specified length to avoid dynamic expansion
4. Slow-query logs print the time taken to fetch series and the number of series in the result
This commit is contained in:
ning1875 2021-08-10 15:25:54 +08:00 committed by GitHub
parent 8b508fc514
commit 42fc0527cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 74 additions and 17 deletions

View File

@ -37,6 +37,8 @@ type PromeSection struct {
MaxConcurrentQuery int `yaml:"maxConcurrentQuery"` MaxConcurrentQuery int `yaml:"maxConcurrentQuery"`
MaxSamples int `yaml:"maxSamples"` MaxSamples int `yaml:"maxSamples"`
MaxFetchAllSeriesLimitMinute int64 `yaml:"maxFetchAllSeriesLimitMinute"` MaxFetchAllSeriesLimitMinute int64 `yaml:"maxFetchAllSeriesLimitMinute"`
SlowLogRecordSecond float64 `yaml:"slowLogRecordSecond"`
DefaultFetchSeriesQl string `yaml:"defaultFetchSeriesQl"`
RemoteWrite []RemoteConfig `yaml:"remoteWrite"` RemoteWrite []RemoteConfig `yaml:"remoteWrite"`
RemoteRead []RemoteConfig `yaml:"remoteRead"` RemoteRead []RemoteConfig `yaml:"remoteRead"`
} }

View File

@ -23,7 +23,6 @@ import (
const ( const (
LABEL_IDENT = "ident" LABEL_IDENT = "ident"
LABEL_NAME = "__name__" LABEL_NAME = "__name__"
DEFAULT_QL = `{__name__=~".*a.*|.*e.*"}`
DEFAULT_STEP = 15 DEFAULT_STEP = 15
) )
@ -323,7 +322,7 @@ func (pd *PromeDataSource) CommonQuerySeries(cj *commonQueryObj) storage.SeriesS
qlStrFinal := convertToPromql(cj) qlStrFinal := convertToPromql(cj)
if qlStrFinal == "{}" { if qlStrFinal == "{}" {
qlStrFinal = DEFAULT_QL qlStrFinal = pd.Section.DefaultFetchSeriesQl
reqMinute := (cj.End - cj.Start) / 60 reqMinute := (cj.End - cj.Start) / 60
// 如果前端啥都没传要限制下查询series的时间范围防止高基础查询 // 如果前端啥都没传要限制下查询series的时间范围防止高基础查询
if reqMinute > pd.Section.MaxFetchAllSeriesLimitMinute { if reqMinute > pd.Section.MaxFetchAllSeriesLimitMinute {
@ -379,7 +378,19 @@ func (pd *PromeDataSource) CommonQuerySeries(cj *commonQueryObj) storage.SeriesS
} }
// Get all series which match matchers. // Get all series which match matchers.
startTs := time.Now()
s := q.Select(true, hints, matcherSets[0]...) s := q.Select(true, hints, matcherSets[0]...)
timeTookSecond := time.Since(startTs).Seconds()
if timeTookSecond > pd.Section.SlowLogRecordSecond {
logger.Warningf("[prome_remote_read_show_slow_log_CommonQuerySeries_select][threshold:%v][timeTookSecond:%v][from:%v][args:%+v][promql:%v]",
pd.Section.SlowLogRecordSecond,
timeTookSecond,
cj.From,
cj,
qlStrFinal,
)
}
return s return s
} }
@ -389,6 +400,7 @@ func (pd *PromeDataSource) CommonQuerySeries(cj *commonQueryObj) storage.SeriesS
// TODO 等待prometheus官方对 remote_read label_values 的支持 // TODO 等待prometheus官方对 remote_read label_values 的支持
// Implement: https://github.com/prometheus/prometheus/issues/3351 // Implement: https://github.com/prometheus/prometheus/issues/3351
func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKeyQueryResp { func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKeyQueryResp {
startTs := time.Now()
respD := &vos.TagKeyQueryResp{ respD := &vos.TagKeyQueryResp{
Keys: make([]string, 0), Keys: make([]string, 0),
} }
@ -400,7 +412,7 @@ func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKe
Metric: "", Metric: "",
}) })
} }
resultSeries := ""
for _, x := range recv.Params { for _, x := range recv.Params {
cj := &commonQueryObj{ cj := &commonQueryObj{
Idents: x.Idents, Idents: x.Idents,
@ -421,8 +433,10 @@ func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKe
logger.Errorf("[prome_query_error][series_set_iter_error][err:%+v]", err) logger.Errorf("[prome_query_error][series_set_iter_error][err:%+v]", err)
continue continue
} }
thisSeriesNum := 0
for s.Next() { for s.Next() {
series := s.At() series := s.At()
thisSeriesNum++
for _, lb := range series.Labels() { for _, lb := range series.Labels() {
if lb.Name == LABEL_NAME { if lb.Name == LABEL_NAME {
continue continue
@ -436,12 +450,14 @@ func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKe
labelNamesSet[lb.Name] = struct{}{} labelNamesSet[lb.Name] = struct{}{}
} }
} }
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)
} }
names := make([]string, 0) names := make([]string, len(labelNamesSet))
i := 0
for key := range labelNamesSet { for key := range labelNamesSet {
names[i] = key
names = append(names, key) i++
} }
sort.Strings(names) sort.Strings(names)
// 因为map中的key是无序的必须这样才能稳定输出 // 因为map中的key是无序的必须这样才能稳定输出
@ -450,12 +466,17 @@ func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKe
} }
respD.Keys = names respD.Keys = names
timeTookSecond := time.Since(startTs).Seconds()
if timeTookSecond > pd.Section.SlowLogRecordSecond {
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryTagKeys][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
}
return respD return respD
} }
// 对应prometheus 中的 /api/v1/label/<label_name>/values // 对应prometheus 中的 /api/v1/label/<label_name>/values
func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.TagValueQueryResp { func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.TagValueQueryResp {
startTs := time.Now()
labelValuesSet := make(map[string]struct{}) labelValuesSet := make(map[string]struct{})
if len(recv.Params) == 0 { if len(recv.Params) == 0 {
@ -464,7 +485,7 @@ func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.Tag
Metric: "", Metric: "",
}) })
} }
resultSeries := ""
for _, x := range recv.Params { for _, x := range recv.Params {
cj := &commonQueryObj{ cj := &commonQueryObj{
Idents: x.Idents, Idents: x.Idents,
@ -485,9 +506,10 @@ func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.Tag
logger.Errorf("[prome_query_error][series_set_iter_error][err:%+v]", err) logger.Errorf("[prome_query_error][series_set_iter_error][err:%+v]", err)
continue continue
} }
thisSeriesNum := 0
for s.Next() { for s.Next() {
series := s.At() series := s.At()
thisSeriesNum++
for _, lb := range series.Labels() { for _, lb := range series.Labels() {
if lb.Name == recv.TagKey { if lb.Name == recv.TagKey {
if recv.TagValue != "" { if recv.TagValue != "" {
@ -500,11 +522,13 @@ func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.Tag
} }
} }
} }
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)
} }
vals := make([]string, 0) vals := make([]string, len(labelValuesSet))
i := 0
for val := range labelValuesSet { for val := range labelValuesSet {
vals[i] = val
vals = append(vals, val) i++
} }
sort.Strings(vals) sort.Strings(vals)
if recv.Limit > 0 && len(vals) > recv.Limit { if recv.Limit > 0 && len(vals) > recv.Limit {
@ -512,12 +536,17 @@ func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.Tag
} }
respD := &vos.TagValueQueryResp{} respD := &vos.TagValueQueryResp{}
respD.Values = vals respD.Values = vals
timeTookSecond := time.Since(startTs).Seconds()
if timeTookSecond > pd.Section.SlowLogRecordSecond {
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryTagValues][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
}
return respD return respD
} }
// 对应prometheus 中的 /api/v1/label/<label_name>/values label_name == __name__ // 对应prometheus 中的 /api/v1/label/<label_name>/values label_name == __name__
func (pd *PromeDataSource) QueryMetrics(recv vos.MetricQueryParam) *vos.MetricQueryResp { func (pd *PromeDataSource) QueryMetrics(recv vos.MetricQueryParam) *vos.MetricQueryResp {
startTs := time.Now()
cj := &commonQueryObj{ cj := &commonQueryObj{
Idents: recv.Idents, Idents: recv.Idents,
Metric: recv.Metric, Metric: recv.Metric,
@ -544,18 +573,23 @@ func (pd *PromeDataSource) QueryMetrics(recv vos.MetricQueryParam) *vos.MetricQu
sets = append(sets, s) sets = append(sets, s)
set := storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge) set := storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
labelValuesSet := make(map[string]struct{}) labelValuesSet := make(map[string]struct{})
//for s.Next() { resultSeries := ""
thisSeriesNum := 0
for set.Next() { for set.Next() {
series := set.At() series := set.At()
thisSeriesNum++
for _, lb := range series.Labels() { for _, lb := range series.Labels() {
if lb.Name == LABEL_NAME { if lb.Name == LABEL_NAME {
labelValuesSet[lb.Value] = struct{}{} labelValuesSet[lb.Value] = struct{}{}
} }
} }
} }
vals := make([]string, 0) resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)
vals := make([]string, len(labelValuesSet))
i := 0
for val := range labelValuesSet { for val := range labelValuesSet {
vals = append(vals, val) vals[i] = val
i++
} }
sort.Strings(vals) sort.Strings(vals)
@ -564,11 +598,16 @@ func (pd *PromeDataSource) QueryMetrics(recv vos.MetricQueryParam) *vos.MetricQu
vals = vals[:recv.Limit] vals = vals[:recv.Limit]
} }
respD.Metrics = vals respD.Metrics = vals
timeTookSecond := time.Since(startTs).Seconds()
if timeTookSecond > pd.Section.SlowLogRecordSecond {
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryMetrics][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
}
return respD return respD
} }
// 对应prometheus 中的 /api/v1/series // 对应prometheus 中的 /api/v1/series
func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagPairQueryResp { func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagPairQueryResp {
startTs := time.Now()
respD := &vos.TagPairQueryResp{ respD := &vos.TagPairQueryResp{
TagPairs: make([]string, 0), TagPairs: make([]string, 0),
Idents: make([]string, 0), Idents: make([]string, 0),
@ -580,6 +619,7 @@ func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagP
Metric: "", Metric: "",
}) })
} }
resultSeries := ""
for _, x := range recv.Params { for _, x := range recv.Params {
cj := &commonQueryObj{ cj := &commonQueryObj{
Idents: x.Idents, Idents: x.Idents,
@ -606,8 +646,10 @@ func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagP
set := storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge) set := storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
labelIdents := make([]string, 0) labelIdents := make([]string, 0)
thisSeriesNum := 0
for set.Next() { for set.Next() {
series := s.At() series := s.At()
thisSeriesNum++
labelsS := series.Labels() labelsS := series.Labels()
for _, i := range labelsS { for _, i := range labelsS {
@ -628,13 +670,15 @@ func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagP
} }
} }
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)
} }
newTags := make([]string, 0) newTags := make([]string, len(tps))
i := 0
for k := range tps { for k := range tps {
newTags[i] = k
newTags = append(newTags, k) i++
} }
sort.Strings(newTags) sort.Strings(newTags)
@ -643,6 +687,10 @@ func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagP
} }
respD.TagPairs = newTags respD.TagPairs = newTags
timeTookSecond := time.Since(startTs).Seconds()
if timeTookSecond > pd.Section.SlowLogRecordSecond {
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryTagPairs][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
}
return respD return respD
} }

View File

@ -122,6 +122,8 @@ func Parse() error {
viper.SetDefault("trans.backend.prometheus.maxConcurrentQuery", 30) viper.SetDefault("trans.backend.prometheus.maxConcurrentQuery", 30)
viper.SetDefault("trans.backend.prometheus.maxSamples", 50000000) viper.SetDefault("trans.backend.prometheus.maxSamples", 50000000)
viper.SetDefault("trans.backend.prometheus.maxFetchAllSeriesLimitMinute", 5) viper.SetDefault("trans.backend.prometheus.maxFetchAllSeriesLimitMinute", 5)
viper.SetDefault("trans.backend.prometheus.slowLogRecordSecond", 3)
viper.SetDefault("trans.backend.prometheus.defaultFetchSeriesQl", `{__name__=~"system.*"}`)
viper.SetDefault("tpl.alertRulePath", "./etc/alert_rule") viper.SetDefault("tpl.alertRulePath", "./etc/alert_rule")
viper.SetDefault("tpl.dashboardPath", "./etc/dashboard") viper.SetDefault("tpl.dashboardPath", "./etc/dashboard")

View File

@ -91,6 +91,11 @@ trans:
lookbackDeltaMinute: 2 lookbackDeltaMinute: 2
# 查询全量索引时时间窗口限制,降低高基数 # 查询全量索引时时间窗口限制,降低高基数
maxFetchAllSeriesLimitMinute: 5 maxFetchAllSeriesLimitMinute: 5
# 查询接口耗时超过多少秒就打印warning日志记录
slowLogRecordSecond: 3
# remote_read时如果没有查询条件则用这条默认的ql查询
# 注意! ql匹配series越多造成的oom或者慢查询可能越大
defaultFetchSeriesQl: '{__name__=~"system.*"}'
remoteWrite: remoteWrite:
# m3db的配置 # m3db的配置
#- name: m3db01 #- name: m3db01