refactor transfer datasources for ui/judge, implement tsdb(+index) an… (#246)

* refactor transfer datasources for ui/judge, implement tsdb(+index) and influxdb

* fix error string; fix import identity; refactor pushendpoint init

* fix influx queryData

Co-authored-by: wangzhiguo04 <wangzhiguo04@meicai.cn>
This commit is contained in:
xingren23 2020-07-20 09:57:22 +08:00 committed by GitHub
parent b6169ac706
commit 520dda70c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 1991 additions and 906 deletions

View File

@ -3,6 +3,7 @@ query:
connTimeout: 1000 connTimeout: 1000
callTimeout: 2000 callTimeout: 2000
indexCallTimeout: 2000 indexCallTimeout: 2000
indexMod: "index"
redis: redis:
addrs: addrs:

View File

@ -1,10 +1,10 @@
backend: backend:
maxConns: 20000 datasource: "tsdb"
# in ms tsdb:
# connTimeout: 1000 enabled: true
# callTimeout: 3000 name: "tsdb"
cluster: cluster:
tsdb01: 127.0.0.1:5821 tsdb01: 127.0.0.1:5821
influxdb: influxdb:
enabled: false enabled: false
username: "influx" username: "influx"
@ -21,6 +21,11 @@ backend:
brokersPeers: "192.168.1.1:9092,192.168.1.2:9092" brokersPeers: "192.168.1.1:9092,192.168.1.2:9092"
topic: "n9e" topic: "n9e"
identity:
specify: ""
shell: ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1
logger: logger:
dir: logs/transfer dir: logs/transfer
level: WARNING level: WARNING

View File

@ -314,6 +314,19 @@ func PKWithCounter(endpoint, counter string) string {
return ret.String() return ret.String()
} }
// GetCounter builds the canonical counter string (metric/sortedTags) for a
// metric. When tagMap is nil it is parsed from the tag string first;
// otherwise tagMap is used as-is and tag is ignored.
func GetCounter(metric, tag string, tagMap map[string]string) (counter string, err error) {
	m := tagMap
	if m == nil {
		if m, err = SplitTagsString(tag); err != nil {
			return "", err
		}
	}
	return PKWithTags(metric, SortedTags(m)), nil
}
func PKWithTags(metric, tags string) string { func PKWithTags(metric, tags string) string {
ret := bufferPool.Get().(*bytes.Buffer) ret := bufferPool.Get().(*bytes.Buffer)
ret.Reset() ret.Reset()

View File

@ -48,3 +48,56 @@ func (req *QueryData) Key() string {
func (resp *TsdbQueryResponse) Key() string { func (resp *TsdbQueryResponse) Key() string {
return resp.Endpoint + "/" + resp.Counter return resp.Endpoint + "/" + resp.Counter
} }
// EndpointsRecv is the request body carrying the endpoints to query.
type EndpointsRecv struct {
	Endpoints []string `json:"endpoints"`
}

// MetricResp lists the metric names found for a query.
type MetricResp struct {
	Metrics []string `json:"metrics"`
}

// EndpointMetricRecv is the request body for tag-pair queries:
// which metrics on which endpoints.
type EndpointMetricRecv struct {
	Endpoints []string `json:"endpoints"`
	Metrics   []string `json:"metrics"`
}

// IndexTagkvResp returns, for one metric, every tag key with its values.
type IndexTagkvResp struct {
	Endpoints []string   `json:"endpoints"`
	Metric    string     `json:"metric"`
	Tagkv     []*TagPair `json:"tagkv"`
}

// TagPair groups all seen values of one tag key.
type TagPair struct {
	Key    string   `json:"tagk"` // json name differs from the field name for frontend compatibility
	Values []string `json:"tagv"`
}

// CludeRecv is the request body for include/exclude tag filtering of series.
type CludeRecv struct {
	Endpoints []string   `json:"endpoints"`
	Metric    string     `json:"metric"`
	Include   []*TagPair `json:"include"`
	Exclude   []*TagPair `json:"exclude"`
}

// XcludeResp is the result of an include/exclude filter: the matching
// sorted-tag strings plus step/dstype metadata for the series.
type XcludeResp struct {
	Endpoints []string `json:"endpoints"`
	Metric    string   `json:"metric"`
	Tags      []string `json:"tags"`
	Step      int      `json:"step"`
	DsType    string   `json:"dstype"`
}

// IndexByFullTagsRecv is the request body for full-tag series lookup.
type IndexByFullTagsRecv struct {
	Endpoints []string  `json:"endpoints"`
	Metric    string    `json:"metric"`
	Tagkv     []TagPair `json:"tagkv"`
}

// IndexByFullTagsResp is the result of a full-tag series lookup.
type IndexByFullTagsResp struct {
	Endpoints []string `json:"endpoints"`
	Metric    string   `json:"metric"`
	Tags      []string `json:"tags"`
	Step      int      `json:"step"`
	DsType    string   `json:"dstype"`
}

View File

@ -40,7 +40,7 @@ func GetIndexLoop() {
} }
func GetIndex() { func GetIndex() {
instances, err := report.GetAlive("index", "monapi") instances, err := report.GetAlive(Config.IndexMod, "monapi")
if err != nil { if err != nil {
stats.Counter.Set("get.index.err", 1) stats.Counter.Set("get.index.err", 1)
logger.Warningf("get index list err:%v", err) logger.Warningf("get index list err:%v", err)

View File

@ -19,6 +19,7 @@ type SeriesQuerySection struct {
MaxIdle int `json:"maxIdle"` // MaxIdle int `json:"maxIdle"` //
ConnTimeout int `json:"connTimeout"` // 连接超时 ConnTimeout int `json:"connTimeout"` // 连接超时
CallTimeout int `json:"callTimeout"` // 请求超时 CallTimeout int `json:"callTimeout"` // 请求超时
IndexMod string `json:"indexMod"`
IndexPath string `json:"indexPath"` IndexPath string `json:"indexPath"`
IndexCallTimeout int `json:"indexCallTimeout"` // 请求超时 IndexCallTimeout int `json:"indexCallTimeout"` // 请求超时
} }

View File

@ -49,6 +49,7 @@ func Parse(conf string) error {
"connTimeout": 1000, "connTimeout": 1000,
"callTimeout": 2000, "callTimeout": 2000,
"indexCallTimeout": 2000, "indexCallTimeout": 2000,
"indexMod": "index",
"indexPath": "/api/index/counter/clude", "indexPath": "/api/index/counter/clude",
}) })

View File

@ -0,0 +1,84 @@
package backend
import (
"fmt"
"time"
"github.com/didi/nightingale/src/dataobj"
)
// send
const (
	DefaultSendTaskSleepInterval = time.Millisecond * 50 // default sleep interval between queue drains: 50ms
	DefaultSendQueueMaxSize      = 102400                // 10.24w
	MaxSendRetry                 = 10
)

var (
	MinStep int // minimum report interval, in seconds
)
// DataSource is the plugin contract for a query-capable backend: every
// datasource can also accept pushed points (it embeds PushEndpoint) and
// must answer the judge/ui query APIs below.
type DataSource interface {
	PushEndpoint

	// query data for judge
	QueryData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse
	// query data for ui
	QueryDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse

	// query metrics & tags
	QueryMetrics(recv dataobj.EndpointsRecv) *dataobj.MetricResp
	QueryTagPairs(recv dataobj.EndpointMetricRecv) []dataobj.IndexTagkvResp
	QueryIndexByClude(recv []dataobj.CludeRecv) []dataobj.XcludeResp
	QueryIndexByFullTags(recv []dataobj.IndexByFullTagsRecv) []dataobj.IndexByFullTagsResp

	// tsdb instance
	GetInstance(metric, endpoint string, tags map[string]string) []string
}

// PushEndpoint is the plugin contract for a write-only backend.
type PushEndpoint interface {
	// push data
	Push2Queue(items []*dataobj.MetricValue)
}

// plugin registries keyed by plugin id; populated via the Register*
// helpers during backend initialization
var registryDataSources map[string]DataSource
var registryPushEndpoints map[string]PushEndpoint

func init() {
	registryDataSources = make(map[string]DataSource)
	registryPushEndpoints = make(map[string]PushEndpoint)
}
// get backend datasource
// (pluginId == "" for default datasource)
// GetDataSourceFor returns the datasource registered under pluginId;
// an empty pluginId selects the configured default datasource.
func GetDataSourceFor(pluginId string) (DataSource, error) {
	name := pluginId
	if name == "" {
		name = defaultDataSource
	}
	source, exists := registryDataSources[name]
	if !exists {
		return nil, fmt.Errorf("could not find datasource for plugin: %s", name)
	}
	return source, nil
}
// get all push endpoints
// GetPushEndpoints returns every registered push endpoint, or an error
// when none have been registered.
func GetPushEndpoints() ([]PushEndpoint, error) {
	if len(registryPushEndpoints) == 0 {
		return nil, fmt.Errorf("could not find any pushendpoint")
	}
	items := make([]PushEndpoint, 0, len(registryPushEndpoints))
	for _, endpoint := range registryPushEndpoints {
		items = append(items, endpoint)
	}
	return items, nil
}
// RegisterDataSource registers datasource under pluginId for queries and,
// because every DataSource is also a PushEndpoint, for pushes as well.
func RegisterDataSource(pluginId string, datasource DataSource) {
	registryDataSources[pluginId] = datasource
	registryPushEndpoints[pluginId] = datasource
}

// RegisterPushEndpoint registers a push-only endpoint under pluginId.
func RegisterPushEndpoint(pluginId string, push PushEndpoint) {
	registryPushEndpoints[pluginId] = push
}

View File

@ -0,0 +1,187 @@
package influxdb
import (
"time"
"github.com/didi/nightingale/src/dataobj"
"github.com/didi/nightingale/src/toolkits/stats"
"github.com/influxdata/influxdb/client/v2"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/container/list"
"github.com/toolkits/pkg/logger"
)
// InfluxdbSection is the yaml configuration for the influxdb backend.
type InfluxdbSection struct {
	Enabled   bool   `yaml:"enabled"`
	Name      string `yaml:"name"`      // plugin id used at registration
	Batch     int    `yaml:"batch"`     // max points per write
	MaxRetry  int    `yaml:"maxRetry"`  // send attempts before dropping a batch
	WorkerNum int    `yaml:"workerNum"` // concurrent send workers
	Timeout   int    `yaml:"timeout"`   // http client timeout, milliseconds
	Address   string `yaml:"address"`
	Database  string `yaml:"database"`
	Username  string `yaml:"username"`
	Password  string `yaml:"password"`
	Precision string `yaml:"precision"`
}

// InfluxdbDataSource implements the backend DataSource/PushEndpoint
// contract on top of influxdb.
type InfluxdbDataSource struct {
	// config
	Section               InfluxdbSection
	SendQueueMaxSize      int
	SendTaskSleepInterval time.Duration

	// send buffer queue, drained by send2InfluxdbTask
	InfluxdbQueue *list.SafeListLimited
}
// Init allocates the send queue and starts the background send task.
// Both happen only when the section is enabled: the old code launched
// send2InfluxdbTask unconditionally, and with a disabled section
// InfluxdbQueue stayed nil, so the task would dereference a nil queue.
func (influxdb *InfluxdbDataSource) Init() {
	if !influxdb.Section.Enabled {
		return
	}

	// init queue
	influxdb.InfluxdbQueue = list.NewSafeListLimited(influxdb.SendQueueMaxSize)

	// init task: run at least one worker
	influxdbConcurrent := influxdb.Section.WorkerNum
	if influxdbConcurrent < 1 {
		influxdbConcurrent = 1
	}
	go influxdb.send2InfluxdbTask(influxdbConcurrent)
}
// 将原始数据插入到influxdb缓存队列
func (influxdb *InfluxdbDataSource) Push2Queue(items []*dataobj.MetricValue) {
errCnt := 0
for _, item := range items {
influxdbItem := influxdb.convert2InfluxdbItem(item)
isSuccess := influxdb.InfluxdbQueue.PushFront(influxdbItem)
if !isSuccess {
errCnt += 1
}
}
stats.Counter.Set("influxdb.queue.err", errCnt)
}
// send2InfluxdbTask drains the send queue in batches and writes them to
// influxdb with bounded concurrency. It loops forever and is meant to run
// in its own goroutine.
func (influxdb *InfluxdbDataSource) send2InfluxdbTask(concurrent int) {
	batch := influxdb.Section.Batch // max points per write
	retry := influxdb.Section.MaxRetry
	addr := influxdb.Section.Address
	sema := semaphore.NewSemaphore(concurrent)

	c, err := NewInfluxdbClient(influxdb.Section)
	if err != nil {
		// err must be checked before touching c: on failure c is nil, and
		// the old `defer c.Client.Close()` placed before this check panicked
		logger.Errorf("init influxdb client fail: %v", err)
		return
	}
	defer c.Client.Close()

	for {
		items := influxdb.InfluxdbQueue.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			time.Sleep(influxdb.SendTaskSleepInterval)
			continue
		}

		influxdbItems := make([]*dataobj.InfluxdbItem, count)
		for i := 0; i < count; i++ {
			influxdbItems[i] = items[i].(*dataobj.InfluxdbItem)
			stats.Counter.Set("points.out.influxdb", 1)
			logger.Debug("send to influxdb: ", influxdbItems[i])
		}

		// synchronous Send with bounded concurrency
		sema.Acquire()
		go func(addr string, influxdbItems []*dataobj.InfluxdbItem, count int) {
			defer sema.Release()

			// goroutine-local error: the old shared outer `err` was written
			// by concurrent senders, a data race
			var err error
			sendOk := false
			for i := 0; i < retry; i++ {
				if err = c.Send(influxdbItems); err == nil {
					sendOk = true
					break
				}
				logger.Warningf("send influxdb fail: %v", err)
				time.Sleep(time.Millisecond * 10)
			}

			if !sendOk {
				stats.Counter.Set("points.out.influxdb.err", count)
				logger.Errorf("send %v to influxdb %s fail: %v", influxdbItems, addr, err)
			} else {
				logger.Debugf("send to influxdb %s ok", addr)
			}
		}(addr, influxdbItems, count)
	}
}
// convert2InfluxdbItem maps a MetricValue onto an influxdb point:
// metric -> measurement, tags plus endpoint -> tags, value -> field "value".
func (influxdb *InfluxdbDataSource) convert2InfluxdbItem(d *dataobj.MetricValue) *dataobj.InfluxdbItem {
	item := dataobj.InfluxdbItem{
		Measurement: d.Metric,
		Tags:        make(map[string]string, len(d.TagsMap)+1),
		Fields:      map[string]interface{}{"value": d.Value},
		Timestamp:   d.Timestamp,
	}
	for k, v := range d.TagsMap {
		item.Tags[k] = v
	}
	item.Tags["endpoint"] = d.Endpoint
	return &item
}
// InfluxClient bundles an influxdb HTTP client with the database and
// precision every query/write should use.
type InfluxClient struct {
	Client    client.Client
	Database  string
	Precision string
}
// NewInfluxdbClient builds an InfluxClient from the yaml section; the
// configured timeout is interpreted as milliseconds.
func NewInfluxdbClient(section InfluxdbSection) (*InfluxClient, error) {
	httpCfg := client.HTTPConfig{
		Addr:     section.Address,
		Username: section.Username,
		Password: section.Password,
		Timeout:  time.Duration(section.Timeout) * time.Millisecond,
	}
	c, err := client.NewHTTPClient(httpCfg)
	if err != nil {
		return nil, err
	}
	ic := &InfluxClient{
		Client:    c,
		Database:  section.Database,
		Precision: section.Precision,
	}
	return ic, nil
}
// Send writes the given items to influxdb as one batch. Point-level
// conversion errors are logged and the point skipped; the write error,
// if any, is returned to the caller.
func (c *InfluxClient) Send(items []*dataobj.InfluxdbItem) error {
	cfg := client.BatchPointsConfig{
		Database:  c.Database,
		Precision: c.Precision,
	}
	bp, err := client.NewBatchPoints(cfg)
	if err != nil {
		logger.Error("create batch points error: ", err)
		return err
	}
	for _, item := range items {
		ts := time.Unix(item.Timestamp, 0)
		point, err := client.NewPoint(item.Measurement, item.Tags, item.Fields, ts)
		if err != nil {
			logger.Error("create new points error: ", err)
			continue
		}
		bp.AddPoint(point)
	}
	return c.Client.Write(bp)
}
// GetInstance returns the backend address list for a series. Influxdb is a
// single instance (or influx-proxy), so the configured address is always
// returned regardless of the arguments.
func (influxdb *InfluxdbDataSource) GetInstance(metric, endpoint string, tags map[string]string) []string {
	// influxdb: single instance, or influx-proxy
	return []string{influxdb.Section.Address}
}

View File

@ -0,0 +1,169 @@
package influxdb
import (
"fmt"
"strings"
"time"
"github.com/didi/nightingale/src/dataobj"
"github.com/toolkits/pkg/logger"
)
// ShowSeries incrementally builds a "SHOW SERIES" influxql statement:
// renderShow writes the base statement into RawQuery and the other
// render* methods append WHERE clauses to it.
type ShowSeries struct {
	Database  string
	Metric    string
	Endpoints []string
	Include   []*dataobj.TagPair
	Exclude   []*dataobj.TagPair
	Start     int64 // unix seconds
	End       int64 // unix seconds
	RawQuery  string
}
// renderShow initializes RawQuery with the base
// "SHOW SERIES ON <db> FROM <metric>" statement; call it before any of
// the other render* methods.
func (query *ShowSeries) renderShow() {
	query.RawQuery = fmt.Sprintf("SHOW SERIES ON \"%s\" FROM \"%s\"", query.Database,
		query.Metric)
}
// renderEndpoints appends a WHERE clause restricting the series to the
// requested endpoints (OR-ed together). No-op when Endpoints is empty.
func (query *ShowSeries) renderEndpoints() {
	if len(query.Endpoints) == 0 {
		return
	}
	endpointPart := "("
	for _, endpoint := range query.Endpoints {
		endpointPart += fmt.Sprintf(" \"endpoint\"='%s' OR", endpoint)
	}
	endpointPart = endpointPart[:len(endpointPart)-len("OR")]
	endpointPart += ")"
	// the old code wrapped both the query and the condition in double
	// quotes (`"%s" WHERE "%s"`), which produces invalid influxql
	query.RawQuery = fmt.Sprintf("%s WHERE %s", query.RawQuery, endpointPart)
}
// renderInclude appends a WHERE clause requiring at least one of the
// include tag values (OR-ed together). No-op when Include is empty or
// carries no concrete values.
func (query *ShowSeries) renderInclude() {
	if len(query.Include) == 0 {
		return
	}
	includePart := "("
	n := 0
	for _, include := range query.Include {
		for _, value := range include.Values {
			includePart += fmt.Sprintf(" \"%s\"='%s' OR", include.Key, value)
			n++
		}
	}
	if n == 0 {
		// no values at all: nothing to filter on (the old code sliced
		// out of range here)
		return
	}
	includePart = includePart[:len(includePart)-len("OR")]
	includePart += ")"
	// join with AND only when a WHERE clause already exists; the old code
	// first appended " WHERE" and then still emitted " AND ", producing
	// invalid "... WHERE AND (...)"
	if strings.Contains(query.RawQuery, "WHERE") {
		query.RawQuery = fmt.Sprintf("%s AND %s", query.RawQuery, includePart)
	} else {
		query.RawQuery = fmt.Sprintf("%s WHERE %s", query.RawQuery, includePart)
	}
}
// renderExclude appends a WHERE clause over the exclude tag values
// (OR-ed together). No-op when Exclude is empty or carries no values.
func (query *ShowSeries) renderExclude() {
	if len(query.Exclude) == 0 {
		return
	}
	excludePart := "("
	n := 0
	for _, exclude := range query.Exclude {
		for _, value := range exclude.Values {
			excludePart += fmt.Sprintf(" \"%s\"='%s' OR", exclude.Key, value)
			n++
		}
	}
	if n == 0 {
		// no values at all: nothing to filter on (the old code sliced
		// out of range here)
		return
	}
	excludePart = excludePart[:len(excludePart)-len("OR")]
	excludePart += ")"
	// join with AND only when a WHERE clause already exists; the old code
	// first appended " WHERE" and then still emitted " AND ", producing
	// invalid "... WHERE AND (...)"
	if strings.Contains(query.RawQuery, "WHERE") {
		query.RawQuery = fmt.Sprintf("%s AND %s", query.RawQuery, excludePart)
	} else {
		query.RawQuery = fmt.Sprintf("%s WHERE %s", query.RawQuery, excludePart)
	}
}
// renderTimeRange appends the time predicate. Influxdb compares bare
// integers as nanosecond epochs, so the unix-second bounds are scaled
// via time.Duration (seconds -> nanoseconds).
func (query *ShowSeries) renderTimeRange() {
	startNs := time.Duration(query.Start) * time.Second
	endNs := time.Duration(query.End) * time.Second
	joiner := "WHERE"
	if strings.Contains(query.RawQuery, "WHERE") {
		joiner = "AND"
	}
	query.RawQuery = fmt.Sprintf("%s %s time >= %d AND time <= %d",
		query.RawQuery, joiner, startNs, endNs)
}
// QueryData incrementally builds a "SELECT value FROM metric" influxql
// statement; renderSelect writes the base statement into RawQuery and the
// other render* methods append WHERE / GROUP BY clauses.
type QueryData struct {
	Start     int64
	End       int64
	Metric    string
	Endpoints []string
	Tags      []string
	Step      int
	DsType    string
	GroupKey  []string // aggregation dimensions
	AggrFunc  string   // aggregation function
	RawQuery  string
}
// renderSelect initializes RawQuery with the SELECT statement.
// NOTE(review): when both AggrFunc and GroupKey are set RawQuery is left
// empty — aggregated selects appear unimplemented here; confirm callers
// never hit that combination.
func (query *QueryData) renderSelect() {
	// select
	if query.AggrFunc != "" && len(query.GroupKey) > 0 {
		query.RawQuery = ""
	} else {
		query.RawQuery = fmt.Sprintf("SELECT \"value\" FROM \"%s\"", query.Metric)
	}
}
// renderEndpoints appends a WHERE clause restricting the query to the
// requested endpoints (OR-ed together). No-op when Endpoints is empty.
func (query *QueryData) renderEndpoints() {
	// where endpoint
	if len(query.Endpoints) == 0 {
		return
	}
	endpointPart := "("
	for _, endpoint := range query.Endpoints {
		endpointPart += fmt.Sprintf(" \"endpoint\"='%s' OR", endpoint)
	}
	endpointPart = strings.TrimSuffix(endpointPart, "OR") + ")"
	query.RawQuery = query.RawQuery + " WHERE " + endpointPart
}
// renderTags appends a WHERE clause requiring every tag k=v pair
// (AND-ed together). A tag-string parse failure is logged and the
// clause skipped. No-op when Tags is empty.
func (query *QueryData) renderTags() {
	// where tags
	if len(query.Tags) == 0 {
		return
	}
	tagMap, err := dataobj.SplitTagsString(strings.Join(query.Tags, ","))
	if err != nil {
		logger.Warningf("split tags error, %+v", err)
		return
	}
	tagPart := "("
	for tagK, tagV := range tagMap {
		tagPart += fmt.Sprintf(" \"%s\"='%s' AND", tagK, tagV)
	}
	tagPart = strings.TrimSuffix(tagPart, "AND") + ")"
	if strings.Contains(query.RawQuery, "WHERE") {
		query.RawQuery = fmt.Sprintf("%s AND %s", query.RawQuery, tagPart)
	} else {
		query.RawQuery = fmt.Sprintf("%s WHERE %s", query.RawQuery, tagPart)
	}
}
// renderTimeRange appends the time predicate. Influxdb compares bare
// integers as nanosecond epochs, so the unix-second bounds are scaled via
// time.Duration. The old else-branch interpolated raw seconds — a unit
// inconsistent with the if-branch — selecting a window near 1970.
func (query *QueryData) renderTimeRange() {
	startNs := time.Duration(query.Start) * time.Second
	endNs := time.Duration(query.End) * time.Second
	if strings.Contains(query.RawQuery, "WHERE") {
		query.RawQuery = fmt.Sprintf("%s AND time >= %d AND time <= %d",
			query.RawQuery, startNs, endNs)
	} else {
		query.RawQuery = fmt.Sprintf("%s WHERE time >= %d AND time <= %d",
			query.RawQuery, startNs, endNs)
	}
}
// renderGroupBy appends a GROUP BY clause over the group keys.
// No-op when GroupKey is empty.
func (query *QueryData) renderGroupBy() {
	// group by
	if len(query.GroupKey) == 0 {
		return
	}
	query.RawQuery = fmt.Sprintf("%s GROUP BY %s", query.RawQuery, strings.Join(query.GroupKey, ","))
}

View File

@ -0,0 +1,426 @@
package influxdb
import (
"encoding/json"
"fmt"
"strings"
"time"
"github.com/influxdata/influxdb/models"
"github.com/didi/nightingale/src/dataobj"
"github.com/influxdata/influxdb/client/v2"
"github.com/toolkits/pkg/logger"
)
// select value from metric where ...
// QueryData fetches raw series values for judge: it issues one influxql
// "SELECT value FROM metric WHERE ..." per counter in every input.
// Returns nil when the influxdb client cannot be created; failed queries
// are silently skipped.
func (influxdb *InfluxdbDataSource) QueryData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse {
	logger.Debugf("query data, inputs: %+v", inputs)

	c, err := NewInfluxdbClient(influxdb.Section)
	if err != nil {
		// err must be checked before using c: on failure c is nil, and the
		// old `defer c.Client.Close()` placed before this check panicked
		logger.Errorf("init influxdb client fail: %v", err)
		return nil
	}
	defer c.Client.Close()

	queryResponse := make([]*dataobj.TsdbQueryResponse, 0)
	for _, input := range inputs {
		for _, counter := range input.Counters {
			// counter format: metric/tagk1=tagv1,tagk2=tagv2
			items := strings.Split(counter, "/")
			metric := items[0]
			var tags = make([]string, 0)
			if len(items) > 1 && len(items[1]) > 0 {
				tags = strings.Split(items[1], ",")
			}
			influxdbQuery := QueryData{
				Start:     input.Start,
				End:       input.End,
				Metric:    metric,
				Endpoints: input.Endpoints,
				Tags:      tags,
				Step:      input.Step,
				DsType:    input.DsType,
			}
			influxdbQuery.renderSelect()
			influxdbQuery.renderEndpoints()
			influxdbQuery.renderTags()
			influxdbQuery.renderTimeRange()
			logger.Debugf("query influxql %s", influxdbQuery.RawQuery)

			query := client.NewQuery(influxdbQuery.RawQuery, c.Database, c.Precision)
			if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
				for _, result := range response.Results {
					for _, series := range result.Series {
						// fixme : influx client get series.Tags is nil
						endpoint := series.Tags["endpoint"]
						// delete the tag by its key "endpoint"; the old code
						// passed the endpoint *value* to delete(), leaving the
						// tag in the map and corrupting the counter below
						delete(series.Tags, "endpoint")

						counter, err := dataobj.GetCounter(series.Name, "", series.Tags)
						if err != nil {
							logger.Warningf("get counter error: %+v", err)
							continue
						}

						resp := &dataobj.TsdbQueryResponse{
							Start:    influxdbQuery.Start,
							End:      influxdbQuery.End,
							Endpoint: endpoint,
							Counter:  counter,
							DsType:   influxdbQuery.DsType,
							Step:     influxdbQuery.Step,
							Values:   convertValues(series),
						}
						queryResponse = append(queryResponse, resp)
					}
				}
			}
		}
	}
	return queryResponse
}
// todo : 支持 comparison
// select value from metric where ...
// QueryDataForUI fetches series values for the ui graph view, optionally
// grouped by GroupKey. Returns nil when the influxdb client cannot be
// created; a failed query yields an empty result.
// todo: support comparison
func (influxdb *InfluxdbDataSource) QueryDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse {
	logger.Debugf("query data for ui, input: %+v", input)

	c, err := NewInfluxdbClient(influxdb.Section)
	if err != nil {
		// err must be checked before using c: on failure c is nil, and the
		// old `defer c.Client.Close()` placed before this check panicked
		logger.Errorf("init influxdb client fail: %v", err)
		return nil
	}
	defer c.Client.Close()

	influxdbQuery := QueryData{
		Start:     input.Start,
		End:       input.End,
		Metric:    input.Metric,
		Endpoints: input.Endpoints,
		Tags:      input.Tags,
		Step:      input.Step,
		DsType:    input.DsType,
		GroupKey:  input.GroupKey,
		AggrFunc:  input.AggrFunc,
	}
	influxdbQuery.renderSelect()
	influxdbQuery.renderEndpoints()
	influxdbQuery.renderTags()
	influxdbQuery.renderTimeRange()
	influxdbQuery.renderGroupBy()
	logger.Debugf("query influxql %s", influxdbQuery.RawQuery)

	queryResponse := make([]*dataobj.TsdbQueryResponse, 0)
	query := client.NewQuery(influxdbQuery.RawQuery, c.Database, c.Precision)
	if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
		for _, result := range response.Results {
			for _, series := range result.Series {
				// fixme : influx client get series.Tags is nil
				endpoint := series.Tags["endpoint"]
				// delete the tag by its key "endpoint"; the old code passed
				// the endpoint *value* to delete(), leaving the tag in place
				delete(series.Tags, "endpoint")

				counter, err := dataobj.GetCounter(series.Name, "", series.Tags)
				if err != nil {
					logger.Warningf("get counter error: %+v", err)
					continue
				}

				resp := &dataobj.TsdbQueryResponse{
					Start:    influxdbQuery.Start,
					End:      influxdbQuery.End,
					Endpoint: endpoint,
					Counter:  counter,
					DsType:   influxdbQuery.DsType,
					Step:     influxdbQuery.Step,
					Values:   convertValues(series),
				}
				queryResponse = append(queryResponse, resp)
			}
		}
	}
	return queryResponse
}
// show measurements on n9e
// QueryMetrics lists all measurements in the configured database via
// "SHOW MEASUREMENTS ON <db>". The endpoints in recv are not used for
// filtering by this implementation. Returns nil on any error.
func (influxdb *InfluxdbDataSource) QueryMetrics(recv dataobj.EndpointsRecv) *dataobj.MetricResp {
	logger.Debugf("query metric, recv: %+v", recv)

	c, err := NewInfluxdbClient(influxdb.Section)
	if err != nil {
		// err must be checked before deferring Close: c is nil on failure,
		// and the old `defer c.Client.Close()` before this check panicked
		logger.Errorf("init influxdb client fail: %v", err)
		return nil
	}
	defer c.Client.Close()

	influxql := fmt.Sprintf("SHOW MEASUREMENTS ON \"%s\"", influxdb.Section.Database)
	query := client.NewQuery(influxql, c.Database, c.Precision)
	if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
		resp := &dataobj.MetricResp{
			Metrics: make([]string, 0),
		}
		for _, result := range response.Results {
			for _, series := range result.Series {
				for _, valuePair := range series.Values {
					// comma-ok assertion: skip malformed rows instead of
					// panicking on an unexpected value type
					if metric, ok := valuePair[0].(string); ok {
						resp.Metrics = append(resp.Metrics, metric)
					}
				}
			}
		}
		return resp
	}
	return nil
}
// show tag keys / values from metric ...
// QueryTagPairs returns, for every requested metric, its tag keys and all
// of their values (SHOW TAG KEYS + SHOW TAG VALUES), excluding the
// built-in endpoint tag. Returns nil when the client cannot be created.
func (influxdb *InfluxdbDataSource) QueryTagPairs(recv dataobj.EndpointMetricRecv) []dataobj.IndexTagkvResp {
	logger.Debugf("query tag pairs, recv: %+v", recv)

	c, err := NewInfluxdbClient(influxdb.Section)
	if err != nil {
		// err must be checked before deferring Close: c is nil on failure,
		// and the old `defer c.Client.Close()` before this check panicked
		logger.Errorf("init influxdb client fail: %v", err)
		return nil
	}
	defer c.Client.Close()

	resp := make([]dataobj.IndexTagkvResp, 0)
	for _, metric := range recv.Metrics {
		tagkvResp := dataobj.IndexTagkvResp{
			Endpoints: recv.Endpoints,
			Metric:    metric,
			Tagkv:     make([]*dataobj.TagPair, 0),
		}
		// show tag keys
		keys := showTagKeys(c, metric, influxdb.Section.Database)
		if len(keys) > 0 {
			// show tag values
			tagkvResp.Tagkv = showTagValues(c, keys, metric, influxdb.Section.Database)
		}
		resp = append(resp, tagkvResp)
	}
	return resp
}
// show tag keys on n9e from metric where ...
// (exclude default endpoint tag)
// showTagKeys lists the tag keys of metric via
// "SHOW TAG KEYS ON <db> FROM <metric>", excluding the built-in endpoint
// tag. Query errors yield an empty slice.
func showTagKeys(c *InfluxClient, metric, database string) []string {
	keys := make([]string, 0)
	influxql := fmt.Sprintf("SHOW TAG KEYS ON \"%s\" FROM \"%s\"", database, metric)
	query := client.NewQuery(influxql, c.Database, c.Precision)
	if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
		for _, result := range response.Results {
			for _, series := range result.Series {
				for _, valuePair := range series.Values {
					// comma-ok assertion: skip malformed rows instead of
					// panicking (the old bare assertion could panic)
					tagKey, ok := valuePair[0].(string)
					if !ok {
						continue
					}
					// drop the default endpoint tag
					if tagKey != "endpoint" {
						keys = append(keys, tagKey)
					}
				}
			}
		}
	}
	return keys
}
// show tag values on n9e from metric where ...
// showTagValues fetches every value of the given tag keys of metric via
// "SHOW TAG VALUES ... WITH KEY IN (...)", grouping values under their
// key (first-seen key order). Query errors yield an empty slice.
func showTagValues(c *InfluxClient, keys []string, metric, database string) []*dataobj.TagPair {
	tagkv := make([]*dataobj.TagPair, 0)
	influxql := fmt.Sprintf("SHOW TAG VALUES ON \"%s\" FROM \"%s\" WITH KEY in (\"%s\")",
		database,
		metric, strings.Join(keys, "\",\""))
	query := client.NewQuery(influxql, c.Database, c.Precision)
	if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
		tagPairs := make(map[string]*dataobj.TagPair)
		for _, result := range response.Results {
			for _, series := range result.Series {
				for _, valuePair := range series.Values {
					// comma-ok assertions: skip malformed rows instead of
					// panicking (the old bare assertions could panic)
					tagKey, okK := valuePair[0].(string)
					tagValue, okV := valuePair[1].(string)
					if !okK || !okV {
						continue
					}
					if pair, exist := tagPairs[tagKey]; exist {
						pair.Values = append(pair.Values, tagValue)
					} else {
						pair := &dataobj.TagPair{
							Key:    tagKey,
							Values: []string{tagValue},
						}
						tagPairs[pair.Key] = pair
						tagkv = append(tagkv, pair)
					}
				}
			}
		}
	}
	return tagkv
}
// show series from metric where ...
// QueryIndexByClude resolves series for the ui's include/exclude tag
// filter: for every recv it runs "SHOW SERIES" restricted by endpoints,
// include and exclude tag pairs, and collects the sorted tag strings of
// matching series. Returns nil when the client cannot be created.
func (influxdb *InfluxdbDataSource) QueryIndexByClude(recvs []dataobj.CludeRecv) []dataobj.XcludeResp {
	logger.Debugf("query IndexByClude , recv: %+v", recvs)

	c, err := NewInfluxdbClient(influxdb.Section)
	if err != nil {
		// err must be checked before deferring Close: c is nil on failure,
		// and the old `defer c.Client.Close()` before this check panicked
		logger.Errorf("init influxdb client fail: %v", err)
		return nil
	}
	defer c.Client.Close()

	resp := make([]dataobj.XcludeResp, 0)
	for _, recv := range recvs {
		xcludeResp := dataobj.XcludeResp{
			Endpoints: recv.Endpoints,
			Metric:    recv.Metric,
			Tags:      make([]string, 0),
			Step:      -1, // fixme: step is unknown here
			DsType:    "GAUGE",
		}

		// nightingale compatibility: no endpoints selected -> empty result
		if len(recv.Endpoints) == 0 {
			resp = append(resp, xcludeResp)
			continue
		}

		showSeries := ShowSeries{
			Database:  influxdb.Section.Database,
			Metric:    recv.Metric,
			Endpoints: recv.Endpoints,
			Start:     time.Now().AddDate(0, 0, -30).Unix(),
			End:       time.Now().Unix(),
			Include:   recv.Include,
			Exclude:   recv.Exclude,
		}
		showSeries.renderShow()
		showSeries.renderEndpoints()
		showSeries.renderInclude()
		showSeries.renderExclude()

		query := client.NewQuery(showSeries.RawQuery, c.Database, c.Precision)
		if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
			for _, result := range response.Results {
				for _, series := range result.Series {
					for _, valuePair := range series.Values {
						// series key format:
						// proc.port.listen,endpoint=localhost,port=22,service=sshd
						tagKey, ok := valuePair[0].(string)
						if !ok {
							continue
						}
						// keep only user tags: drop the metric name and the
						// endpoint tag
						items := strings.Split(tagKey, ",")
						newItems := make([]string, 0)
						for _, item := range items {
							if item != recv.Metric && !strings.Contains(item, "endpoint") {
								newItems = append(newItems, item)
							}
						}
						if len(newItems) > 0 {
							if tags, err := dataobj.SplitTagsString(strings.Join(newItems, ",")); err == nil {
								xcludeResp.Tags = append(xcludeResp.Tags, dataobj.SortedTags(tags))
							}
						}
					}
				}
			}
		}
		resp = append(resp, xcludeResp)
	}
	return resp
}
// show series from metric where ...
func (influxdb *InfluxdbDataSource) QueryIndexByFullTags(recvs []dataobj.IndexByFullTagsRecv) []dataobj.
IndexByFullTagsResp {
logger.Debugf("query IndexByFullTags , recv: %+v", recvs)
c, err := NewInfluxdbClient(influxdb.Section)
defer c.Client.Close()
if err != nil {
logger.Errorf("init influxdb client fail: %v", err)
return nil
}
resp := make([]dataobj.IndexByFullTagsResp, 0)
for _, recv := range recvs {
fullTagResp := dataobj.IndexByFullTagsResp{
Endpoints: recv.Endpoints,
Metric: recv.Metric,
Tags: make([]string, 0),
Step: -1, // FIXME
DsType: "GAUGE",
}
// 兼容夜莺逻辑不选择endpoint则返回空
if len(recv.Endpoints) == 0 {
resp = append(resp, fullTagResp)
continue
}
// build influxql
influxdbShow := ShowSeries{
Database: influxdb.Section.Database,
Metric: recv.Metric,
Endpoints: recv.Endpoints,
Start: time.Now().AddDate(0, 0, -30).Unix(),
End: time.Now().Unix(),
}
influxdbShow.renderShow()
influxdbShow.renderEndpoints()
influxdbShow.renderTimeRange()
// do query
query := client.NewQuery(influxdbShow.RawQuery, c.Database, c.Precision)
if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
for _, result := range response.Results {
for _, series := range result.Series {
for _, valuePair := range series.Values {
// proc.port.listen,endpoint=localhost,port=22,service=sshd
tagKey := valuePair[0].(string)
// process
items := strings.Split(tagKey, ",")
newItems := make([]string, 0)
for _, item := range items {
if item != recv.Metric && !strings.Contains(item, "endpoint") {
newItems = append(newItems, item)
}
}
if len(newItems) > 0 {
if tags, err := dataobj.SplitTagsString(strings.Join(newItems, ",")); err == nil {
fullTagResp.Tags = append(fullTagResp.Tags, dataobj.SortedTags(tags))
}
}
}
}
}
}
resp = append(resp, fullTagResp)
}
return resp
}
// convertValues turns an influx result row into RRD data points.
// Each value pair is [timestamp, value]; both arrive as json.Number and
// non-numbers fall back to zero values (same as the original).
func convertValues(series models.Row) []*dataobj.RRDData {
	points := make([]*dataobj.RRDData, 0, len(series.Values))
	for _, pair := range series.Values {
		ts, _ := pair[0].(json.Number)
		val, _ := pair[1].(json.Number)
		timestamp, _ := ts.Int64()
		value, _ := val.Float64()
		points = append(points, dataobj.NewRRDData(timestamp, value))
	}
	return points
}

View File

@ -1,166 +1,76 @@
package backend package backend
import ( import (
"github.com/toolkits/pkg/container/list" "github.com/didi/nightingale/src/modules/transfer/backend/influxdb"
"github.com/toolkits/pkg/container/set" "github.com/didi/nightingale/src/modules/transfer/backend/tsdb"
"github.com/toolkits/pkg/str"
"github.com/didi/nightingale/src/modules/transfer/cache"
"github.com/didi/nightingale/src/toolkits/pools"
"github.com/didi/nightingale/src/toolkits/report"
"github.com/didi/nightingale/src/toolkits/stats"
) )
type InfluxdbSection struct {
Enabled bool `yaml:"enabled"`
Batch int `yaml:"batch"`
MaxRetry int `yaml:"maxRetry"`
WorkerNum int `yaml:"workerNum"`
Timeout int `yaml:"timeout"`
Address string `yaml:"address"`
Database string `yaml:"database"`
Username string `yaml:"username"`
Password string `yaml:"password"`
Precision string `yaml:"precision"`
}
type OpenTsdbSection struct {
Enabled bool `yaml:"enabled"`
Batch int `yaml:"batch"`
ConnTimeout int `yaml:"connTimeout"`
CallTimeout int `yaml:"callTimeout"`
WorkerNum int `yaml:"workerNum"`
MaxConns int `yaml:"maxConns"`
MaxIdle int `yaml:"maxIdle"`
MaxRetry int `yaml:"maxRetry"`
Address string `yaml:"address"`
}
type KafkaSection struct {
Enabled bool `yaml:"enabled"`
Topic string `yaml:"topic"`
BrokersPeers string `yaml:"brokersPeers"`
SaslUser string `yaml:"saslUser"`
SaslPasswd string `yaml:"saslPasswd"`
Retry int `yaml:"retry"`
KeepAlive int64 `yaml:"keepAlive"`
}
type BackendSection struct { type BackendSection struct {
Enabled bool `yaml:"enabled"` DataSource string `yaml:"datasource"`
Batch int `yaml:"batch"` StraPath string `yaml:"straPath"`
ConnTimeout int `yaml:"connTimeout"`
CallTimeout int `yaml:"callTimeout"`
WorkerNum int `yaml:"workerNum"`
MaxConns int `yaml:"maxConns"`
MaxIdle int `yaml:"maxIdle"`
IndexTimeout int `yaml:"indexTimeout"`
StraPath string `yaml:"straPath"`
HbsMod string `yaml:"hbsMod"`
Replicas int `yaml:"replicas"` Judge JudgeSection `yaml:"judge"`
Cluster map[string]string `yaml:"cluster"` Tsdb tsdb.TsdbSection `yaml:"tsdb"`
ClusterList map[string]*ClusterNode `json:"clusterList"` Influxdb influxdb.InfluxdbSection `yaml:"influxdb"`
Influxdb InfluxdbSection `yaml:"influxdb"` OpenTsdb OpenTsdbSection `yaml:"opentsdb"`
OpenTsdb OpenTsdbSection `yaml:"opentsdb"` Kafka KafkaSection `yaml:"kafka"`
Kafka KafkaSection `yaml:"kafka"`
}
const DefaultSendQueueMaxSize = 102400 //10.24w
type ClusterNode struct {
Addrs []string `json:"addrs"`
} }
var ( var (
Config BackendSection defaultDataSource string
// 服务节点的一致性哈希环 pk -> node StraPath string
TsdbNodeRing *ConsistentHashRing tsdbDataSource *tsdb.TsdbDataSource
openTSDBPushEndpoint *OpenTsdbPushEndpoint
// 发送缓存队列 node -> queue_of_data influxdbDataSource *influxdb.InfluxdbDataSource
TsdbQueues = make(map[string]*list.SafeListLimited) kafkaPushEndpoint *KafkaPushEndpoint
JudgeQueues = cache.SafeJudgeQueue{}
InfluxdbQueue *list.SafeListLimited
OpenTsdbQueue *list.SafeListLimited
KafkaQueue = make(chan KafkaData, 10)
// 连接池 node_address -> connection_pool
TsdbConnPools *pools.ConnPools
JudgeConnPools *pools.ConnPools
OpenTsdbConnPoolHelper *pools.OpenTsdbConnPoolHelper
connTimeout int32
callTimeout int32
) )
func Init(cfg BackendSection) { func Init(cfg BackendSection) {
Config = cfg defaultDataSource = cfg.DataSource
// 初始化默认参数 StraPath = cfg.StraPath
connTimeout = int32(Config.ConnTimeout)
callTimeout = int32(Config.CallTimeout)
initHashRing() // init judge
initConnPools() InitJudge(cfg.Judge)
initSendQueues()
startSendTasks() // init tsdb
} if cfg.Tsdb.Enabled {
tsdbDataSource = &tsdb.TsdbDataSource{
func initHashRing() { Section: cfg.Tsdb,
TsdbNodeRing = NewConsistentHashRing(int32(Config.Replicas), str.KeysOfMap(Config.Cluster)) SendQueueMaxSize: DefaultSendQueueMaxSize,
} SendTaskSleepInterval: DefaultSendTaskSleepInterval,
func initConnPools() {
tsdbInstances := set.NewSafeSet()
for _, item := range Config.ClusterList {
for _, addr := range item.Addrs {
tsdbInstances.Add(addr)
} }
tsdbDataSource.Init() // register
RegisterDataSource(tsdbDataSource.Section.Name, tsdbDataSource)
} }
TsdbConnPools = pools.NewConnPools(
Config.MaxConns, Config.MaxIdle, Config.ConnTimeout, Config.CallTimeout, tsdbInstances.ToSlice(),
)
JudgeConnPools = pools.NewConnPools( // init influxdb
Config.MaxConns, Config.MaxIdle, Config.ConnTimeout, Config.CallTimeout, GetJudges(), if cfg.Influxdb.Enabled {
) influxdbDataSource = &influxdb.InfluxdbDataSource{
if Config.OpenTsdb.Enabled { Section: cfg.Influxdb,
OpenTsdbConnPoolHelper = pools.NewOpenTsdbConnPoolHelper(Config.OpenTsdb.Address, Config.OpenTsdb.MaxConns, Config.OpenTsdb.MaxIdle, Config.OpenTsdb.ConnTimeout, Config.OpenTsdb.CallTimeout) SendQueueMaxSize: DefaultSendQueueMaxSize,
} SendTaskSleepInterval: DefaultSendTaskSleepInterval,
}
func initSendQueues() {
for node, item := range Config.ClusterList {
for _, addr := range item.Addrs {
TsdbQueues[node+addr] = list.NewSafeListLimited(DefaultSendQueueMaxSize)
} }
} influxdbDataSource.Init()
// register
RegisterDataSource(influxdbDataSource.Section.Name, influxdbDataSource)
JudgeQueues = cache.NewJudgeQueue()
judges := GetJudges()
for _, judge := range judges {
JudgeQueues.Set(judge, list.NewSafeListLimited(DefaultSendQueueMaxSize))
} }
// init opentsdb
if Config.Influxdb.Enabled { if cfg.OpenTsdb.Enabled {
InfluxdbQueue = list.NewSafeListLimited(DefaultSendQueueMaxSize) openTSDBPushEndpoint = &OpenTsdbPushEndpoint{
Section: cfg.OpenTsdb,
}
openTSDBPushEndpoint.Init()
// register
RegisterPushEndpoint(openTSDBPushEndpoint.Section.Name, openTSDBPushEndpoint)
} }
// init kafka
if Config.OpenTsdb.Enabled { if cfg.Kafka.Enabled {
OpenTsdbQueue = list.NewSafeListLimited(DefaultSendQueueMaxSize) kafkaPushEndpoint = &KafkaPushEndpoint{
Section: cfg.Kafka,
}
kafkaPushEndpoint.Init()
// register
RegisterPushEndpoint(kafkaPushEndpoint.Section.Name, kafkaPushEndpoint)
} }
} }
func GetJudges() []string {
var judgeInstances []string
instances, err := report.GetAlive("judge", Config.HbsMod)
if err != nil {
stats.Counter.Set("judge.get.err", 1)
return judgeInstances
}
for _, instance := range instances {
judgeInstance := instance.Identity + ":" + instance.RPCPort
judgeInstances = append(judgeInstances, judgeInstance)
}
return judgeInstances
}

View File

@ -0,0 +1,191 @@
package backend
import (
"time"
"github.com/didi/nightingale/src/dataobj"
"github.com/didi/nightingale/src/model"
"github.com/didi/nightingale/src/modules/transfer/cache"
"github.com/didi/nightingale/src/toolkits/pools"
"github.com/didi/nightingale/src/toolkits/report"
"github.com/didi/nightingale/src/toolkits/stats"
"github.com/didi/nightingale/src/toolkits/str"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/container/list"
"github.com/toolkits/pkg/logger"
)
// JudgeSection holds the transfer->judge forwarding configuration:
// batch size, rpc timeouts, worker/pool sizing, and the heartbeat
// module queried to discover alive judge instances.
type JudgeSection struct {
	Batch       int    `yaml:"batch"`       // max items popped per send
	ConnTimeout int    `yaml:"connTimeout"` // rpc connect timeout, ms
	CallTimeout int    `yaml:"callTimeout"` // rpc call timeout, ms
	WorkerNum   int    `yaml:"workerNum"`   // concurrent senders per judge instance
	MaxConns    int    `yaml:"maxConns"`    // rpc pool: max connections
	MaxIdle     int    `yaml:"maxIdle"`     // rpc pool: max idle connections
	HbsMod      string `yaml:"hbsMod"`      // module name used for alive-judge lookup
}
var (
	// Judge is the active judge-forwarding config, set by InitJudge.
	Judge JudgeSection

	// JudgeConnPools maps judge node address -> rpc connection pool.
	JudgeConnPools *pools.ConnPools

	// JudgeQueues maps judge instance -> bounded send queue.
	JudgeQueues = cache.SafeJudgeQueue{}
)
// InitJudge stores the judge section config, builds the rpc connection
// pool and one bounded send queue per alive judge instance, then spawns
// a sender task for each queue.
func InitJudge(section JudgeSection) {
	Judge = section

	instances := GetJudges()

	// connection pool shared by all sender goroutines
	JudgeConnPools = pools.NewConnPools(Judge.MaxConns, Judge.MaxIdle, Judge.ConnTimeout, Judge.CallTimeout, instances)

	// one bounded queue per judge instance
	JudgeQueues = cache.NewJudgeQueue()
	for _, instance := range instances {
		JudgeQueues.Set(instance, list.NewSafeListLimited(DefaultSendQueueMaxSize))
	}

	// sender concurrency is clamped to at least one worker
	concurrency := Judge.WorkerNum
	if concurrency < 1 {
		concurrency = 1
	}
	for instance, queue := range JudgeQueues.GetAll() {
		go Send2JudgeTask(queue, instance, concurrency)
	}
}
// Send2JudgeTask drains queue Q and ships judge items to instance addr
// via rpc. It runs forever; at most `concurrent` calls are in flight at
// once, each batch retried up to MaxSendRetry times before being
// counted as dropped.
func Send2JudgeTask(Q *list.SafeListLimited, addr string, concurrent int) {
	batch := Judge.Batch
	sema := semaphore.NewSemaphore(concurrent)

	for {
		items := Q.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			// queue empty: back off briefly instead of spinning
			time.Sleep(DefaultSendTaskSleepInterval)
			continue
		}

		judgeItems := make([]*dataobj.JudgeItem, count)
		stats.Counter.Set("points.out.judge", count)
		for i := 0; i < count; i++ {
			judgeItems[i] = items[i].(*dataobj.JudgeItem)
			logger.Debug("send to judge: ", judgeItems[i])
		}

		sema.Acquire()
		go func(addr string, judgeItems []*dataobj.JudgeItem, count int) {
			defer sema.Release()

			resp := &dataobj.SimpleRpcResponse{}
			var err error
			sendOk := false
			for i := 0; i < MaxSendRetry; i++ {
				err = JudgeConnPools.Call(addr, "Judge.Send", judgeItems, resp)
				if err == nil {
					sendOk = true
					break
				}
				logger.Warningf("send judge %s fail: %v", addr, err)
				time.Sleep(time.Millisecond * 10)
			}

			if !sendOk {
				// keep the error-counter key consistent with the success
				// counter ("points.out.judge") and the other backends
				// ("points.out.tsdb.err", "points.out.opentsdb.err", ...);
				// the previous key "points.out.err" dropped the backend name.
				stats.Counter.Set("points.out.judge.err", count)
				for _, item := range judgeItems {
					logger.Errorf("send %v to judge %s fail: %v", item, addr, err)
				}
			}
		}(addr, judgeItems, count)
	}
}
// Push2JudgeQueue fans metric points out to the per-judge queues of
// every strategy whose endpoint/metric/tags match the point; full
// queues are counted as drops.
func Push2JudgeQueue(items []*dataobj.MetricValue) {
	dropped := 0
	for _, item := range items {
		stras := cache.StraMap.GetByKey(str.PK(item.Metric, item.Endpoint))
		for _, stra := range stras {
			if !TagMatch(stra.Tags, item.TagsMap) {
				continue
			}
			judgeItem := &dataobj.JudgeItem{
				Endpoint:  item.Endpoint,
				Metric:    item.Metric,
				Value:     item.Value,
				Timestamp: item.Timestamp,
				DsType:    item.CounterType,
				Tags:      item.Tags,
				TagsMap:   item.TagsMap,
				Step:      int(item.Step),
				Sid:       stra.Id,
				Extra:     item.Extra,
			}
			// push to the judge instance bound to this strategy
			if q, exists := JudgeQueues.Get(stra.JudgeInstance); exists && !q.PushFront(judgeItem) {
				dropped++
			}
		}
	}
	stats.Counter.Set("judge.queue.err", dropped)
}
// alignTs floors ts down to the nearest multiple of period.
func alignTs(ts int64, period int64) int64 {
	remainder := ts % period
	return ts - remainder
}
// TagMatch reports whether a point's tag map satisfies every tag
// condition of a strategy. An "=" condition requires the tag value to
// equal one of Tval; any other operator is treated as exclusion (the
// value must equal none of Tval). A missing tag key fails the match.
func TagMatch(straTags []model.Tag, tag map[string]string) bool {
	for _, stag := range straTags {
		// the tag key must be present at all
		if _, exists := tag[stag.Tkey]; !exists {
			return false
		}
		var match bool
		if stag.Topt == "=" { // value must be one of the configured values
			for _, v := range stag.Tval {
				if tag[stag.Tkey] == v {
					match = true
					break
				}
			}
		} else { // exclusion: value must equal none of the configured values
			match = true
			for _, v := range stag.Tval {
				if tag[stag.Tkey] == v {
					match = false
					return match
				}
			}
		}
		if !match {
			return false
		}
	}
	return true
}
// GetJudges returns "identity:rpcport" for every alive judge instance
// reported by the heartbeat server; on lookup failure it bumps an error
// counter and returns an empty list.
func GetJudges() []string {
	instances, err := report.GetAlive("judge", Judge.HbsMod)
	if err != nil {
		stats.Counter.Set("judge.get.err", 1)
		return nil
	}
	addrs := make([]string, 0, len(instances))
	for _, instance := range instances {
		addrs = append(addrs, instance.Identity+":"+instance.RPCPort)
	}
	return addrs
}

View File

@ -4,13 +4,81 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"github.com/Shopify/sarama"
"github.com/toolkits/pkg/logger"
"os" "os"
"strings" "strings"
"time" "time"
"github.com/Shopify/sarama"
"github.com/didi/nightingale/src/dataobj"
"github.com/didi/nightingale/src/toolkits/stats"
"github.com/toolkits/pkg/logger"
) )
// KafkaSection is the yaml config for the kafka push endpoint:
// destination topic and brokers, producer timeouts/retries, and
// optional SASL credentials.
type KafkaSection struct {
	Enabled      bool   `yaml:"enabled"`
	Name         string `yaml:"name"` // endpoint name used at registration
	Topic        string `yaml:"topic"`
	BrokersPeers string `yaml:"brokersPeers"` // comma-separated broker list
	ConnTimeout  int    `yaml:"connTimeout"`  // dial timeout, ms
	CallTimeout  int    `yaml:"callTimeout"`  // send timeout, ms
	MaxRetry     int    `yaml:"maxRetry"`     // producer retry limit
	KeepAlive    int64  `yaml:"keepAlive"`    // tcp keepalive, ms
	SaslUser     string `yaml:"saslUser"`
	SaslPasswd   string `yaml:"saslPasswd"`
}
// KafkaPushEndpoint forwards metric points to kafka through a buffered
// channel drained by a background producer task.
type KafkaPushEndpoint struct {
	// config
	Section KafkaSection

	// send buffer: the producer task consumes from this channel
	KafkaQueue chan KafkaData
}
// Init prepares the buffered send channel and starts the background
// task that drains it into kafka.
func (kafka *KafkaPushEndpoint) Init() {
	kafka.KafkaQueue = make(chan KafkaData, 10)

	go kafka.send2KafkaTask()
}
// Push2Queue converts each metric point to the kafka wire map and
// pushes it into the send channel (blocking when the buffer is full).
func (kafka *KafkaPushEndpoint) Push2Queue(items []*dataobj.MetricValue) {
	for i := range items {
		kafka.KafkaQueue <- kafka.convert2KafkaItem(items[i])
	}
}
// convert2KafkaItem flattens a metric point into the map that is
// produced to kafka.
func (kafka *KafkaPushEndpoint) convert2KafkaItem(d *dataobj.MetricValue) KafkaData {
	m := make(KafkaData)
	m["metric"] = d.Metric
	m["value"] = d.Value // previously assigned twice; once is enough
	m["timestamp"] = d.Timestamp
	m["step"] = d.Step
	m["endpoint"] = d.Endpoint
	m["tags"] = d.Tags
	return m
}
// send2KafkaTask owns the kafka producer: it builds the client once,
// then forwards every item from the channel forever, counting send
// failures. If the client cannot be created the task exits and the
// channel is never drained (producers will eventually block).
func (kafka *KafkaPushEndpoint) send2KafkaTask() {
	kf, err := NewKfClient(kafka.Section)
	if err != nil {
		logger.Errorf("init kafka client fail: %v", err)
		return
	}
	defer kf.Close()
	for {
		kafkaItem := <-kafka.KafkaQueue
		stats.Counter.Set("points.out.kafka", 1)
		err = kf.Send(kafkaItem)
		if err != nil {
			stats.Counter.Set("points.out.kafka.err", 1)
			logger.Errorf("send %v to kafka %s fail: %v", kafkaItem, kafka.Section.BrokersPeers, err)
		}
	}
}
type KafkaData map[string]interface{} type KafkaData map[string]interface{}
type KfClient struct { type KfClient struct {
producer sarama.AsyncProducer producer sarama.AsyncProducer
@ -45,11 +113,11 @@ func NewKfClient(c KafkaSection) (kafkaSender *KfClient, err error) {
cfg.Net.SASL.User = c.SaslUser cfg.Net.SASL.User = c.SaslUser
cfg.Net.SASL.Password = c.SaslPasswd cfg.Net.SASL.Password = c.SaslPasswd
} }
if c.Retry > 0 { if c.MaxRetry > 0 {
cfg.Producer.Retry.Max = c.Retry cfg.Producer.Retry.Max = c.MaxRetry
} }
cfg.Net.DialTimeout = time.Duration(connTimeout) * time.Millisecond cfg.Net.DialTimeout = time.Duration(c.ConnTimeout) * time.Millisecond
if c.KeepAlive > 0 { if c.KeepAlive > 0 {
cfg.Net.KeepAlive = time.Duration(c.KeepAlive) * time.Millisecond cfg.Net.KeepAlive = time.Duration(c.KeepAlive) * time.Millisecond
@ -58,10 +126,11 @@ func NewKfClient(c KafkaSection) (kafkaSender *KfClient, err error) {
if err != nil { if err != nil {
return return
} }
kafkaSender = newSender(brokers, topic, cfg, producer) kafkaSender = newSender(brokers, topic, cfg, producer, c.CallTimeout)
return return
} }
func newSender(brokers []string, topic string, cfg *sarama.Config, producer sarama.AsyncProducer) (kf *KfClient) { func newSender(brokers []string, topic string, cfg *sarama.Config, producer sarama.AsyncProducer,
callTimeout int) (kf *KfClient) {
kf = &KfClient{ kf = &KfClient{
producer: producer, producer: producer,
Topic: topic, Topic: topic,

View File

@ -0,0 +1,136 @@
package backend
import (
"bytes"
"time"
"github.com/didi/nightingale/src/dataobj"
"github.com/didi/nightingale/src/toolkits/pools"
"github.com/didi/nightingale/src/toolkits/stats"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/container/list"
"github.com/toolkits/pkg/logger"
)
// OpenTsdbSection is the yaml config for the opentsdb push endpoint:
// target address, batching, timeouts, retries and pool sizing.
type OpenTsdbSection struct {
	Enabled     bool   `yaml:"enabled"`
	Name        string `yaml:"name"` // endpoint name used at registration
	Batch       int    `yaml:"batch"`
	ConnTimeout int    `yaml:"connTimeout"` // dial timeout, ms
	CallTimeout int    `yaml:"callTimeout"` // send timeout, ms
	WorkerNum   int    `yaml:"workerNum"`   // concurrent senders
	MaxConns    int    `yaml:"maxConns"`
	MaxIdle     int    `yaml:"maxIdle"`
	MaxRetry    int    `yaml:"maxRetry"`
	Address     string `yaml:"address"`
}
// OpenTsdbPushEndpoint forwards metric points to an opentsdb server
// through a bounded queue drained by a background sender task.
type OpenTsdbPushEndpoint struct {
	// config
	Section OpenTsdbSection

	OpenTsdbConnPoolHelper *pools.OpenTsdbConnPoolHelper

	// send buffer drained by send2OpenTsdbTask
	OpenTsdbQueue *list.SafeListLimited
}
// Init builds the opentsdb connection pool and bounded send queue
// (only when the endpoint is enabled) and starts the background sender
// with at least one worker.
func (opentsdb *OpenTsdbPushEndpoint) Init() {
	// The original checked Section.Enabled twice for the same condition;
	// pool and queue setup are gated together here.
	if opentsdb.Section.Enabled {
		opentsdb.OpenTsdbConnPoolHelper = pools.NewOpenTsdbConnPoolHelper(opentsdb.Section.Address,
			opentsdb.Section.MaxConns, opentsdb.Section.MaxIdle, opentsdb.Section.ConnTimeout,
			opentsdb.Section.CallTimeout)

		opentsdb.OpenTsdbQueue = list.NewSafeListLimited(DefaultSendQueueMaxSize)
	}

	// clamp worker count to at least one
	openTsdbConcurrent := opentsdb.Section.WorkerNum
	if openTsdbConcurrent < 1 {
		openTsdbConcurrent = 1
	}
	go opentsdb.send2OpenTsdbTask(openTsdbConcurrent)
}
// Push2Queue converts raw metric points to opentsdb items and pushes
// them onto the bounded send queue; full-queue drops are counted.
func (opentsdb *OpenTsdbPushEndpoint) Push2Queue(items []*dataobj.MetricValue) {
	dropped := 0
	for i := range items {
		if !opentsdb.OpenTsdbQueue.PushFront(opentsdb.convert2OpenTsdbItem(items[i])) {
			dropped++
		}
	}
	stats.Counter.Set("opentsdb.queue.err", dropped)
}
// send2OpenTsdbTask drains the opentsdb queue in batches, serializes
// each batch into the opentsdb line format, and sends it with bounded
// concurrency, retrying each batch up to MaxRetry times.
func (opentsdb *OpenTsdbPushEndpoint) send2OpenTsdbTask(concurrent int) {
	batch := opentsdb.Section.Batch // max points per send
	retry := opentsdb.Section.MaxRetry
	addr := opentsdb.Section.Address
	sema := semaphore.NewSemaphore(concurrent)
	for {
		items := opentsdb.OpenTsdbQueue.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			// queue empty: back off instead of spinning
			time.Sleep(DefaultSendTaskSleepInterval)
			continue
		}
		// build one newline-separated payload for the whole batch
		var openTsdbBuffer bytes.Buffer
		for i := 0; i < count; i++ {
			tsdbItem := items[i].(*dataobj.OpenTsdbItem)
			openTsdbBuffer.WriteString(tsdbItem.OpenTsdbString())
			openTsdbBuffer.WriteString("\n")
			stats.Counter.Set("points.out.opentsdb", 1)
			logger.Debug("send to opentsdb: ", tsdbItem)
		}
		// synchronous send inside a goroutine, bounded by the semaphore
		sema.Acquire()
		go func(addr string, openTsdbBuffer bytes.Buffer, count int) {
			defer sema.Release()
			var err error
			sendOk := false
			for i := 0; i < retry; i++ {
				err = opentsdb.OpenTsdbConnPoolHelper.Send(openTsdbBuffer.Bytes())
				if err == nil {
					sendOk = true
					break
				}
				logger.Warningf("send opentsdb %s fail: %v", addr, err)
				time.Sleep(100 * time.Millisecond)
			}
			if !sendOk {
				stats.Counter.Set("points.out.opentsdb.err", count)
				for _, item := range items {
					logger.Errorf("send %v to opentsdb %s fail: %v", item, addr, err)
				}
			} else {
				logger.Debugf("send to opentsdb %s ok", addr)
			}
		}(addr, openTsdbBuffer, count)
	}
}
// convert2OpenTsdbItem maps a metric point to an OpenTsdbItem, copying
// the tag set and injecting the endpoint as a tag.
func (opentsdb *OpenTsdbPushEndpoint) convert2OpenTsdbItem(d *dataobj.MetricValue) *dataobj.OpenTsdbItem {
	tags := make(map[string]string)
	for k, v := range d.TagsMap {
		tags[k] = v
	}
	tags["endpoint"] = d.Endpoint

	return &dataobj.OpenTsdbItem{
		Tags:      tags,
		Metric:    d.Metric,
		Timestamp: d.Timestamp,
		Value:     d.Value,
	}
}

View File

@ -1,535 +0,0 @@
package backend
import (
"bytes"
"time"
"github.com/didi/nightingale/src/dataobj"
"github.com/didi/nightingale/src/model"
"github.com/didi/nightingale/src/modules/transfer/cache"
"github.com/didi/nightingale/src/toolkits/stats"
"github.com/didi/nightingale/src/toolkits/str"
client "github.com/influxdata/influxdb/client/v2"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/container/list"
"github.com/toolkits/pkg/logger"
)
// send task tuning
const (
	DefaultSendTaskSleepInterval = time.Millisecond * 50 // sleep while a send queue is empty (default 50ms)
	MaxSendRetry                 = 10                    // rpc attempts before a batch is dropped
)
var (
	MinStep int // minimum report interval, in seconds
)
// startSendTasks spawns the background sender goroutines for every
// enabled backend: one per tsdb cluster address, one per judge
// instance, and one each for influxdb/opentsdb/kafka. Each worker
// count is clamped to at least 1.
func startSendTasks() {
	tsdbConcurrent := Config.WorkerNum
	if tsdbConcurrent < 1 {
		tsdbConcurrent = 1
	}

	judgeConcurrent := Config.WorkerNum
	if judgeConcurrent < 1 {
		judgeConcurrent = 1
	}

	influxdbConcurrent := Config.Influxdb.WorkerNum
	if influxdbConcurrent < 1 {
		influxdbConcurrent = 1
	}

	openTsdbConcurrent := Config.OpenTsdb.WorkerNum
	if openTsdbConcurrent < 1 {
		openTsdbConcurrent = 1
	}

	// NOTE(review): tsdb and judge senders are both gated on the same
	// Config.Enabled flag — confirm that is intended.
	if Config.Enabled {
		for node, item := range Config.ClusterList {
			for _, addr := range item.Addrs {
				queue := TsdbQueues[node+addr]
				go Send2TsdbTask(queue, node, addr, tsdbConcurrent)
			}
		}
	}

	if Config.Enabled {
		judgeQueue := JudgeQueues.GetAll()
		for instance, queue := range judgeQueue {
			go Send2JudgeTask(queue, instance, judgeConcurrent)
		}
	}

	if Config.Influxdb.Enabled {
		go send2InfluxdbTask(influxdbConcurrent)
	}

	if Config.OpenTsdb.Enabled {
		go send2OpenTsdbTask(openTsdbConcurrent)
	}

	if Config.Kafka.Enabled {
		go send2KafkaTask()
	}
}
// Send2TsdbTask drains the send queue of one tsdb node address and
// ships batches via rpc, with bounded concurrency and up to 3 retries
// per batch.
func Send2TsdbTask(Q *list.SafeListLimited, node, addr string, concurrent int) {
	batch := Config.Batch // max points per rpc call
	// NOTE(review): Q is immediately overwritten by the node+addr lookup,
	// so the passed-in queue is ignored — confirm this is intended.
	Q = TsdbQueues[node+addr]
	sema := semaphore.NewSemaphore(concurrent)
	for {
		items := Q.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			// queue empty: back off instead of spinning
			time.Sleep(DefaultSendTaskSleepInterval)
			continue
		}
		tsdbItems := make([]*dataobj.TsdbItem, count)
		stats.Counter.Set("points.out.tsdb", count)
		for i := 0; i < count; i++ {
			tsdbItems[i] = items[i].(*dataobj.TsdbItem)
			logger.Debug("send to tsdb->: ", tsdbItems[i])
		}
		// bounded concurrency
		sema.Acquire()
		go func(addr string, tsdbItems []*dataobj.TsdbItem, count int) {
			defer sema.Release()
			resp := &dataobj.SimpleRpcResponse{}
			var err error
			sendOk := false
			for i := 0; i < 3; i++ { // at most 3 attempts
				err = TsdbConnPools.Call(addr, "Tsdb.Send", tsdbItems, resp)
				if err == nil {
					sendOk = true
					break
				}
				time.Sleep(time.Millisecond * 10)
			}
			if !sendOk {
				stats.Counter.Set("points.out.tsdb.err", count)
				logger.Errorf("send %v to tsdb %s:%s fail: %v", tsdbItems, node, addr, err)
			} else {
				logger.Debugf("send to tsdb %s:%s ok", node, addr)
			}
		}(addr, tsdbItems, count)
	}
}
// Push2TsdbSendQueue pushes data to a TSDB instance which depends on the consistent ring.
// The point's primary key selects a cluster node via the hash ring, and
// the item is replicated onto the queue of every address of that node.
func Push2TsdbSendQueue(items []*dataobj.MetricValue) {
	errCnt := 0
	for _, item := range items {
		tsdbItem := convert2TsdbItem(item)
		stats.Counter.Set("tsdb.queue.push", 1)

		node, err := TsdbNodeRing.GetNode(item.PK())
		if err != nil {
			logger.Warningf("get tsdb node error: %v", err)
			continue
		}

		cnode := Config.ClusterList[node]
		for _, addr := range cnode.Addrs {
			Q := TsdbQueues[node+addr]
			// queue full: count the drop
			if !Q.PushFront(tsdbItem) {
				errCnt += 1
			}
		}
	}

	// statistics
	if errCnt > 0 {
		stats.Counter.Set("tsdb.queue.err", errCnt)
		logger.Error("Push2TsdbSendQueue err num: ", errCnt)
	}
}
// Send2JudgeTask drains queue Q and ships judge items to instance addr
// via rpc, with bounded concurrency and up to MaxSendRetry attempts
// per batch; exhausted batches are counted and logged as dropped.
func Send2JudgeTask(Q *list.SafeListLimited, addr string, concurrent int) {
	batch := Config.Batch
	sema := semaphore.NewSemaphore(concurrent)
	for {
		items := Q.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			// queue empty: back off instead of spinning
			time.Sleep(DefaultSendTaskSleepInterval)
			continue
		}
		judgeItems := make([]*dataobj.JudgeItem, count)
		stats.Counter.Set("points.out.judge", count)
		for i := 0; i < count; i++ {
			judgeItems[i] = items[i].(*dataobj.JudgeItem)
			logger.Debug("send to judge: ", judgeItems[i])
		}
		sema.Acquire()
		go func(addr string, judgeItems []*dataobj.JudgeItem, count int) {
			defer sema.Release()
			resp := &dataobj.SimpleRpcResponse{}
			var err error
			sendOk := false
			for i := 0; i < MaxSendRetry; i++ {
				err = JudgeConnPools.Call(addr, "Judge.Send", judgeItems, resp)
				if err == nil {
					sendOk = true
					break
				}
				logger.Warningf("send judge %s fail: %v", addr, err)
				time.Sleep(time.Millisecond * 10)
			}
			if !sendOk {
				stats.Counter.Set("points.out.judge.err", count)
				for _, item := range judgeItems {
					logger.Errorf("send %v to judge %s fail: %v", item, addr, err)
				}
			}
		}(addr, judgeItems, count)
	}
}
// Push2JudgeSendQueue fans metric points out to the per-judge queues of
// every strategy whose endpoint/metric/tags match the point; full
// queues are counted as drops.
func Push2JudgeSendQueue(items []*dataobj.MetricValue) {
	errCnt := 0
	for _, item := range items {
		key := str.PK(item.Metric, item.Endpoint)
		stras := cache.StraMap.GetByKey(key)
		for _, stra := range stras {
			if !TagMatch(stra.Tags, item.TagsMap) {
				continue
			}
			judgeItem := &dataobj.JudgeItem{
				Endpoint:  item.Endpoint,
				Metric:    item.Metric,
				Value:     item.Value,
				Timestamp: item.Timestamp,
				DsType:    item.CounterType,
				Tags:      item.Tags,
				TagsMap:   item.TagsMap,
				Step:      int(item.Step),
				Sid:       stra.Id,
				Extra:     item.Extra,
			}
			// push to the judge instance bound to this strategy
			q, exists := JudgeQueues.Get(stra.JudgeInstance)
			if exists {
				if !q.PushFront(judgeItem) {
					errCnt += 1
				}
			}
		}
	}
	stats.Counter.Set("judge.queue.err", errCnt)
}
// convert2TsdbItem builds a TsdbItem constrained by rrdtool semantics
// (step, counterType, timestamp): heartbeat is 2*step, type is GAUGE,
// and min/max are unbounded ("U").
func convert2TsdbItem(d *dataobj.MetricValue) *dataobj.TsdbItem {
	item := &dataobj.TsdbItem{
		Endpoint:  d.Endpoint,
		Metric:    d.Metric,
		Value:     d.Value,
		Timestamp: d.Timestamp,
		Tags:      d.Tags,
		TagsMap:   d.TagsMap,
		Step:      int(d.Step),
		Heartbeat: int(d.Step) * 2,
		DsType:    dataobj.GAUGE,
		Min:       "U",
		Max:       "U",
	}

	return item
}
// alignTs floors ts down to the nearest multiple of period.
func alignTs(ts int64, period int64) int64 {
	remainder := ts % period
	return ts - remainder
}
// TagMatch reports whether a point's tag map satisfies every tag
// condition of a strategy. An "=" condition requires the tag value to
// equal one of Tval; any other operator is treated as exclusion (the
// value must equal none of Tval). A missing tag key fails the match.
func TagMatch(straTags []model.Tag, tag map[string]string) bool {
	for _, stag := range straTags {
		// the tag key must be present at all
		if _, exists := tag[stag.Tkey]; !exists {
			return false
		}
		var match bool
		if stag.Topt == "=" { // value must be one of the configured values
			for _, v := range stag.Tval {
				if tag[stag.Tkey] == v {
					match = true
					break
				}
			}
		} else { // exclusion: value must equal none of the configured values
			match = true
			for _, v := range stag.Tval {
				if tag[stag.Tkey] == v {
					match = false
					return match
				}
			}
		}
		if !match {
			return false
		}
	}
	return true
}
// InfluxClient bundles an influxdb http client with the target
// database and write precision.
type InfluxClient struct {
	Client    client.Client
	Database  string
	Precision string
}
// NewInfluxdbClient builds an http influxdb client from the Influxdb
// section of the backend config.
func NewInfluxdbClient() (*InfluxClient, error) {
	c, err := client.NewHTTPClient(client.HTTPConfig{
		Addr:     Config.Influxdb.Address,
		Username: Config.Influxdb.Username,
		Password: Config.Influxdb.Password,
		Timeout:  time.Millisecond * time.Duration(Config.Influxdb.Timeout),
	})
	if err != nil {
		return nil, err
	}
	return &InfluxClient{
		Client:    c,
		Database:  Config.Influxdb.Database,
		Precision: Config.Influxdb.Precision,
	}, nil
}
// Send writes a batch of items as influxdb points in one batch write.
// Items that fail to build a point are skipped; the batch write error
// (if any) is returned.
func (c *InfluxClient) Send(items []*dataobj.InfluxdbItem) error {
	bp, err := client.NewBatchPoints(client.BatchPointsConfig{
		Database:  c.Database,
		Precision: c.Precision,
	})
	if err != nil {
		logger.Error("create batch points error: ", err)
		return err
	}
	for _, item := range items {
		pt, err := client.NewPoint(item.Measurement, item.Tags, item.Fields, time.Unix(item.Timestamp, 0))
		if err != nil {
			logger.Error("create new points error: ", err)
			continue
		}
		bp.AddPoint(pt)
	}
	return c.Client.Write(bp)
}
// Push2InfluxdbSendQueue converts raw metric points and pushes them
// onto the bounded influxdb queue; full-queue drops are counted.
func Push2InfluxdbSendQueue(items []*dataobj.MetricValue) {
	errCnt := 0
	for _, item := range items {
		influxdbItem := convert2InfluxdbItem(item)
		isSuccess := InfluxdbQueue.PushFront(influxdbItem)

		if !isSuccess {
			errCnt += 1
		}
	}
	stats.Counter.Set("influxdb.queue.err", errCnt)
}
// convert2InfluxdbItem maps a metric point to an influxdb item: tags
// are copied (plus the endpoint as a tag), the metric becomes the
// measurement, and the value goes into the "value" field.
func convert2InfluxdbItem(d *dataobj.MetricValue) *dataobj.InfluxdbItem {
	t := dataobj.InfluxdbItem{Tags: make(map[string]string), Fields: make(map[string]interface{})}

	for k, v := range d.TagsMap {
		t.Tags[k] = v
	}
	t.Tags["endpoint"] = d.Endpoint
	t.Measurement = d.Metric
	t.Fields["value"] = d.Value
	t.Timestamp = d.Timestamp

	return &t
}
// send2InfluxdbTask drains the influxdb queue in batches and writes
// them with a shared client; at most `concurrent` writes are in
// flight, each batch retried up to MaxRetry times.
func send2InfluxdbTask(concurrent int) {
	batch := Config.Influxdb.Batch // max points per write
	retry := Config.Influxdb.MaxRetry
	addr := Config.Influxdb.Address
	sema := semaphore.NewSemaphore(concurrent)

	c, err := NewInfluxdbClient()
	if err != nil {
		logger.Errorf("init influxdb client fail: %v", err)
		return
	}
	// BUG FIX: Close was previously deferred before the error check,
	// which dereferenced a nil client when construction failed.
	defer c.Client.Close()

	for {
		items := InfluxdbQueue.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			// queue empty: back off instead of spinning
			time.Sleep(DefaultSendTaskSleepInterval)
			continue
		}
		influxdbItems := make([]*dataobj.InfluxdbItem, count)
		for i := 0; i < count; i++ {
			influxdbItems[i] = items[i].(*dataobj.InfluxdbItem)
			stats.Counter.Set("points.out.influxdb", 1)
			logger.Debug("send to influxdb: ", influxdbItems[i])
		}

		// synchronous send with bounded concurrency; err is kept local
		// to each goroutine (the original shared one err across them).
		sema.Acquire()
		go func(addr string, influxdbItems []*dataobj.InfluxdbItem, count int) {
			defer sema.Release()
			var err error
			sendOk := false
			for i := 0; i < retry; i++ {
				err = c.Send(influxdbItems)
				if err == nil {
					sendOk = true
					break
				}
				logger.Warningf("send influxdb fail: %v", err)
				time.Sleep(time.Millisecond * 10)
			}
			if !sendOk {
				stats.Counter.Set("points.out.influxdb.err", count)
				logger.Errorf("send %v to influxdb %s fail: %v", influxdbItems, addr, err)
			} else {
				logger.Debugf("send to influxdb %s ok", addr)
			}
		}(addr, influxdbItems, count)
	}
}
// Push2OpenTsdbSendQueue converts raw metric points and pushes them
// onto the bounded opentsdb queue; full-queue drops are counted.
func Push2OpenTsdbSendQueue(items []*dataobj.MetricValue) {
	errCnt := 0
	for _, item := range items {
		tsdbItem := convert2OpenTsdbItem(item)
		isSuccess := OpenTsdbQueue.PushFront(tsdbItem)

		if !isSuccess {
			errCnt += 1
		}
	}
	stats.Counter.Set("opentsdb.queue.err", errCnt)
}
// send2OpenTsdbTask drains the opentsdb queue in batches, serializes
// each batch into the opentsdb line format, and sends it with bounded
// concurrency, retrying each batch up to MaxRetry times.
func send2OpenTsdbTask(concurrent int) {
	batch := Config.OpenTsdb.Batch // max points per send
	retry := Config.OpenTsdb.MaxRetry
	addr := Config.OpenTsdb.Address
	sema := semaphore.NewSemaphore(concurrent)
	for {
		items := OpenTsdbQueue.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			// queue empty: back off instead of spinning
			time.Sleep(DefaultSendTaskSleepInterval)
			continue
		}
		// build one newline-separated payload for the whole batch
		var openTsdbBuffer bytes.Buffer
		for i := 0; i < count; i++ {
			tsdbItem := items[i].(*dataobj.OpenTsdbItem)
			openTsdbBuffer.WriteString(tsdbItem.OpenTsdbString())
			openTsdbBuffer.WriteString("\n")
			stats.Counter.Set("points.out.opentsdb", 1)
			logger.Debug("send to opentsdb: ", tsdbItem)
		}
		// synchronous send inside a goroutine, bounded by the semaphore
		sema.Acquire()
		go func(addr string, openTsdbBuffer bytes.Buffer, count int) {
			defer sema.Release()
			var err error
			sendOk := false
			for i := 0; i < retry; i++ {
				err = OpenTsdbConnPoolHelper.Send(openTsdbBuffer.Bytes())
				if err == nil {
					sendOk = true
					break
				}
				logger.Warningf("send opentsdb %s fail: %v", addr, err)
				time.Sleep(100 * time.Millisecond)
			}
			if !sendOk {
				stats.Counter.Set("points.out.opentsdb.err", count)
				for _, item := range items {
					logger.Errorf("send %v to opentsdb %s fail: %v", item, addr, err)
				}
			} else {
				logger.Debugf("send to opentsdb %s ok", addr)
			}
		}(addr, openTsdbBuffer, count)
	}
}
// convert2OpenTsdbItem maps a metric point to an OpenTsdbItem, copying
// its tags and injecting the endpoint as a tag.
func convert2OpenTsdbItem(d *dataobj.MetricValue) *dataobj.OpenTsdbItem {
	t := dataobj.OpenTsdbItem{Tags: make(map[string]string)}

	for k, v := range d.TagsMap {
		t.Tags[k] = v
	}
	t.Tags["endpoint"] = d.Endpoint
	t.Metric = d.Metric
	t.Timestamp = d.Timestamp
	t.Value = d.Value
	return &t
}
// Push2KafkaSendQueue converts each metric point to the kafka wire map
// and pushes it into the send channel (blocking when the buffer fills).
func Push2KafkaSendQueue(items []*dataobj.MetricValue) {
	for _, item := range items {
		KafkaQueue <- convert2KafkaItem(item)
	}
}
// convert2KafkaItem flattens a metric point into the map that is
// produced to kafka.
func convert2KafkaItem(d *dataobj.MetricValue) KafkaData {
	m := make(KafkaData)
	m["metric"] = d.Metric
	m["value"] = d.Value // previously assigned twice; once is enough
	m["timestamp"] = d.Timestamp
	m["step"] = d.Step
	m["endpoint"] = d.Endpoint
	m["tags"] = d.Tags
	return m
}
// send2KafkaTask owns the kafka producer: it builds the client once,
// then forwards every item from the channel forever, counting send
// failures. If the client cannot be created the task exits and the
// channel is never drained.
func send2KafkaTask() {
	kf, err := NewKfClient(Config.Kafka)
	if err != nil {
		logger.Errorf("init kafka client fail: %v", err)
		return
	}
	defer kf.Close()
	for {
		kafkaItem := <-KafkaQueue
		stats.Counter.Set("points.out.kafka", 1)
		err = kf.Send(kafkaItem)
		if err != nil {
			stats.Counter.Set("points.out.kafka.err", 1)
			logger.Errorf("send %v to kafka %s fail: %v", kafkaItem, Config.Kafka.BrokersPeers, err)
		}
	}
}

View File

@ -0,0 +1,57 @@
package tsdb
import (
"fmt"
"sync"
"time"
"github.com/didi/nightingale/src/toolkits/report"
"github.com/didi/nightingale/src/toolkits/stats"
"github.com/toolkits/pkg/logger"
)
// IndexList is the shared, periodically refreshed list of alive index
// module addresses.
var IndexList IndexAddrs

// IndexAddrs is a concurrency-safe holder for a list of addresses.
type IndexAddrs struct {
	sync.RWMutex
	Data []string
}

// Set replaces the address list under the write lock.
func (i *IndexAddrs) Set(addrs []string) {
	i.Lock()
	i.Data = addrs
	i.Unlock()
}

// Get returns the current address list under the read lock.
func (i *IndexAddrs) Get() []string {
	i.RLock()
	defer i.RUnlock()
	return i.Data
}
// GetIndexLoop refreshes the index address list immediately and then
// every 9 seconds, forever.
func GetIndexLoop() {
	GetIndex()
	ticker := time.NewTicker(9 * time.Second)
	for range ticker.C {
		GetIndex()
	}
}
// GetIndex queries the heartbeat server for alive "index" instances
// and publishes their "host:httpport" addresses into IndexList; on
// lookup failure it bumps an error counter and leaves the list as-is.
func GetIndex() {
	instances, err := report.GetAlive("index", "monapi")
	if err != nil {
		stats.Counter.Set("get.index.err", 1)
		logger.Warningf("get index list err:%v", err)
		return
	}

	addrs := make([]string, 0, len(instances))
	for _, instance := range instances {
		addrs = append(addrs, fmt.Sprintf("%s:%s", instance.Identity, instance.HTTPPort))
	}
	IndexList.Set(addrs)
}

View File

@ -1,26 +1,24 @@
package backend package tsdb
import ( import (
"encoding/json"
"errors" "errors"
"fmt" "fmt"
"math/rand" "math/rand"
"strings"
"time" "time"
"github.com/toolkits/pkg/net/httplib"
"github.com/didi/nightingale/src/dataobj" "github.com/didi/nightingale/src/dataobj"
"github.com/didi/nightingale/src/modules/transfer/calc" "github.com/didi/nightingale/src/modules/transfer/calc"
"github.com/didi/nightingale/src/toolkits/address"
"github.com/didi/nightingale/src/toolkits/pools" "github.com/didi/nightingale/src/toolkits/pools"
"github.com/didi/nightingale/src/toolkits/stats" "github.com/didi/nightingale/src/toolkits/stats"
"github.com/toolkits/pkg/logger" "github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/net/httplib"
"github.com/toolkits/pkg/pool" "github.com/toolkits/pkg/pool"
) )
func FetchData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse { func (tsdb *TsdbDataSource) QueryData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse {
logger.Debugf("query data, inputs: %+v", inputs)
workerNum := 100 workerNum := 100
worker := make(chan struct{}, workerNum) // 控制 goroutine 并发数 worker := make(chan struct{}, workerNum) // 控制 goroutine 并发数
dataChan := make(chan *dataobj.TsdbQueryResponse, 20000) dataChan := make(chan *dataobj.TsdbQueryResponse, 20000)
@ -38,7 +36,8 @@ func FetchData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse {
for _, endpoint := range input.Endpoints { for _, endpoint := range input.Endpoints {
for _, counter := range input.Counters { for _, counter := range input.Counters {
worker <- struct{}{} worker <- struct{}{}
go fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker, dataChan) go tsdb.fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker,
dataChan)
} }
} }
} }
@ -55,7 +54,10 @@ func FetchData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse {
return resp return resp
} }
func FetchDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse { func (tsdb *TsdbDataSource) QueryDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse {
logger.Debugf("query data for ui, input: %+v", input)
workerNum := 100 workerNum := 100
worker := make(chan struct{}, workerNum) // 控制 goroutine 并发数 worker := make(chan struct{}, workerNum) // 控制 goroutine 并发数
dataChan := make(chan *dataobj.TsdbQueryResponse, 20000) dataChan := make(chan *dataobj.TsdbQueryResponse, 20000)
@ -71,22 +73,24 @@ func FetchDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse {
for _, endpoint := range input.Endpoints { for _, endpoint := range input.Endpoints {
if len(input.Tags) == 0 { if len(input.Tags) == 0 {
counter, err := GetCounter(input.Metric, "", nil) counter, err := dataobj.GetCounter(input.Metric, "", nil)
if err != nil { if err != nil {
logger.Warningf("get counter error: %+v", err) logger.Warningf("get counter error: %+v", err)
continue continue
} }
worker <- struct{}{} worker <- struct{}{}
go fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker, dataChan) go tsdb.fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker,
dataChan)
} else { } else {
for _, tag := range input.Tags { for _, tag := range input.Tags {
counter, err := GetCounter(input.Metric, tag, nil) counter, err := dataobj.GetCounter(input.Metric, tag, nil)
if err != nil { if err != nil {
logger.Warningf("get counter error: %+v", err) logger.Warningf("get counter error: %+v", err)
continue continue
} }
worker <- struct{}{} worker <- struct{}{}
go fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker, dataChan) go tsdb.fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker,
dataChan)
} }
} }
} }
@ -154,30 +158,16 @@ func FetchDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse {
return resp return resp
} }
func GetCounter(metric, tag string, tagMap map[string]string) (counter string, err error) { func (tsdb *TsdbDataSource) fetchDataSync(start, end int64, consolFun, endpoint, counter string, step int, worker chan struct{}, dataChan chan *dataobj.TsdbQueryResponse) {
if tagMap == nil {
tagMap, err = dataobj.SplitTagsString(tag)
if err != nil {
logger.Warningf("split tag string error: %+v", err)
return
}
}
tagStr := dataobj.SortedTags(tagMap)
counter = dataobj.PKWithTags(metric, tagStr)
return
}
func fetchDataSync(start, end int64, consolFun, endpoint, counter string, step int, worker chan struct{}, dataChan chan *dataobj.TsdbQueryResponse) {
defer func() { defer func() {
<-worker <-worker
}() }()
stats.Counter.Set("query.tsdb", 1) stats.Counter.Set("query.tsdb", 1)
data, err := fetchData(start, end, consolFun, endpoint, counter, step) data, err := tsdb.fetchData(start, end, consolFun, endpoint, counter, step)
if err != nil { if err != nil {
logger.Warningf("fetch tsdb data error: %+v", err) logger.Warningf("fetch tsdb data error: %+v", err)
stats.Counter.Set("query.data.err", 1) stats.Counter.Set("query.tsdb.err", 1)
data.Endpoint = endpoint data.Endpoint = endpoint
data.Counter = counter data.Counter = counter
data.Step = step data.Step = step
@ -185,11 +175,11 @@ func fetchDataSync(start, end int64, consolFun, endpoint, counter string, step i
dataChan <- data dataChan <- data
} }
func fetchData(start, end int64, consolFun, endpoint, counter string, step int) (*dataobj.TsdbQueryResponse, error) { func (tsdb *TsdbDataSource) fetchData(start, end int64, consolFun, endpoint, counter string, step int) (*dataobj.TsdbQueryResponse, error) {
var resp *dataobj.TsdbQueryResponse var resp *dataobj.TsdbQueryResponse
qparm := GenQParam(start, end, consolFun, endpoint, counter, step) qparm := genQParam(start, end, consolFun, endpoint, counter, step)
resp, err := QueryOne(qparm) resp, err := tsdb.QueryOne(qparm)
if err != nil { if err != nil {
return resp, err return resp, err
} }
@ -200,12 +190,7 @@ func fetchData(start, end int64, consolFun, endpoint, counter string, step int)
return resp, nil return resp, nil
} }
func getCounterStep(endpoint, counter string) (step int, err error) { func genQParam(start, end int64, consolFunc, endpoint, counter string, step int) dataobj.TsdbQueryParam {
//从内存中获取
return
}
func GenQParam(start, end int64, consolFunc, endpoint, counter string, step int) dataobj.TsdbQueryParam {
return dataobj.TsdbQueryParam{ return dataobj.TsdbQueryParam{
Start: start, Start: start,
End: end, End: end,
@ -216,12 +201,12 @@ func GenQParam(start, end int64, consolFunc, endpoint, counter string, step int)
} }
} }
func QueryOne(para dataobj.TsdbQueryParam) (resp *dataobj.TsdbQueryResponse, err error) { func (tsdb *TsdbDataSource) QueryOne(para dataobj.TsdbQueryParam) (resp *dataobj.TsdbQueryResponse, err error) {
start, end := para.Start, para.End start, end := para.Start, para.End
resp = &dataobj.TsdbQueryResponse{} resp = &dataobj.TsdbQueryResponse{}
pk := dataobj.PKWithCounter(para.Endpoint, para.Counter) pk := dataobj.PKWithCounter(para.Endpoint, para.Counter)
ps, err := SelectPoolByPK(pk) ps, err := tsdb.SelectPoolByPK(pk)
if err != nil { if err != nil {
return resp, err return resp, err
} }
@ -259,7 +244,7 @@ func QueryOne(para dataobj.TsdbQueryParam) (resp *dataobj.TsdbQueryResponse, err
}() }()
select { select {
case <-time.After(time.Duration(callTimeout) * time.Millisecond): case <-time.After(time.Duration(tsdb.Section.CallTimeout) * time.Millisecond):
onePool.ForceClose(conn) onePool.ForceClose(conn)
logger.Errorf("%s, call timeout. proc: %s", addr, onePool.Proc()) logger.Errorf("%s, call timeout. proc: %s", addr, onePool.Proc())
break break
@ -297,20 +282,20 @@ type Pool struct {
Addr string Addr string
} }
func SelectPoolByPK(pk string) ([]Pool, error) { func (tsdb *TsdbDataSource) SelectPoolByPK(pk string) ([]Pool, error) {
node, err := TsdbNodeRing.GetNode(pk) node, err := tsdb.TsdbNodeRing.GetNode(pk)
if err != nil { if err != nil {
return []Pool{}, err return []Pool{}, err
} }
nodeAddrs, found := Config.ClusterList[node] nodeAddrs, found := tsdb.Section.ClusterList[node]
if !found { if !found {
return []Pool{}, errors.New("node not found") return []Pool{}, errors.New("node not found")
} }
var ps []Pool var ps []Pool
for _, addr := range nodeAddrs.Addrs { for _, addr := range nodeAddrs.Addrs {
onePool, found := TsdbConnPools.Get(addr) onePool, found := tsdb.TsdbConnPools.Get(addr)
if !found { if !found {
logger.Errorf("addr %s not found", addr) logger.Errorf("addr %s not found", addr)
continue continue
@ -325,97 +310,113 @@ func SelectPoolByPK(pk string) ([]Pool, error) {
return ps, nil return ps, nil
} }
func getTags(counter string) (tags string) { type IndexMetricsResp struct {
idx := strings.IndexAny(counter, "/") Data *dataobj.MetricResp `json:"dat"`
if idx == -1 { Err string `json:"err"`
return ""
}
return counter[idx+1:]
} }
type Tagkv struct { func (tsdb *TsdbDataSource) QueryMetrics(recv dataobj.EndpointsRecv) *dataobj.MetricResp {
TagK string `json:"tagk"` var result IndexMetricsResp
TagV []string `json:"tagv"` err := PostIndex("/api/index/metrics", int64(tsdb.Section.CallTimeout), recv, &result)
}
type SeriesReq struct {
Endpoints []string `json:"endpoints"`
Metric string `json:"metric"`
Tagkv []*Tagkv `json:"tagkv"`
}
type SeriesResp struct {
Dat []Series `json:"dat"`
Err string `json:"err"`
}
type Series struct {
Endpoints []string `json:"endpoints"`
Metric string `json:"metric"`
Tags []string `json:"tags"`
Step int `json:"step"`
DsType string `json:"dstype"`
}
func GetSeries(start, end int64, req []SeriesReq) ([]dataobj.QueryData, error) {
var res SeriesResp
var queryDatas []dataobj.QueryData
if len(req) < 1 {
return queryDatas, fmt.Errorf("req length < 1")
}
addrs := address.GetHTTPAddresses("index")
if len(addrs) < 1 {
return queryDatas, fmt.Errorf("index addr is nil")
}
i := rand.Intn(len(addrs))
addr := fmt.Sprintf("http://%s/api/index/counter/fullmatch", addrs[i])
resp, code, err := httplib.PostJSON(addr, time.Duration(Config.IndexTimeout)*time.Millisecond, req, nil)
if err != nil { if err != nil {
return queryDatas, err logger.Errorf("post index failed, %+v", err)
return nil
} }
if code != 200 { if result.Err != "" {
return nil, fmt.Errorf("index response status code != 200") logger.Errorf("index xclude failed, %+v", result.Err)
return nil
} }
if err = json.Unmarshal(resp, &res); err != nil { return result.Data
logger.Error(string(resp)) }
return queryDatas, err
} type IndexTagPairsResp struct {
Data []dataobj.IndexTagkvResp `json:"dat"`
for _, item := range res.Dat { Err string `json:"err"`
counters := make([]string, 0) }
if len(item.Tags) == 0 {
counters = append(counters, item.Metric) func (tsdb *TsdbDataSource) QueryTagPairs(recv dataobj.EndpointMetricRecv) []dataobj.IndexTagkvResp {
} else { var result IndexTagPairsResp
for _, tag := range item.Tags { err := PostIndex("/api/index/tagkv", int64(tsdb.Section.CallTimeout), recv, &result)
tagMap, err := dataobj.SplitTagsString(tag) if err != nil {
if err != nil { logger.Errorf("post index failed, %+v", err)
logger.Warning(err, tag) return nil
continue }
}
tagStr := dataobj.SortedTags(tagMap) if result.Err != "" || len(result.Data) == 0 {
counter := dataobj.PKWithTags(item.Metric, tagStr) logger.Errorf("index xclude failed, %+v", result.Err)
counters = append(counters, counter) return nil
} }
}
return result.Data
queryData := dataobj.QueryData{ }
Start: start,
End: end, type IndexCludeResp struct {
Endpoints: item.Endpoints, Data []dataobj.XcludeResp `json:"dat"`
Counters: counters, Err string `json:"err"`
ConsolFunc: "AVERAGE", }
DsType: item.DsType,
Step: item.Step, func (tsdb *TsdbDataSource) QueryIndexByClude(recv []dataobj.CludeRecv) []dataobj.XcludeResp {
} var result IndexCludeResp
queryDatas = append(queryDatas, queryData) err := PostIndex("/api/index/counter/clude", int64(tsdb.Section.CallTimeout), recv, &result)
} if err != nil {
logger.Errorf("post index failed, %+v", err)
return queryDatas, err return nil
}
if result.Err != "" || len(result.Data) == 0 {
logger.Errorf("index xclude failed, %+v", result.Err)
return nil
}
return result.Data
}
// IndexByFullTagsResp is the JSON envelope returned by the index
// service for full-tag counter matches: matched entries in "dat", an
// error string (empty on success) in "err".
type IndexByFullTagsResp struct {
	Data []dataobj.IndexByFullTagsResp `json:"dat"`
	Err  string                        `json:"err"`
}
// QueryIndexByFullTags asks the index service for counters that fully
// match the requested tag combinations. It returns nil when the call
// fails, when the index reports an error, or when nothing matched.
func (tsdb *TsdbDataSource) QueryIndexByFullTags(recv []dataobj.IndexByFullTagsRecv) []dataobj.IndexByFullTagsResp {
	var reply IndexByFullTagsResp
	if err := PostIndex("/api/index/counter/fullmatch", int64(tsdb.Section.CallTimeout),
		recv, &reply); err != nil {
		logger.Errorf("post index failed, %+v", err)
		return nil
	}

	if reply.Err != "" || len(reply.Data) == 0 {
		logger.Errorf("index fullTags failed, %+v", reply.Err)
		return nil
	}
	return reply.Data
}
func PostIndex(url string, calltimeout int64, recv interface{}, resp interface{}) error {
addrs := IndexList.Get()
if len(addrs) == 0 {
logger.Errorf("empty index addr")
return errors.New("empty index addr")
}
perm := rand.Perm(len(addrs))
var err error
for i := range perm {
url := fmt.Sprintf("http://%s%s", addrs[perm[i]], url)
err = httplib.Post(url).JSONBodyQuiet(recv).SetTimeout(
time.Duration(calltimeout) * time.Millisecond).ToJSON(&resp)
if err == nil {
break
}
logger.Warningf("index %s failed, error:%v, req:%+v", url, err, recv)
}
if err != nil {
logger.Errorf("index %s failed, error:%v, req:%+v", url, err, recv)
return err
}
return nil
} }

View File

@ -1,4 +1,4 @@
package backend package tsdb
import ( import (
"sync" "sync"

View File

@ -0,0 +1,212 @@
package tsdb
import (
"strings"
"time"
"github.com/toolkits/pkg/errors"
"github.com/didi/nightingale/src/dataobj"
"github.com/didi/nightingale/src/toolkits/pools"
"github.com/didi/nightingale/src/toolkits/stats"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/container/list"
"github.com/toolkits/pkg/container/set"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
// TsdbSection holds the transfer->tsdb backend configuration loaded
// from the YAML config file. Timeouts are in milliseconds.
type TsdbSection struct {
	Enabled      bool   `yaml:"enabled"`      // whether this backend is active
	Name         string `yaml:"name"`         // datasource name used for registration/lookup
	Batch        int    `yaml:"batch"`        // max items popped per send batch
	ConnTimeout  int    `yaml:"connTimeout"`  // connect timeout (ms), passed to the conn pools
	CallTimeout  int    `yaml:"callTimeout"`  // RPC/index call timeout (ms)
	WorkerNum    int    `yaml:"workerNum"`    // sender goroutines per queue (min 1)
	MaxConns     int    `yaml:"maxConns"`     // conn-pool max connections per instance
	MaxIdle      int    `yaml:"maxIdle"`      // conn-pool max idle connections
	IndexTimeout int    `yaml:"indexTimeout"` // index call timeout (ms)
	Replicas     int    `yaml:"replicas"`     // virtual nodes per real node on the hash ring

	// Cluster maps node name -> comma-separated instance addresses, as
	// written in YAML.
	Cluster map[string]string `yaml:"cluster"`

	// ClusterList is the parsed form of Cluster. Note the json (not yaml)
	// tag: it is populated at startup by config.Parse via
	// formatClusterItems, not read from the YAML file directly.
	ClusterList map[string]*ClusterNode `json:"clusterList"`
}
// ClusterNode is the parsed form of one cluster entry: the list of
// instance addresses that make up a single tsdb node.
type ClusterNode struct {
	Addrs []string `json:"addrs"`
}
// TsdbDataSource implements the transfer datasource on top of a cluster
// of n9e tsdb instances, sharded by a consistent hash ring.
type TsdbDataSource struct {
	// config
	Section               TsdbSection
	SendQueueMaxSize      int           // capacity of each per-address send queue
	SendTaskSleepInterval time.Duration // backoff when a send queue is empty

	// Consistent hash ring of service nodes: pk -> node.
	TsdbNodeRing *ConsistentHashRing

	// Buffered send queues, keyed node+addr -> queue_of_data.
	TsdbQueues map[string]*list.SafeListLimited

	// Connection pools: node_address -> connection_pool.
	TsdbConnPools *pools.ConnPools
}
// Init wires up the tsdb datasource: it builds the consistent hash ring
// over the configured cluster nodes, creates one connection pool per
// instance address, allocates one bounded send queue per (node, address)
// pair, starts a sender goroutine for each queue, and kicks off the
// index address refresh loop. Must be called before pushing or querying.
func (tsdb *TsdbDataSource) Init() {

	// init hash ring: ring keys come from the raw Cluster map
	tsdb.TsdbNodeRing = NewConsistentHashRing(int32(tsdb.Section.Replicas),
		str.KeysOfMap(tsdb.Section.Cluster))

	// init connPool over the deduplicated set of instance addresses
	tsdbInstances := set.NewSafeSet()
	for _, item := range tsdb.Section.ClusterList {
		for _, addr := range item.Addrs {
			tsdbInstances.Add(addr)
		}
	}
	tsdb.TsdbConnPools = pools.NewConnPools(
		tsdb.Section.MaxConns, tsdb.Section.MaxIdle, tsdb.Section.ConnTimeout, tsdb.Section.CallTimeout,
		tsdbInstances.ToSlice(),
	)

	// init queues: one per (node, address) pair, keyed by node+addr
	tsdb.TsdbQueues = make(map[string]*list.SafeListLimited)
	for node, item := range tsdb.Section.ClusterList {
		for _, addr := range item.Addrs {
			tsdb.TsdbQueues[node+addr] = list.NewSafeListLimited(tsdb.SendQueueMaxSize)
		}
	}

	// start task: one long-running sender per queue, each allowed
	// tsdbConcurrent in-flight RPCs (minimum 1)
	tsdbConcurrent := tsdb.Section.WorkerNum
	if tsdbConcurrent < 1 {
		tsdbConcurrent = 1
	}
	for node, item := range tsdb.Section.ClusterList {
		for _, addr := range item.Addrs {
			queue := tsdb.TsdbQueues[node+addr]
			go tsdb.Send2TsdbTask(queue, node, addr, tsdbConcurrent)
		}
	}

	go GetIndexLoop()
}
// Push2Queue hashes every metric value onto the consistent ring and
// enqueues the converted tsdb item on the send queue of each address of
// the owning cluster node. Items whose node cannot be resolved are
// skipped; full queues are counted and reported once at the end.
func (tsdb *TsdbDataSource) Push2Queue(items []*dataobj.MetricValue) {
	dropped := 0
	for _, item := range items {
		converted := convert2TsdbItem(item)
		stats.Counter.Set("tsdb.queue.push", 1)

		node, err := tsdb.TsdbNodeRing.GetNode(item.PK())
		if err != nil {
			logger.Warningf("get tsdb node error: %v", err)
			continue
		}

		for _, addr := range tsdb.Section.ClusterList[node].Addrs {
			// PushFront reports false when the queue is already full.
			if !tsdb.TsdbQueues[node+addr].PushFront(converted) {
				dropped++
			}
		}
	}

	// statistics
	if dropped > 0 {
		stats.Counter.Set("tsdb.queue.err", dropped)
		logger.Error("Push2TsdbSendQueue err num: ", dropped)
	}
}
// Send2TsdbTask drains Q forever, shipping batches of TsdbItems to the
// tsdb instance at addr via the "Tsdb.Send" RPC. At most `concurrent`
// sends are in flight at once (bounded by a semaphore); each batch is
// retried up to 3 times before being counted as lost. Runs as a
// dedicated goroutine per queue (started in Init) and never returns.
func (tsdb *TsdbDataSource) Send2TsdbTask(Q *list.SafeListLimited, node, addr string, concurrent int) {
	batch := tsdb.Section.Batch // max number of items sent per RPC
	// NOTE(review): Q is immediately re-fetched from the queue map, so the
	// parameter value is effectively ignored — presumably callers pass the
	// same queue; verify before removing either.
	Q = tsdb.TsdbQueues[node+addr]
	sema := semaphore.NewSemaphore(concurrent)

	for {
		items := Q.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			// queue empty: back off before polling again
			time.Sleep(tsdb.SendTaskSleepInterval)
			continue
		}

		tsdbItems := make([]*dataobj.TsdbItem, count)
		stats.Counter.Set("points.out.tsdb", count)
		for i := 0; i < count; i++ {
			tsdbItems[i] = items[i].(*dataobj.TsdbItem)
			logger.Debug("send to tsdb->: ", tsdbItems[i])
		}

		// bound the number of concurrent in-flight sends
		sema.Acquire()
		go func(addr string, tsdbItems []*dataobj.TsdbItem, count int) {
			defer sema.Release()

			resp := &dataobj.SimpleRpcResponse{}
			var err error
			sendOk := false
			for i := 0; i < 3; i++ { // retry at most 3 times
				err = tsdb.TsdbConnPools.Call(addr, "Tsdb.Send", tsdbItems, resp)
				if err == nil {
					sendOk = true
					break
				}
				time.Sleep(time.Millisecond * 10)
			}

			if !sendOk {
				// whole batch counted as lost after exhausting retries
				stats.Counter.Set("points.out.tsdb.err", count)
				logger.Errorf("send %v to tsdb %s:%s fail: %v", tsdbItems, node, addr, err)
			} else {
				logger.Debugf("send to tsdb %s:%s ok", node, addr)
			}
		}(addr, tsdbItems, count)
	}
}
// GetInstance returns the tsdb instance addresses responsible for the
// given metric/endpoint/tags combination, resolved via the consistent
// hash ring. Errors are escalated through errors.Dangerous, matching
// the handler-style error convention used elsewhere in this module.
func (tsdb *TsdbDataSource) GetInstance(metric, endpoint string, tags map[string]string) []string {
	counter, err := dataobj.GetCounter(metric, "", tags)
	errors.Dangerous(err)

	pk := dataobj.PKWithCounter(endpoint, counter)
	pools, err := tsdb.SelectPoolByPK(pk)
	// Previously this error was silently dropped, so an unresolvable pk
	// produced an empty address list with no diagnostic.
	errors.Dangerous(err)

	addrs := make([]string, len(pools))
	for i, pool := range pools {
		addrs[i] = pool.Addr
	}
	return addrs
}
// convert2TsdbItem builds a TsdbItem from a MetricValue, normalized for
// rrdtool-backed storage: heartbeat is twice the step, the counter type
// is fixed to GAUGE, and min/max are left unbounded ("U").
func convert2TsdbItem(d *dataobj.MetricValue) *dataobj.TsdbItem {
	step := int(d.Step)
	return &dataobj.TsdbItem{
		Endpoint:  d.Endpoint,
		Metric:    d.Metric,
		Value:     d.Value,
		Timestamp: d.Timestamp,
		Tags:      d.Tags,
		TagsMap:   d.TagsMap,
		Step:      step,
		Heartbeat: step * 2,
		DsType:    dataobj.GAUGE,
		Min:       "U",
		Max:       "U",
	}
}
// getTags strips the metric part from a "metric/tags" counter string
// and returns the tag portion after the first '/'; it returns "" when
// the counter carries no tags (no '/' present).
func getTags(counter string) (tags string) {
	slash := strings.Index(counter, "/")
	if slash < 0 {
		return ""
	}
	return counter[slash+1:]
}

View File

@ -3,9 +3,12 @@ package config
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"github.com/didi/nightingale/src/toolkits/identity"
"github.com/didi/nightingale/src/toolkits/report"
"strings" "strings"
"github.com/didi/nightingale/src/modules/transfer/backend" "github.com/didi/nightingale/src/modules/transfer/backend"
"github.com/didi/nightingale/src/modules/transfer/backend/tsdb"
"github.com/didi/nightingale/src/toolkits/logger" "github.com/didi/nightingale/src/toolkits/logger"
"github.com/spf13/viper" "github.com/spf13/viper"
@ -13,12 +16,14 @@ import (
) )
type ConfYaml struct { type ConfYaml struct {
Debug bool `yaml:"debug"` Debug bool `yaml:"debug"`
MinStep int `yaml:"minStep"` MinStep int `yaml:"minStep"`
Logger logger.LoggerSection `yaml:"logger"` Logger logger.LoggerSection `yaml:"logger"`
Backend backend.BackendSection `yaml:"backend"` Backend backend.BackendSection `yaml:"backend"`
HTTP HTTPSection `yaml:"http"` HTTP HTTPSection `yaml:"http"`
RPC RPCSection `yaml:"rpc"` RPC RPCSection `yaml:"rpc"`
Identity identity.IdentitySection `yaml:"identity"`
Report report.ReportSection `yaml:"report"`
} }
type IndexSection struct { type IndexSection struct {
@ -45,13 +50,13 @@ var (
Config *ConfYaml Config *ConfYaml
) )
func NewClusterNode(addrs []string) *backend.ClusterNode { func NewClusterNode(addrs []string) *tsdb.ClusterNode {
return &backend.ClusterNode{Addrs: addrs} return &tsdb.ClusterNode{Addrs: addrs}
} }
// map["node"]="host1,host2" --> map["node"]=["host1", "host2"] // map["node"]="host1,host2" --> map["node"]=["host1", "host2"]
func formatClusterItems(cluster map[string]string) map[string]*backend.ClusterNode { func formatClusterItems(cluster map[string]string) map[string]*tsdb.ClusterNode {
ret := make(map[string]*backend.ClusterNode) ret := make(map[string]*tsdb.ClusterNode)
for node, clusterStr := range cluster { for node, clusterStr := range cluster {
items := strings.Split(clusterStr, ",") items := strings.Split(clusterStr, ",")
nitems := make([]string, 0) nitems := make([]string, 0)
@ -80,21 +85,36 @@ func Parse(conf string) error {
viper.SetDefault("minStep", 1) viper.SetDefault("minStep", 1)
viper.SetDefault("backend", map[string]interface{}{ viper.SetDefault("backend", map[string]interface{}{
"datasource": "tsdb",
"straPath": "/api/portal/stras/effective?all=1",
})
viper.SetDefault("backend.judge", map[string]interface{}{
"batch": 200, //每次拉取文件的个数
"workerNum": 32,
"maxConns": 2000, //查询和推送数据的并发个数
"maxIdle": 32, //建立的连接池的最大空闲数
"connTimeout": 1000, //链接超时时间,单位毫秒
"callTimeout": 3000, //访问超时时间,单位毫秒
"hbsMod": "monapi",
})
viper.SetDefault("backend.tsdb", map[string]interface{}{
"enabled": true, "enabled": true,
"name": "tsdb",
"batch": 200, //每次拉取文件的个数 "batch": 200, //每次拉取文件的个数
"replicas": 500, //一致性hash虚拟节点
"workerNum": 32, "workerNum": 32,
"maxConns": 2000, //查询和推送数据的并发个数 "maxConns": 2000, //查询和推送数据的并发个数
"maxIdle": 32, //建立的连接池的最大空闲数 "maxIdle": 32, //建立的连接池的最大空闲数
"connTimeout": 1000, //链接超时时间,单位毫秒 "connTimeout": 1000, //链接超时时间,单位毫秒
"callTimeout": 3000, //访问超时时间,单位毫秒 "callTimeout": 3000, //访问超时时间,单位毫秒
"indexTimeout": 3000, //访问index超时时间单位毫秒 "indexTimeout": 3000, //访问index超时时间单位毫秒
"straPath": "/api/portal/stras/effective?all=1", "replicas": 500, //一致性hash虚拟节点
"hbsMod": "monapi",
}) })
viper.SetDefault("backend.influxdb", map[string]interface{}{ viper.SetDefault("backend.influxdb", map[string]interface{}{
"enabled": false, "enabled": false,
"name": "influxdb",
"batch": 200, //每次拉取文件的个数 "batch": 200, //每次拉取文件的个数
"maxRetry": 3, //重试次数 "maxRetry": 3, //重试次数
"workerNum": 32, "workerNum": 32,
@ -104,6 +124,7 @@ func Parse(conf string) error {
viper.SetDefault("backend.opentsdb", map[string]interface{}{ viper.SetDefault("backend.opentsdb", map[string]interface{}{
"enabled": false, "enabled": false,
"name": "opentsdb",
"batch": 200, //每次拉取文件的个数 "batch": 200, //每次拉取文件的个数
"maxRetry": 3, //重试次数 "maxRetry": 3, //重试次数
"workerNum": 32, "workerNum": 32,
@ -113,12 +134,29 @@ func Parse(conf string) error {
"callTimeout": 3000, //访问超时时间,单位毫秒 "callTimeout": 3000, //访问超时时间,单位毫秒
}) })
viper.SetDefault("backend.kafka", map[string]interface{}{
"enabled": false,
"name": "kafka",
"maxRetry": 3, //重试次数
"connTimeout": 1000, //链接超时时间,单位毫秒
"callTimeout": 3000, //访问超时时间,单位毫秒
})
viper.SetDefault("report", map[string]interface{}{
"mod": "transfer",
"enabled": true,
"interval": 4000,
"timeout": 3000,
"api": "api/hbs/heartbeat",
"remark": "",
})
err = viper.Unmarshal(&Config) err = viper.Unmarshal(&Config)
if err != nil { if err != nil {
return fmt.Errorf("cannot read yml[%s]: %v", conf, err) return fmt.Errorf("cannot read yml[%s]: %v", conf, err)
} }
Config.Backend.ClusterList = formatClusterItems(Config.Backend.Cluster) Config.Backend.Tsdb.ClusterList = formatClusterItems(Config.Backend.Tsdb.Cluster)
return err return err
} }

View File

@ -26,7 +26,7 @@ func updateJudgeQueue() {
if !backend.JudgeQueues.Exists(instance) { if !backend.JudgeQueues.Exists(instance) {
q := list.NewSafeListLimited(backend.DefaultSendQueueMaxSize) q := list.NewSafeListLimited(backend.DefaultSendQueueMaxSize)
backend.JudgeQueues.Set(instance, q) backend.JudgeQueues.Set(instance, q)
go backend.Send2JudgeTask(q, instance, backend.Config.WorkerNum) go backend.Send2JudgeTask(q, instance, backend.Judge.WorkerNum)
} else { } else {
backend.JudgeQueues.UpdateTS(instance) backend.JudgeQueues.UpdateTS(instance)
} }

View File

@ -41,7 +41,7 @@ func getStrategy() {
perm := rand.Perm(len(addrs)) perm := rand.Perm(len(addrs))
var err error var err error
for i := range perm { for i := range perm {
url := fmt.Sprintf("http://%s%s", addrs[perm[i]], backend.Config.StraPath) url := fmt.Sprintf("http://%s%s", addrs[perm[i]], backend.StraPath)
err = httplib.Get(url).SetTimeout(time.Duration(3000) * time.Millisecond).ToJSON(&stras) err = httplib.Get(url).SetTimeout(time.Duration(3000) * time.Millisecond).ToJSON(&stras)
if err != nil { if err != nil {

View File

@ -4,7 +4,8 @@ import (
"fmt" "fmt"
"os" "os"
"github.com/didi/nightingale/src/dataobj" "github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/src/modules/transfer/backend" "github.com/didi/nightingale/src/modules/transfer/backend"
"github.com/didi/nightingale/src/modules/transfer/cache" "github.com/didi/nightingale/src/modules/transfer/cache"
"github.com/didi/nightingale/src/toolkits/http/render" "github.com/didi/nightingale/src/toolkits/http/render"
@ -50,16 +51,14 @@ func tsdbInstance(c *gin.Context) {
var input tsdbInstanceRecv var input tsdbInstanceRecv
errors.Dangerous(c.ShouldBindJSON(&input)) errors.Dangerous(c.ShouldBindJSON(&input))
counter, err := backend.GetCounter(input.Metric, "", input.TagMap) dataSource, err := backend.GetDataSourceFor("tsdb")
errors.Dangerous(err) if err != nil {
logger.Warningf("could not find datasource")
pk := dataobj.PKWithCounter(input.Endpoint, counter) render.Message(c, err)
pools, err := backend.SelectPoolByPK(pk) return
addrs := make([]string, len(pools))
for i, pool := range pools {
addrs[i] = pool.Addr
} }
addrs := dataSource.GetInstance(input.Metric, input.Endpoint, input.TagMap)
render.Data(c, addrs, nil) render.Data(c, addrs, nil)
} }

View File

@ -40,24 +40,19 @@ func PushData(c *gin.Context) {
metricValues = append(metricValues, v) metricValues = append(metricValues, v)
} }
if backend.Config.Enabled { // send to judge
backend.Push2TsdbSendQueue(metricValues) backend.Push2JudgeQueue(metricValues)
}
if backend.Config.Enabled { // send to push endpoints
backend.Push2JudgeSendQueue(metricValues) pushEndpoints, err := backend.GetPushEndpoints()
} if err != nil {
logger.Errorf("could not find pushendpoint")
if backend.Config.Influxdb.Enabled { render.Data(c, "error", err)
backend.Push2InfluxdbSendQueue(metricValues) return
} } else {
for _, pushendpoint := range pushEndpoints {
if backend.Config.OpenTsdb.Enabled { pushendpoint.Push2Queue(metricValues)
backend.Push2OpenTsdbSendQueue(metricValues) }
}
if backend.Config.Kafka.Enabled {
backend.Push2KafkaSendQueue(metricValues)
} }
if msg != "" { if msg != "" {

View File

@ -12,34 +12,22 @@ import (
) )
type QueryDataReq struct { type QueryDataReq struct {
Start int64 `json:"start"` queryData []dataobj.QueryData
End int64 `json:"end"`
Series []backend.SeriesReq `json:"series"`
}
func QueryDataForJudge(c *gin.Context) {
var inputs []dataobj.QueryData
errors.Dangerous(c.ShouldBindJSON(&inputs))
resp := backend.FetchData(inputs)
render.Data(c, resp, nil)
} }
func QueryData(c *gin.Context) { func QueryData(c *gin.Context) {
stats.Counter.Set("data.api.qp10s", 1) stats.Counter.Set("data.api.qp10s", 1)
var input QueryDataReq dataSource, err := backend.GetDataSourceFor("")
errors.Dangerous(c.ShouldBindJSON(&input))
queryData, err := backend.GetSeries(input.Start, input.End, input.Series)
if err != nil { if err != nil {
logger.Error(err, input) logger.Warningf("could not find datasource")
render.Message(c, "query err") render.Message(c, err)
return return
} }
resp := backend.FetchData(queryData) var queryDataReq QueryDataReq
errors.Dangerous(c.ShouldBindJSON(&queryDataReq))
resp := dataSource.QueryData(queryDataReq.queryData)
render.Data(c, resp, nil) render.Data(c, resp, nil)
} }
@ -51,7 +39,13 @@ func QueryDataForUI(c *gin.Context) {
start := input.Start start := input.Start
end := input.End end := input.End
resp := backend.FetchDataForUI(input) dataSource, err := backend.GetDataSourceFor("")
if err != nil {
logger.Warningf("could not find datasource")
render.Message(c, err)
return
}
resp := dataSource.QueryDataForUI(input)
for _, d := range resp { for _, d := range resp {
data := &dataobj.QueryDataForUIResp{ data := &dataobj.QueryDataForUIResp{
Start: d.Start, Start: d.Start,
@ -70,7 +64,7 @@ func QueryDataForUI(c *gin.Context) {
comparison := input.Comparisons[i] comparison := input.Comparisons[i]
input.Start = start - comparison input.Start = start - comparison
input.End = end - comparison input.End = end - comparison
res := backend.FetchDataForUI(input) res := dataSource.QueryDataForUI(input)
for _, d := range res { for _, d := range res {
for j := range d.Values { for j := range d.Values {
d.Values[j].Timestamp += comparison d.Values[j].Timestamp += comparison
@ -93,3 +87,68 @@ func QueryDataForUI(c *gin.Context) {
render.Data(c, respData, nil) render.Data(c, respData, nil)
} }
// GetMetrics handles POST /api/index/metrics: it returns the metric
// names the configured datasource's index knows for the endpoints in
// the request body.
func GetMetrics(c *gin.Context) {
	stats.Counter.Set("metric.qp10s", 1)

	recv := dataobj.EndpointsRecv{}
	errors.Dangerous(c.ShouldBindJSON(&recv))

	// NOTE(review): "" presumably selects the default datasource — confirm
	// against backend.GetDataSourceFor.
	dataSource, err := backend.GetDataSourceFor("")
	if err != nil {
		logger.Warningf("could not find datasource")
		render.Message(c, err)
		return
	}

	resp := dataSource.QueryMetrics(recv)
	render.Data(c, resp, nil)
}
// GetTagPairs handles POST /api/index/tagkv: it returns the tag
// key/value pairs the configured datasource's index knows for the
// requested endpoints and metrics.
func GetTagPairs(c *gin.Context) {
	stats.Counter.Set("tag.qp10s", 1)

	recv := dataobj.EndpointMetricRecv{}
	errors.Dangerous(c.ShouldBindJSON(&recv))

	// NOTE(review): "" presumably selects the default datasource — confirm
	// against backend.GetDataSourceFor.
	dataSource, err := backend.GetDataSourceFor("")
	if err != nil {
		logger.Warningf("could not find datasource")
		render.Message(c, err)
		return
	}

	resp := dataSource.QueryTagPairs(recv)
	render.Data(c, resp, nil)
}
// GetIndexByClude handles POST /api/index/counter/clude: it queries the
// configured datasource's index for counters filtered by the
// include/exclude tag conditions in the request body.
func GetIndexByClude(c *gin.Context) {
	stats.Counter.Set("xclude.qp10s", 1)

	// keep an empty (non-nil) slice so an empty body still encodes as []
	reqs := make([]dataobj.CludeRecv, 0)
	errors.Dangerous(c.ShouldBindJSON(&reqs))

	dataSource, err := backend.GetDataSourceFor("")
	if err != nil {
		logger.Warningf("could not find datasource")
		render.Message(c, err)
		return
	}

	render.Data(c, dataSource.QueryIndexByClude(reqs), nil)
}
// GetIndexByFullTags handles POST /api/index/counter/fullmatch: it asks
// the configured datasource's index for counters that fully match the
// requested tag combinations.
func GetIndexByFullTags(c *gin.Context) {
	stats.Counter.Set("counter.qp10s", 1)

	// keep an empty (non-nil) slice so an empty body still encodes as []
	reqs := make([]dataobj.IndexByFullTagsRecv, 0)
	errors.Dangerous(c.ShouldBindJSON(&reqs))

	dataSource, err := backend.GetDataSourceFor("")
	if err != nil {
		logger.Warningf("could not find datasource")
		render.Message(c, err)
		return
	}

	render.Data(c, dataSource.QueryIndexByFullTags(reqs), nil)
}

View File

@ -22,6 +22,14 @@ func Config(r *gin.Engine) {
sys.POST("/data/ui", QueryDataForUI) sys.POST("/data/ui", QueryDataForUI)
} }
index := r.Group("/api/index")
{
index.POST("/metrics", GetMetrics)
index.POST("/tagkv", GetTagPairs)
index.POST("/counter/clude", GetIndexByClude)
index.POST("/counter/fullmatch", GetIndexByFullTags)
}
v2 := r.Group("/api/transfer/v2") v2 := r.Group("/api/transfer/v2")
{ {
v2.POST("/data", QueryData) v2.POST("/data", QueryData)

View File

@ -36,25 +36,20 @@ func (t *Transfer) Push(args []*dataobj.MetricValue, reply *dataobj.TransferResp
items = append(items, v) items = append(items, v)
} }
if backend.Config.Enabled { // send to judge
backend.Push2TsdbSendQueue(items) backend.Push2JudgeQueue(items)
// send to push endpoints
pushEndpoints, err := backend.GetPushEndpoints()
if err != nil {
logger.Errorf("could not find pushendpoint")
return err
} else {
for _, pushendpoint := range pushEndpoints {
pushendpoint.Push2Queue(items)
}
} }
if backend.Config.Enabled {
backend.Push2JudgeSendQueue(items)
}
if backend.Config.Influxdb.Enabled {
backend.Push2InfluxdbSendQueue(items)
}
if backend.Config.OpenTsdb.Enabled {
backend.Push2OpenTsdbSendQueue(items)
}
if backend.Config.Kafka.Enabled {
backend.Push2KafkaSendQueue(items)
}
if reply.Invalid == 0 { if reply.Invalid == 0 {
reply.Msg = "ok" reply.Msg = "ok"
} }

View File

@ -3,9 +3,15 @@ package rpc
import ( import (
"github.com/didi/nightingale/src/dataobj" "github.com/didi/nightingale/src/dataobj"
"github.com/didi/nightingale/src/modules/transfer/backend" "github.com/didi/nightingale/src/modules/transfer/backend"
"github.com/toolkits/pkg/logger"
) )
func (t *Transfer) Query(args []dataobj.QueryData, reply *dataobj.QueryDataResp) error { func (t *Transfer) Query(args []dataobj.QueryData, reply *dataobj.QueryDataResp) error {
reply.Data = backend.FetchData(args) dataSource, err := backend.GetDataSourceFor("")
if err != nil {
logger.Warningf("could not find datasource")
return err
}
reply.Data = dataSource.QueryData(args)
return nil return nil
} }

View File

@ -13,7 +13,9 @@ import (
"github.com/didi/nightingale/src/modules/transfer/http/routes" "github.com/didi/nightingale/src/modules/transfer/http/routes"
"github.com/didi/nightingale/src/modules/transfer/rpc" "github.com/didi/nightingale/src/modules/transfer/rpc"
"github.com/didi/nightingale/src/toolkits/http" "github.com/didi/nightingale/src/toolkits/http"
"github.com/didi/nightingale/src/toolkits/identity"
tlogger "github.com/didi/nightingale/src/toolkits/logger" tlogger "github.com/didi/nightingale/src/toolkits/logger"
"github.com/didi/nightingale/src/toolkits/report"
"github.com/didi/nightingale/src/toolkits/stats" "github.com/didi/nightingale/src/toolkits/stats"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
@ -61,9 +63,11 @@ func main() {
tlogger.Init(cfg.Logger) tlogger.Init(cfg.Logger)
go stats.Init("n9e.transfer") go stats.Init("n9e.transfer")
identity.Init(cfg.Identity)
backend.Init(cfg.Backend) backend.Init(cfg.Backend)
cron.Init() cron.Init()
go report.Init(cfg.Report, "monapi")
go rpc.Start() go rpc.Start()
r := gin.New() r := gin.New()