refactor transfer datasources for ui/judge, implement tsdb(+index) an… (#246)
* refactor transfer datasources for ui/judge, implement tsdb(+index) and influxdb * fix error string; fix import identidy ; refactor pushendpoint init * fix influx queryData Co-authored-by: wangzhiguo04 <wangzhiguo04@meicai.cn>
This commit is contained in:
parent
b6169ac706
commit
520dda70c0
|
@ -3,6 +3,7 @@ query:
|
||||||
connTimeout: 1000
|
connTimeout: 1000
|
||||||
callTimeout: 2000
|
callTimeout: 2000
|
||||||
indexCallTimeout: 2000
|
indexCallTimeout: 2000
|
||||||
|
indexMod: "index"
|
||||||
|
|
||||||
redis:
|
redis:
|
||||||
addrs:
|
addrs:
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
backend:
|
backend:
|
||||||
maxConns: 20000
|
datasource: "tsdb"
|
||||||
# in ms
|
tsdb:
|
||||||
# connTimeout: 1000
|
enabled: true
|
||||||
# callTimeout: 3000
|
name: "tsdb"
|
||||||
cluster:
|
cluster:
|
||||||
tsdb01: 127.0.0.1:5821
|
tsdb01: 127.0.0.1:5821
|
||||||
influxdb:
|
influxdb:
|
||||||
enabled: false
|
enabled: false
|
||||||
username: "influx"
|
username: "influx"
|
||||||
|
@ -21,6 +21,11 @@ backend:
|
||||||
brokersPeers: "192.168.1.1:9092,192.168.1.2:9092"
|
brokersPeers: "192.168.1.1:9092,192.168.1.2:9092"
|
||||||
topic: "n9e"
|
topic: "n9e"
|
||||||
|
|
||||||
|
identity:
|
||||||
|
specify: ""
|
||||||
|
shell: ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1
|
||||||
|
|
||||||
|
|
||||||
logger:
|
logger:
|
||||||
dir: logs/transfer
|
dir: logs/transfer
|
||||||
level: WARNING
|
level: WARNING
|
||||||
|
|
|
@ -314,6 +314,19 @@ func PKWithCounter(endpoint, counter string) string {
|
||||||
return ret.String()
|
return ret.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetCounter(metric, tag string, tagMap map[string]string) (counter string, err error) {
|
||||||
|
if tagMap == nil {
|
||||||
|
tagMap, err = SplitTagsString(tag)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tagStr := SortedTags(tagMap)
|
||||||
|
counter = PKWithTags(metric, tagStr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
func PKWithTags(metric, tags string) string {
|
func PKWithTags(metric, tags string) string {
|
||||||
ret := bufferPool.Get().(*bytes.Buffer)
|
ret := bufferPool.Get().(*bytes.Buffer)
|
||||||
ret.Reset()
|
ret.Reset()
|
||||||
|
|
|
@ -48,3 +48,56 @@ func (req *QueryData) Key() string {
|
||||||
func (resp *TsdbQueryResponse) Key() string {
|
func (resp *TsdbQueryResponse) Key() string {
|
||||||
return resp.Endpoint + "/" + resp.Counter
|
return resp.Endpoint + "/" + resp.Counter
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type EndpointsRecv struct {
|
||||||
|
Endpoints []string `json:"endpoints"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type MetricResp struct {
|
||||||
|
Metrics []string `json:"metrics"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type EndpointMetricRecv struct {
|
||||||
|
Endpoints []string `json:"endpoints"`
|
||||||
|
Metrics []string `json:"metrics"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type IndexTagkvResp struct {
|
||||||
|
Endpoints []string `json:"endpoints"`
|
||||||
|
Metric string `json:"metric"`
|
||||||
|
Tagkv []*TagPair `json:"tagkv"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type TagPair struct {
|
||||||
|
Key string `json:"tagk"` // json 和变量不一致为了兼容前端
|
||||||
|
Values []string `json:"tagv"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type CludeRecv struct {
|
||||||
|
Endpoints []string `json:"endpoints"`
|
||||||
|
Metric string `json:"metric"`
|
||||||
|
Include []*TagPair `json:"include"`
|
||||||
|
Exclude []*TagPair `json:"exclude"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type XcludeResp struct {
|
||||||
|
Endpoints []string `json:"endpoints"`
|
||||||
|
Metric string `json:"metric"`
|
||||||
|
Tags []string `json:"tags"`
|
||||||
|
Step int `json:"step"`
|
||||||
|
DsType string `json:"dstype"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type IndexByFullTagsRecv struct {
|
||||||
|
Endpoints []string `json:"endpoints"`
|
||||||
|
Metric string `json:"metric"`
|
||||||
|
Tagkv []TagPair `json:"tagkv"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type IndexByFullTagsResp struct {
|
||||||
|
Endpoints []string `json:"endpoints"`
|
||||||
|
Metric string `json:"metric"`
|
||||||
|
Tags []string `json:"tags"`
|
||||||
|
Step int `json:"step"`
|
||||||
|
DsType string `json:"dstype"`
|
||||||
|
}
|
||||||
|
|
|
@ -40,7 +40,7 @@ func GetIndexLoop() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetIndex() {
|
func GetIndex() {
|
||||||
instances, err := report.GetAlive("index", "monapi")
|
instances, err := report.GetAlive(Config.IndexMod, "monapi")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
stats.Counter.Set("get.index.err", 1)
|
stats.Counter.Set("get.index.err", 1)
|
||||||
logger.Warningf("get index list err:%v", err)
|
logger.Warningf("get index list err:%v", err)
|
||||||
|
|
|
@ -19,6 +19,7 @@ type SeriesQuerySection struct {
|
||||||
MaxIdle int `json:"maxIdle"` //
|
MaxIdle int `json:"maxIdle"` //
|
||||||
ConnTimeout int `json:"connTimeout"` // 连接超时
|
ConnTimeout int `json:"connTimeout"` // 连接超时
|
||||||
CallTimeout int `json:"callTimeout"` // 请求超时
|
CallTimeout int `json:"callTimeout"` // 请求超时
|
||||||
|
IndexMod string `json:"indexMod"`
|
||||||
IndexPath string `json:"indexPath"`
|
IndexPath string `json:"indexPath"`
|
||||||
IndexCallTimeout int `json:"indexCallTimeout"` // 请求超时
|
IndexCallTimeout int `json:"indexCallTimeout"` // 请求超时
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,6 +49,7 @@ func Parse(conf string) error {
|
||||||
"connTimeout": 1000,
|
"connTimeout": 1000,
|
||||||
"callTimeout": 2000,
|
"callTimeout": 2000,
|
||||||
"indexCallTimeout": 2000,
|
"indexCallTimeout": 2000,
|
||||||
|
"indexMod": "index",
|
||||||
"indexPath": "/api/index/counter/clude",
|
"indexPath": "/api/index/counter/clude",
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,84 @@
|
||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/didi/nightingale/src/dataobj"
|
||||||
|
)
|
||||||
|
|
||||||
|
// send
|
||||||
|
const (
|
||||||
|
DefaultSendTaskSleepInterval = time.Millisecond * 50 //默认睡眠间隔为50ms
|
||||||
|
DefaultSendQueueMaxSize = 102400 //10.24w
|
||||||
|
MaxSendRetry = 10
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
MinStep int //最小上报周期,单位sec
|
||||||
|
)
|
||||||
|
|
||||||
|
type DataSource interface {
|
||||||
|
PushEndpoint
|
||||||
|
|
||||||
|
// query data for judge
|
||||||
|
QueryData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse
|
||||||
|
// query data for ui
|
||||||
|
QueryDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse
|
||||||
|
|
||||||
|
// query metrics & tags
|
||||||
|
QueryMetrics(recv dataobj.EndpointsRecv) *dataobj.MetricResp
|
||||||
|
QueryTagPairs(recv dataobj.EndpointMetricRecv) []dataobj.IndexTagkvResp
|
||||||
|
QueryIndexByClude(recv []dataobj.CludeRecv) []dataobj.XcludeResp
|
||||||
|
QueryIndexByFullTags(recv []dataobj.IndexByFullTagsRecv) []dataobj.IndexByFullTagsResp
|
||||||
|
|
||||||
|
// tsdb instance
|
||||||
|
GetInstance(metric, endpoint string, tags map[string]string) []string
|
||||||
|
}
|
||||||
|
|
||||||
|
type PushEndpoint interface {
|
||||||
|
// push data
|
||||||
|
Push2Queue(items []*dataobj.MetricValue)
|
||||||
|
}
|
||||||
|
|
||||||
|
var registryDataSources map[string]DataSource
|
||||||
|
var registryPushEndpoints map[string]PushEndpoint
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
registryDataSources = make(map[string]DataSource)
|
||||||
|
registryPushEndpoints = make(map[string]PushEndpoint)
|
||||||
|
}
|
||||||
|
|
||||||
|
// get backend datasource
|
||||||
|
// (pluginId == "" for default datasource)
|
||||||
|
func GetDataSourceFor(pluginId string) (DataSource, error) {
|
||||||
|
if pluginId == "" {
|
||||||
|
pluginId = defaultDataSource
|
||||||
|
}
|
||||||
|
if source, exists := registryDataSources[pluginId]; exists {
|
||||||
|
return source, nil
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("could not find datasource for plugin: %s", pluginId)
|
||||||
|
}
|
||||||
|
|
||||||
|
// get all push endpoints
|
||||||
|
func GetPushEndpoints() ([]PushEndpoint, error) {
|
||||||
|
if len(registryPushEndpoints) > 0 {
|
||||||
|
items := make([]PushEndpoint, 0, len(registryPushEndpoints))
|
||||||
|
for _, value := range registryPushEndpoints {
|
||||||
|
items = append(items, value)
|
||||||
|
}
|
||||||
|
return items, nil
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("could not find any pushendpoint")
|
||||||
|
}
|
||||||
|
|
||||||
|
func RegisterDataSource(pluginId string, datasource DataSource) {
|
||||||
|
|
||||||
|
registryDataSources[pluginId] = datasource
|
||||||
|
registryPushEndpoints[pluginId] = datasource
|
||||||
|
}
|
||||||
|
|
||||||
|
func RegisterPushEndpoint(pluginId string, push PushEndpoint) {
|
||||||
|
registryPushEndpoints[pluginId] = push
|
||||||
|
}
|
|
@ -0,0 +1,187 @@
|
||||||
|
package influxdb
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/didi/nightingale/src/dataobj"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/stats"
|
||||||
|
"github.com/influxdata/influxdb/client/v2"
|
||||||
|
"github.com/toolkits/pkg/concurrent/semaphore"
|
||||||
|
"github.com/toolkits/pkg/container/list"
|
||||||
|
"github.com/toolkits/pkg/logger"
|
||||||
|
)
|
||||||
|
|
||||||
|
type InfluxdbSection struct {
|
||||||
|
Enabled bool `yaml:"enabled"`
|
||||||
|
Name string `yaml:"name"`
|
||||||
|
Batch int `yaml:"batch"`
|
||||||
|
MaxRetry int `yaml:"maxRetry"`
|
||||||
|
WorkerNum int `yaml:"workerNum"`
|
||||||
|
Timeout int `yaml:"timeout"`
|
||||||
|
Address string `yaml:"address"`
|
||||||
|
Database string `yaml:"database"`
|
||||||
|
Username string `yaml:"username"`
|
||||||
|
Password string `yaml:"password"`
|
||||||
|
Precision string `yaml:"precision"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type InfluxdbDataSource struct {
|
||||||
|
// config
|
||||||
|
Section InfluxdbSection
|
||||||
|
SendQueueMaxSize int
|
||||||
|
SendTaskSleepInterval time.Duration
|
||||||
|
|
||||||
|
// 发送缓存队列 node -> queue_of_data
|
||||||
|
InfluxdbQueue *list.SafeListLimited
|
||||||
|
}
|
||||||
|
|
||||||
|
func (influxdb *InfluxdbDataSource) Init() {
|
||||||
|
|
||||||
|
// init queue
|
||||||
|
if influxdb.Section.Enabled {
|
||||||
|
influxdb.InfluxdbQueue = list.NewSafeListLimited(influxdb.SendQueueMaxSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
// init task
|
||||||
|
influxdbConcurrent := influxdb.Section.WorkerNum
|
||||||
|
if influxdbConcurrent < 1 {
|
||||||
|
influxdbConcurrent = 1
|
||||||
|
}
|
||||||
|
go influxdb.send2InfluxdbTask(influxdbConcurrent)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将原始数据插入到influxdb缓存队列
|
||||||
|
func (influxdb *InfluxdbDataSource) Push2Queue(items []*dataobj.MetricValue) {
|
||||||
|
errCnt := 0
|
||||||
|
for _, item := range items {
|
||||||
|
influxdbItem := influxdb.convert2InfluxdbItem(item)
|
||||||
|
isSuccess := influxdb.InfluxdbQueue.PushFront(influxdbItem)
|
||||||
|
|
||||||
|
if !isSuccess {
|
||||||
|
errCnt += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stats.Counter.Set("influxdb.queue.err", errCnt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (influxdb *InfluxdbDataSource) send2InfluxdbTask(concurrent int) {
|
||||||
|
batch := influxdb.Section.Batch // 一次发送,最多batch条数据
|
||||||
|
retry := influxdb.Section.MaxRetry
|
||||||
|
addr := influxdb.Section.Address
|
||||||
|
sema := semaphore.NewSemaphore(concurrent)
|
||||||
|
|
||||||
|
var err error
|
||||||
|
c, err := NewInfluxdbClient(influxdb.Section)
|
||||||
|
defer c.Client.Close()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("init influxdb client fail: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
items := influxdb.InfluxdbQueue.PopBackBy(batch)
|
||||||
|
count := len(items)
|
||||||
|
if count == 0 {
|
||||||
|
time.Sleep(influxdb.SendTaskSleepInterval)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
influxdbItems := make([]*dataobj.InfluxdbItem, count)
|
||||||
|
for i := 0; i < count; i++ {
|
||||||
|
influxdbItems[i] = items[i].(*dataobj.InfluxdbItem)
|
||||||
|
stats.Counter.Set("points.out.influxdb", 1)
|
||||||
|
logger.Debug("send to influxdb: ", influxdbItems[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
// 同步Call + 有限并发 进行发送
|
||||||
|
sema.Acquire()
|
||||||
|
go func(addr string, influxdbItems []*dataobj.InfluxdbItem, count int) {
|
||||||
|
defer sema.Release()
|
||||||
|
sendOk := false
|
||||||
|
|
||||||
|
for i := 0; i < retry; i++ {
|
||||||
|
err = c.Send(influxdbItems)
|
||||||
|
if err == nil {
|
||||||
|
sendOk = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
logger.Warningf("send influxdb fail: %v", err)
|
||||||
|
time.Sleep(time.Millisecond * 10)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !sendOk {
|
||||||
|
stats.Counter.Set("points.out.influxdb.err", count)
|
||||||
|
logger.Errorf("send %v to influxdb %s fail: %v", influxdbItems, addr, err)
|
||||||
|
} else {
|
||||||
|
logger.Debugf("send to influxdb %s ok", addr)
|
||||||
|
}
|
||||||
|
}(addr, influxdbItems, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (influxdb *InfluxdbDataSource) convert2InfluxdbItem(d *dataobj.MetricValue) *dataobj.InfluxdbItem {
|
||||||
|
t := dataobj.InfluxdbItem{Tags: make(map[string]string), Fields: make(map[string]interface{})}
|
||||||
|
|
||||||
|
for k, v := range d.TagsMap {
|
||||||
|
t.Tags[k] = v
|
||||||
|
}
|
||||||
|
t.Tags["endpoint"] = d.Endpoint
|
||||||
|
t.Measurement = d.Metric
|
||||||
|
t.Fields["value"] = d.Value
|
||||||
|
t.Timestamp = d.Timestamp
|
||||||
|
|
||||||
|
return &t
|
||||||
|
}
|
||||||
|
|
||||||
|
type InfluxClient struct {
|
||||||
|
Client client.Client
|
||||||
|
Database string
|
||||||
|
Precision string
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewInfluxdbClient(section InfluxdbSection) (*InfluxClient, error) {
|
||||||
|
c, err := client.NewHTTPClient(client.HTTPConfig{
|
||||||
|
Addr: section.Address,
|
||||||
|
Username: section.Username,
|
||||||
|
Password: section.Password,
|
||||||
|
Timeout: time.Millisecond * time.Duration(section.Timeout),
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &InfluxClient{
|
||||||
|
Client: c,
|
||||||
|
Database: section.Database,
|
||||||
|
Precision: section.Precision,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *InfluxClient) Send(items []*dataobj.InfluxdbItem) error {
|
||||||
|
bp, err := client.NewBatchPoints(client.BatchPointsConfig{
|
||||||
|
Database: c.Database,
|
||||||
|
Precision: c.Precision,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("create batch points error: ", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, item := range items {
|
||||||
|
pt, err := client.NewPoint(item.Measurement, item.Tags, item.Fields, time.Unix(item.Timestamp, 0))
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("create new points error: ", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
bp.AddPoint(pt)
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.Client.Write(bp)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (influxdb *InfluxdbDataSource) GetInstance(metric, endpoint string, tags map[string]string) []string {
|
||||||
|
// influxdb 单实例 或 influx-proxy
|
||||||
|
return []string{influxdb.Section.Address}
|
||||||
|
}
|
|
@ -0,0 +1,169 @@
|
||||||
|
package influxdb
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/didi/nightingale/src/dataobj"
|
||||||
|
"github.com/toolkits/pkg/logger"
|
||||||
|
)
|
||||||
|
|
||||||
|
type ShowSeries struct {
|
||||||
|
Database string
|
||||||
|
Metric string
|
||||||
|
Endpoints []string
|
||||||
|
Include []*dataobj.TagPair
|
||||||
|
Exclude []*dataobj.TagPair
|
||||||
|
Start int64
|
||||||
|
End int64
|
||||||
|
|
||||||
|
RawQuery string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (query *ShowSeries) renderShow() {
|
||||||
|
query.RawQuery = fmt.Sprintf("SHOW SERIES ON \"%s\" FROM \"%s\"", query.Database,
|
||||||
|
query.Metric)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (query *ShowSeries) renderEndpoints() {
|
||||||
|
if len(query.Endpoints) > 0 {
|
||||||
|
// endpoints
|
||||||
|
endpointPart := "("
|
||||||
|
for _, endpoint := range query.Endpoints {
|
||||||
|
endpointPart += fmt.Sprintf(" \"endpoint\"='%s' OR", endpoint)
|
||||||
|
}
|
||||||
|
endpointPart = endpointPart[:len(endpointPart)-len("OR")]
|
||||||
|
endpointPart += ")"
|
||||||
|
query.RawQuery = fmt.Sprintf("\"%s\" WHERE \"%s\"", query.RawQuery, endpointPart)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (query *ShowSeries) renderInclude() {
|
||||||
|
if len(query.Include) > 0 {
|
||||||
|
// include
|
||||||
|
includePart := "("
|
||||||
|
for _, include := range query.Include {
|
||||||
|
for _, value := range include.Values {
|
||||||
|
includePart += fmt.Sprintf(" \"%s\"='%s' OR", include.Key, value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
includePart = includePart[:len(includePart)-len("OR")]
|
||||||
|
includePart += ")"
|
||||||
|
if !strings.Contains(query.RawQuery, "WHERE") {
|
||||||
|
query.RawQuery += " WHERE"
|
||||||
|
}
|
||||||
|
query.RawQuery = fmt.Sprintf(" %s AND %s", query.RawQuery, includePart)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (query *ShowSeries) renderExclude() {
|
||||||
|
if len(query.Exclude) > 0 {
|
||||||
|
// exclude
|
||||||
|
excludePart := "("
|
||||||
|
for _, exclude := range query.Exclude {
|
||||||
|
for _, value := range exclude.Values {
|
||||||
|
excludePart += fmt.Sprintf(" \"%s\"='%s' OR", exclude.Key, value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
excludePart = excludePart[:len(excludePart)-len("OR")]
|
||||||
|
excludePart += ")"
|
||||||
|
if !strings.Contains(query.RawQuery, "WHERE") {
|
||||||
|
query.RawQuery += " WHERE"
|
||||||
|
}
|
||||||
|
query.RawQuery = fmt.Sprintf(" %s AND %s", query.RawQuery, excludePart)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (query *ShowSeries) renderTimeRange() {
|
||||||
|
// time
|
||||||
|
if strings.Contains(query.RawQuery, "WHERE") {
|
||||||
|
query.RawQuery = fmt.Sprintf("%s AND time >= %d AND time <= %d", query.RawQuery,
|
||||||
|
time.Duration(query.Start)*time.Second,
|
||||||
|
time.Duration(query.End)*time.Second)
|
||||||
|
} else {
|
||||||
|
query.RawQuery = fmt.Sprintf("%s WHERE time >= %d AND time <= %d", query.RawQuery,
|
||||||
|
time.Duration(query.Start)*time.Second,
|
||||||
|
time.Duration(query.End)*time.Second)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type QueryData struct {
|
||||||
|
Start int64
|
||||||
|
End int64
|
||||||
|
Metric string
|
||||||
|
Endpoints []string
|
||||||
|
Tags []string
|
||||||
|
Step int
|
||||||
|
DsType string
|
||||||
|
GroupKey []string //聚合维度
|
||||||
|
AggrFunc string //聚合计算
|
||||||
|
|
||||||
|
RawQuery string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (query *QueryData) renderSelect() {
|
||||||
|
// select
|
||||||
|
if query.AggrFunc != "" && len(query.GroupKey) > 0 {
|
||||||
|
query.RawQuery = ""
|
||||||
|
} else {
|
||||||
|
query.RawQuery = fmt.Sprintf("SELECT \"value\" FROM \"%s\"", query.Metric)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (query *QueryData) renderEndpoints() {
|
||||||
|
// where endpoint
|
||||||
|
if len(query.Endpoints) > 0 {
|
||||||
|
endpointPart := "("
|
||||||
|
for _, endpoint := range query.Endpoints {
|
||||||
|
endpointPart += fmt.Sprintf(" \"endpoint\"='%s' OR", endpoint)
|
||||||
|
}
|
||||||
|
endpointPart = endpointPart[:len(endpointPart)-len("OR")]
|
||||||
|
endpointPart += ")"
|
||||||
|
query.RawQuery = fmt.Sprintf("%s WHERE %s", query.RawQuery, endpointPart)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (query *QueryData) renderTags() {
|
||||||
|
// where tags
|
||||||
|
if len(query.Tags) > 0 {
|
||||||
|
s := strings.Join(query.Tags, ",")
|
||||||
|
tags, err := dataobj.SplitTagsString(s)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warningf("split tags error, %+v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
tagPart := "("
|
||||||
|
for tagK, tagV := range tags {
|
||||||
|
tagPart += fmt.Sprintf(" \"%s\"='%s' AND", tagK, tagV)
|
||||||
|
}
|
||||||
|
tagPart = tagPart[:len(tagPart)-len("AND")]
|
||||||
|
tagPart += ")"
|
||||||
|
|
||||||
|
if strings.Contains(query.RawQuery, "WHERE") {
|
||||||
|
query.RawQuery = fmt.Sprintf("%s AND %s", query.RawQuery, tagPart)
|
||||||
|
} else {
|
||||||
|
query.RawQuery = fmt.Sprintf("%s WHERE %s", query.RawQuery, tagPart)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (query *QueryData) renderTimeRange() {
|
||||||
|
// time
|
||||||
|
if strings.Contains(query.RawQuery, "WHERE") {
|
||||||
|
query.RawQuery = fmt.Sprintf("%s AND time >= %d AND time <= %d", query.RawQuery,
|
||||||
|
time.Duration(query.Start)*time.Second,
|
||||||
|
time.Duration(query.End)*time.Second)
|
||||||
|
} else {
|
||||||
|
query.RawQuery = fmt.Sprintf("%s WHERE time >= %d AND time <= %d", query.RawQuery, query.Start, query.End)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (query *QueryData) renderGroupBy() {
|
||||||
|
// group by
|
||||||
|
if len(query.GroupKey) > 0 {
|
||||||
|
groupByPart := strings.Join(query.GroupKey, ",")
|
||||||
|
query.RawQuery = fmt.Sprintf("%s GROUP BY %s", query.RawQuery, groupByPart)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,426 @@
|
||||||
|
package influxdb
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/influxdata/influxdb/models"
|
||||||
|
|
||||||
|
"github.com/didi/nightingale/src/dataobj"
|
||||||
|
"github.com/influxdata/influxdb/client/v2"
|
||||||
|
"github.com/toolkits/pkg/logger"
|
||||||
|
)
|
||||||
|
|
||||||
|
// select value from metric where ...
|
||||||
|
func (influxdb *InfluxdbDataSource) QueryData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse {
|
||||||
|
logger.Debugf("query data, inputs: %+v", inputs)
|
||||||
|
|
||||||
|
c, err := NewInfluxdbClient(influxdb.Section)
|
||||||
|
defer c.Client.Close()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("init influxdb client fail: %v", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
queryResponse := make([]*dataobj.TsdbQueryResponse, 0)
|
||||||
|
for _, input := range inputs {
|
||||||
|
for _, counter := range input.Counters {
|
||||||
|
items := strings.Split(counter, "/")
|
||||||
|
metric := items[0]
|
||||||
|
var tags = make([]string, 0)
|
||||||
|
if len(items) > 1 && len(items[1]) > 0 {
|
||||||
|
tags = strings.Split(items[1], ",")
|
||||||
|
}
|
||||||
|
influxdbQuery := QueryData{
|
||||||
|
Start: input.Start,
|
||||||
|
End: input.End,
|
||||||
|
Metric: metric,
|
||||||
|
Endpoints: input.Endpoints,
|
||||||
|
Tags: tags,
|
||||||
|
Step: input.Step,
|
||||||
|
DsType: input.DsType,
|
||||||
|
}
|
||||||
|
influxdbQuery.renderSelect()
|
||||||
|
influxdbQuery.renderEndpoints()
|
||||||
|
influxdbQuery.renderTags()
|
||||||
|
influxdbQuery.renderTimeRange()
|
||||||
|
logger.Debugf("query influxql %s", influxdbQuery.RawQuery)
|
||||||
|
|
||||||
|
query := client.NewQuery(influxdbQuery.RawQuery, c.Database, c.Precision)
|
||||||
|
if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
|
||||||
|
for _, result := range response.Results {
|
||||||
|
for _, series := range result.Series {
|
||||||
|
|
||||||
|
// fixme : influx client get series.Tags is nil
|
||||||
|
endpoint := series.Tags["endpoint"]
|
||||||
|
delete(series.Tags, endpoint)
|
||||||
|
counter, err := dataobj.GetCounter(series.Name, "", series.Tags)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warningf("get counter error: %+v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
values := convertValues(series)
|
||||||
|
|
||||||
|
resp := &dataobj.TsdbQueryResponse{
|
||||||
|
Start: influxdbQuery.Start,
|
||||||
|
End: influxdbQuery.End,
|
||||||
|
Endpoint: endpoint,
|
||||||
|
Counter: counter,
|
||||||
|
DsType: influxdbQuery.DsType,
|
||||||
|
Step: influxdbQuery.Step,
|
||||||
|
Values: values,
|
||||||
|
}
|
||||||
|
queryResponse = append(queryResponse, resp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return queryResponse
|
||||||
|
}
|
||||||
|
|
||||||
|
// todo : 支持 comparison
|
||||||
|
// select value from metric where ...
|
||||||
|
func (influxdb *InfluxdbDataSource) QueryDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse {
|
||||||
|
|
||||||
|
logger.Debugf("query data for ui, input: %+v", input)
|
||||||
|
|
||||||
|
c, err := NewInfluxdbClient(influxdb.Section)
|
||||||
|
defer c.Client.Close()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("init influxdb client fail: %v", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
influxdbQuery := QueryData{
|
||||||
|
Start: input.Start,
|
||||||
|
End: input.End,
|
||||||
|
Metric: input.Metric,
|
||||||
|
Endpoints: input.Endpoints,
|
||||||
|
Tags: input.Tags,
|
||||||
|
Step: input.Step,
|
||||||
|
DsType: input.DsType,
|
||||||
|
GroupKey: input.GroupKey,
|
||||||
|
AggrFunc: input.AggrFunc,
|
||||||
|
}
|
||||||
|
influxdbQuery.renderSelect()
|
||||||
|
influxdbQuery.renderEndpoints()
|
||||||
|
influxdbQuery.renderTags()
|
||||||
|
influxdbQuery.renderTimeRange()
|
||||||
|
influxdbQuery.renderGroupBy()
|
||||||
|
logger.Debugf("query influxql %s", influxdbQuery.RawQuery)
|
||||||
|
|
||||||
|
queryResponse := make([]*dataobj.TsdbQueryResponse, 0)
|
||||||
|
query := client.NewQuery(influxdbQuery.RawQuery, c.Database, c.Precision)
|
||||||
|
if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
|
||||||
|
|
||||||
|
for _, result := range response.Results {
|
||||||
|
for _, series := range result.Series {
|
||||||
|
|
||||||
|
// fixme : influx client get series.Tags is nil
|
||||||
|
endpoint := series.Tags["endpoint"]
|
||||||
|
delete(series.Tags, endpoint)
|
||||||
|
counter, err := dataobj.GetCounter(series.Name, "", series.Tags)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warningf("get counter error: %+v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
values := convertValues(series)
|
||||||
|
|
||||||
|
resp := &dataobj.TsdbQueryResponse{
|
||||||
|
Start: influxdbQuery.Start,
|
||||||
|
End: influxdbQuery.End,
|
||||||
|
Endpoint: endpoint,
|
||||||
|
Counter: counter,
|
||||||
|
DsType: influxdbQuery.DsType,
|
||||||
|
Step: influxdbQuery.Step,
|
||||||
|
Values: values,
|
||||||
|
}
|
||||||
|
queryResponse = append(queryResponse, resp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return queryResponse
|
||||||
|
}
|
||||||
|
|
||||||
|
// show measurements on n9e
|
||||||
|
func (influxdb *InfluxdbDataSource) QueryMetrics(recv dataobj.EndpointsRecv) *dataobj.MetricResp {
|
||||||
|
logger.Debugf("query metric, recv: %+v", recv)
|
||||||
|
|
||||||
|
c, err := NewInfluxdbClient(influxdb.Section)
|
||||||
|
defer c.Client.Close()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("init influxdb client fail: %v", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
influxql := fmt.Sprintf("SHOW MEASUREMENTS ON \"%s\"", influxdb.Section.Database)
|
||||||
|
query := client.NewQuery(influxql, c.Database, c.Precision)
|
||||||
|
if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
|
||||||
|
resp := &dataobj.MetricResp{
|
||||||
|
Metrics: make([]string, 0),
|
||||||
|
}
|
||||||
|
for _, result := range response.Results {
|
||||||
|
for _, series := range result.Series {
|
||||||
|
for _, valuePair := range series.Values {
|
||||||
|
metric := valuePair[0].(string)
|
||||||
|
resp.Metrics = append(resp.Metrics, metric)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return resp
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// show tag keys / values from metric ...
|
||||||
|
func (influxdb *InfluxdbDataSource) QueryTagPairs(recv dataobj.EndpointMetricRecv) []dataobj.IndexTagkvResp {
|
||||||
|
logger.Debugf("query tag pairs, recv: %+v", recv)
|
||||||
|
|
||||||
|
c, err := NewInfluxdbClient(influxdb.Section)
|
||||||
|
defer c.Client.Close()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("init influxdb client fail: %v", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := make([]dataobj.IndexTagkvResp, 0)
|
||||||
|
for _, metric := range recv.Metrics {
|
||||||
|
tagkvResp := dataobj.IndexTagkvResp{
|
||||||
|
Endpoints: recv.Endpoints,
|
||||||
|
Metric: metric,
|
||||||
|
Tagkv: make([]*dataobj.TagPair, 0),
|
||||||
|
}
|
||||||
|
// show tag keys
|
||||||
|
keys := showTagKeys(c, metric, influxdb.Section.Database)
|
||||||
|
if len(keys) > 0 {
|
||||||
|
// show tag values
|
||||||
|
tagkvResp.Tagkv = showTagValues(c, keys, metric, influxdb.Section.Database)
|
||||||
|
}
|
||||||
|
resp = append(resp, tagkvResp)
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp
|
||||||
|
}
|
||||||
|
|
||||||
|
// show tag keys on n9e from metric where ...
|
||||||
|
// (exclude default endpoint tag)
|
||||||
|
func showTagKeys(c *InfluxClient, metric, database string) []string {
|
||||||
|
keys := make([]string, 0)
|
||||||
|
influxql := fmt.Sprintf("SHOW TAG KEYS ON \"%s\" FROM \"%s\"", database, metric)
|
||||||
|
query := client.NewQuery(influxql, c.Database, c.Precision)
|
||||||
|
if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
|
||||||
|
for _, result := range response.Results {
|
||||||
|
for _, series := range result.Series {
|
||||||
|
for _, valuePair := range series.Values {
|
||||||
|
tagKey := valuePair[0].(string)
|
||||||
|
// 去掉默认tag endpoint
|
||||||
|
if tagKey != "endpoint" {
|
||||||
|
keys = append(keys, tagKey)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
|
// show tag values on n9e from metric where ...
|
||||||
|
func showTagValues(c *InfluxClient, keys []string, metric, database string) []*dataobj.TagPair {
|
||||||
|
tagkv := make([]*dataobj.TagPair, 0)
|
||||||
|
influxql := fmt.Sprintf("SHOW TAG VALUES ON \"%s\" FROM \"%s\" WITH KEY in (\"%s\")",
|
||||||
|
database,
|
||||||
|
metric, strings.Join(keys, "\",\""))
|
||||||
|
query := client.NewQuery(influxql, c.Database, c.Precision)
|
||||||
|
if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
|
||||||
|
tagPairs := make(map[string]*dataobj.TagPair)
|
||||||
|
for _, result := range response.Results {
|
||||||
|
for _, series := range result.Series {
|
||||||
|
for _, valuePair := range series.Values {
|
||||||
|
tagKey := valuePair[0].(string)
|
||||||
|
tagValue := valuePair[1].(string)
|
||||||
|
if pair, exist := tagPairs[tagKey]; exist {
|
||||||
|
pair.Values = append(pair.Values, tagValue)
|
||||||
|
} else {
|
||||||
|
pair := &dataobj.TagPair{
|
||||||
|
Key: tagKey,
|
||||||
|
Values: []string{tagValue},
|
||||||
|
}
|
||||||
|
tagPairs[pair.Key] = pair
|
||||||
|
tagkv = append(tagkv, pair)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tagkv
|
||||||
|
}
|
||||||
|
|
||||||
|
// show series from metric where ...
|
||||||
|
func (influxdb *InfluxdbDataSource) QueryIndexByClude(recvs []dataobj.CludeRecv) []dataobj.XcludeResp {
|
||||||
|
logger.Debugf("query IndexByClude , recv: %+v", recvs)
|
||||||
|
|
||||||
|
c, err := NewInfluxdbClient(influxdb.Section)
|
||||||
|
defer c.Client.Close()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("init influxdb client fail: %v", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
resp := make([]dataobj.XcludeResp, 0)
|
||||||
|
for _, recv := range recvs {
|
||||||
|
xcludeResp := dataobj.XcludeResp{
|
||||||
|
Endpoints: recv.Endpoints,
|
||||||
|
Metric: recv.Metric,
|
||||||
|
Tags: make([]string, 0),
|
||||||
|
Step: -1, // fixme
|
||||||
|
DsType: "GAUGE",
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(recv.Endpoints) == 0 {
|
||||||
|
resp = append(resp, xcludeResp)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
showSeries := ShowSeries{
|
||||||
|
Database: influxdb.Section.Database,
|
||||||
|
Metric: recv.Metric,
|
||||||
|
Endpoints: recv.Endpoints,
|
||||||
|
Start: time.Now().AddDate(0, 0, -30).Unix(),
|
||||||
|
End: time.Now().Unix(),
|
||||||
|
Include: recv.Include,
|
||||||
|
Exclude: recv.Exclude,
|
||||||
|
}
|
||||||
|
showSeries.renderShow()
|
||||||
|
showSeries.renderEndpoints()
|
||||||
|
showSeries.renderInclude()
|
||||||
|
showSeries.renderExclude()
|
||||||
|
|
||||||
|
query := client.NewQuery(showSeries.RawQuery, c.Database, c.Precision)
|
||||||
|
if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
|
||||||
|
for _, result := range response.Results {
|
||||||
|
for _, series := range result.Series {
|
||||||
|
for _, valuePair := range series.Values {
|
||||||
|
|
||||||
|
// proc.port.listen,endpoint=localhost,port=22,service=sshd
|
||||||
|
tagKey := valuePair[0].(string)
|
||||||
|
|
||||||
|
// process
|
||||||
|
items := strings.Split(tagKey, ",")
|
||||||
|
newItems := make([]string, 0)
|
||||||
|
for _, item := range items {
|
||||||
|
if item != recv.Metric && !strings.Contains(item, "endpoint") {
|
||||||
|
newItems = append(newItems, item)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(newItems) > 0 {
|
||||||
|
if tags, err := dataobj.SplitTagsString(strings.Join(newItems, ",")); err == nil {
|
||||||
|
xcludeResp.Tags = append(xcludeResp.Tags, dataobj.SortedTags(tags))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resp = append(resp, xcludeResp)
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp
|
||||||
|
}
|
||||||
|
|
||||||
|
// show series from metric where ...
|
||||||
|
func (influxdb *InfluxdbDataSource) QueryIndexByFullTags(recvs []dataobj.IndexByFullTagsRecv) []dataobj.
|
||||||
|
IndexByFullTagsResp {
|
||||||
|
logger.Debugf("query IndexByFullTags , recv: %+v", recvs)
|
||||||
|
|
||||||
|
c, err := NewInfluxdbClient(influxdb.Section)
|
||||||
|
defer c.Client.Close()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("init influxdb client fail: %v", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := make([]dataobj.IndexByFullTagsResp, 0)
|
||||||
|
for _, recv := range recvs {
|
||||||
|
fullTagResp := dataobj.IndexByFullTagsResp{
|
||||||
|
Endpoints: recv.Endpoints,
|
||||||
|
Metric: recv.Metric,
|
||||||
|
Tags: make([]string, 0),
|
||||||
|
Step: -1, // FIXME
|
||||||
|
DsType: "GAUGE",
|
||||||
|
}
|
||||||
|
|
||||||
|
// 兼容夜莺逻辑,不选择endpoint则返回空
|
||||||
|
if len(recv.Endpoints) == 0 {
|
||||||
|
resp = append(resp, fullTagResp)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// build influxql
|
||||||
|
influxdbShow := ShowSeries{
|
||||||
|
Database: influxdb.Section.Database,
|
||||||
|
Metric: recv.Metric,
|
||||||
|
Endpoints: recv.Endpoints,
|
||||||
|
Start: time.Now().AddDate(0, 0, -30).Unix(),
|
||||||
|
End: time.Now().Unix(),
|
||||||
|
}
|
||||||
|
influxdbShow.renderShow()
|
||||||
|
influxdbShow.renderEndpoints()
|
||||||
|
influxdbShow.renderTimeRange()
|
||||||
|
|
||||||
|
// do query
|
||||||
|
query := client.NewQuery(influxdbShow.RawQuery, c.Database, c.Precision)
|
||||||
|
if response, err := c.Client.Query(query); err == nil && response.Error() == nil {
|
||||||
|
for _, result := range response.Results {
|
||||||
|
for _, series := range result.Series {
|
||||||
|
for _, valuePair := range series.Values {
|
||||||
|
|
||||||
|
// proc.port.listen,endpoint=localhost,port=22,service=sshd
|
||||||
|
tagKey := valuePair[0].(string)
|
||||||
|
|
||||||
|
// process
|
||||||
|
items := strings.Split(tagKey, ",")
|
||||||
|
newItems := make([]string, 0)
|
||||||
|
for _, item := range items {
|
||||||
|
if item != recv.Metric && !strings.Contains(item, "endpoint") {
|
||||||
|
newItems = append(newItems, item)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(newItems) > 0 {
|
||||||
|
if tags, err := dataobj.SplitTagsString(strings.Join(newItems, ",")); err == nil {
|
||||||
|
fullTagResp.Tags = append(fullTagResp.Tags, dataobj.SortedTags(tags))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resp = append(resp, fullTagResp)
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp
|
||||||
|
}
|
||||||
|
|
||||||
|
func convertValues(series models.Row) []*dataobj.RRDData {
|
||||||
|
|
||||||
|
// convert values
|
||||||
|
values := make([]*dataobj.RRDData, 0, len(series.Values))
|
||||||
|
for _, valuePair := range series.Values {
|
||||||
|
timestampNumber, _ := valuePair[0].(json.Number)
|
||||||
|
timestamp, _ := timestampNumber.Int64()
|
||||||
|
|
||||||
|
valueNumber, _ := valuePair[1].(json.Number)
|
||||||
|
valueFloat, _ := valueNumber.Float64()
|
||||||
|
values = append(values, dataobj.NewRRDData(timestamp, valueFloat))
|
||||||
|
}
|
||||||
|
return values
|
||||||
|
}
|
|
@ -1,166 +1,76 @@
|
||||||
package backend
|
package backend
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/toolkits/pkg/container/list"
|
"github.com/didi/nightingale/src/modules/transfer/backend/influxdb"
|
||||||
"github.com/toolkits/pkg/container/set"
|
"github.com/didi/nightingale/src/modules/transfer/backend/tsdb"
|
||||||
"github.com/toolkits/pkg/str"
|
|
||||||
|
|
||||||
"github.com/didi/nightingale/src/modules/transfer/cache"
|
|
||||||
"github.com/didi/nightingale/src/toolkits/pools"
|
|
||||||
"github.com/didi/nightingale/src/toolkits/report"
|
|
||||||
"github.com/didi/nightingale/src/toolkits/stats"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type InfluxdbSection struct {
|
|
||||||
Enabled bool `yaml:"enabled"`
|
|
||||||
Batch int `yaml:"batch"`
|
|
||||||
MaxRetry int `yaml:"maxRetry"`
|
|
||||||
WorkerNum int `yaml:"workerNum"`
|
|
||||||
Timeout int `yaml:"timeout"`
|
|
||||||
Address string `yaml:"address"`
|
|
||||||
Database string `yaml:"database"`
|
|
||||||
Username string `yaml:"username"`
|
|
||||||
Password string `yaml:"password"`
|
|
||||||
Precision string `yaml:"precision"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type OpenTsdbSection struct {
|
|
||||||
Enabled bool `yaml:"enabled"`
|
|
||||||
Batch int `yaml:"batch"`
|
|
||||||
ConnTimeout int `yaml:"connTimeout"`
|
|
||||||
CallTimeout int `yaml:"callTimeout"`
|
|
||||||
WorkerNum int `yaml:"workerNum"`
|
|
||||||
MaxConns int `yaml:"maxConns"`
|
|
||||||
MaxIdle int `yaml:"maxIdle"`
|
|
||||||
MaxRetry int `yaml:"maxRetry"`
|
|
||||||
Address string `yaml:"address"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type KafkaSection struct {
|
|
||||||
Enabled bool `yaml:"enabled"`
|
|
||||||
Topic string `yaml:"topic"`
|
|
||||||
BrokersPeers string `yaml:"brokersPeers"`
|
|
||||||
SaslUser string `yaml:"saslUser"`
|
|
||||||
SaslPasswd string `yaml:"saslPasswd"`
|
|
||||||
Retry int `yaml:"retry"`
|
|
||||||
KeepAlive int64 `yaml:"keepAlive"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type BackendSection struct {
|
type BackendSection struct {
|
||||||
Enabled bool `yaml:"enabled"`
|
DataSource string `yaml:"datasource"`
|
||||||
Batch int `yaml:"batch"`
|
StraPath string `yaml:"straPath"`
|
||||||
ConnTimeout int `yaml:"connTimeout"`
|
|
||||||
CallTimeout int `yaml:"callTimeout"`
|
|
||||||
WorkerNum int `yaml:"workerNum"`
|
|
||||||
MaxConns int `yaml:"maxConns"`
|
|
||||||
MaxIdle int `yaml:"maxIdle"`
|
|
||||||
IndexTimeout int `yaml:"indexTimeout"`
|
|
||||||
StraPath string `yaml:"straPath"`
|
|
||||||
HbsMod string `yaml:"hbsMod"`
|
|
||||||
|
|
||||||
Replicas int `yaml:"replicas"`
|
Judge JudgeSection `yaml:"judge"`
|
||||||
Cluster map[string]string `yaml:"cluster"`
|
Tsdb tsdb.TsdbSection `yaml:"tsdb"`
|
||||||
ClusterList map[string]*ClusterNode `json:"clusterList"`
|
Influxdb influxdb.InfluxdbSection `yaml:"influxdb"`
|
||||||
Influxdb InfluxdbSection `yaml:"influxdb"`
|
OpenTsdb OpenTsdbSection `yaml:"opentsdb"`
|
||||||
OpenTsdb OpenTsdbSection `yaml:"opentsdb"`
|
Kafka KafkaSection `yaml:"kafka"`
|
||||||
Kafka KafkaSection `yaml:"kafka"`
|
|
||||||
}
|
|
||||||
|
|
||||||
const DefaultSendQueueMaxSize = 102400 //10.24w
|
|
||||||
|
|
||||||
type ClusterNode struct {
|
|
||||||
Addrs []string `json:"addrs"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
Config BackendSection
|
defaultDataSource string
|
||||||
// 服务节点的一致性哈希环 pk -> node
|
StraPath string
|
||||||
TsdbNodeRing *ConsistentHashRing
|
tsdbDataSource *tsdb.TsdbDataSource
|
||||||
|
openTSDBPushEndpoint *OpenTsdbPushEndpoint
|
||||||
// 发送缓存队列 node -> queue_of_data
|
influxdbDataSource *influxdb.InfluxdbDataSource
|
||||||
TsdbQueues = make(map[string]*list.SafeListLimited)
|
kafkaPushEndpoint *KafkaPushEndpoint
|
||||||
JudgeQueues = cache.SafeJudgeQueue{}
|
|
||||||
InfluxdbQueue *list.SafeListLimited
|
|
||||||
OpenTsdbQueue *list.SafeListLimited
|
|
||||||
KafkaQueue = make(chan KafkaData, 10)
|
|
||||||
|
|
||||||
// 连接池 node_address -> connection_pool
|
|
||||||
TsdbConnPools *pools.ConnPools
|
|
||||||
JudgeConnPools *pools.ConnPools
|
|
||||||
OpenTsdbConnPoolHelper *pools.OpenTsdbConnPoolHelper
|
|
||||||
|
|
||||||
connTimeout int32
|
|
||||||
callTimeout int32
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func Init(cfg BackendSection) {
|
func Init(cfg BackendSection) {
|
||||||
Config = cfg
|
defaultDataSource = cfg.DataSource
|
||||||
// 初始化默认参数
|
StraPath = cfg.StraPath
|
||||||
connTimeout = int32(Config.ConnTimeout)
|
|
||||||
callTimeout = int32(Config.CallTimeout)
|
|
||||||
|
|
||||||
initHashRing()
|
// init judge
|
||||||
initConnPools()
|
InitJudge(cfg.Judge)
|
||||||
initSendQueues()
|
|
||||||
|
|
||||||
startSendTasks()
|
// init tsdb
|
||||||
}
|
if cfg.Tsdb.Enabled {
|
||||||
|
tsdbDataSource = &tsdb.TsdbDataSource{
|
||||||
func initHashRing() {
|
Section: cfg.Tsdb,
|
||||||
TsdbNodeRing = NewConsistentHashRing(int32(Config.Replicas), str.KeysOfMap(Config.Cluster))
|
SendQueueMaxSize: DefaultSendQueueMaxSize,
|
||||||
}
|
SendTaskSleepInterval: DefaultSendTaskSleepInterval,
|
||||||
|
|
||||||
func initConnPools() {
|
|
||||||
tsdbInstances := set.NewSafeSet()
|
|
||||||
for _, item := range Config.ClusterList {
|
|
||||||
for _, addr := range item.Addrs {
|
|
||||||
tsdbInstances.Add(addr)
|
|
||||||
}
|
}
|
||||||
|
tsdbDataSource.Init() // register
|
||||||
|
RegisterDataSource(tsdbDataSource.Section.Name, tsdbDataSource)
|
||||||
}
|
}
|
||||||
TsdbConnPools = pools.NewConnPools(
|
|
||||||
Config.MaxConns, Config.MaxIdle, Config.ConnTimeout, Config.CallTimeout, tsdbInstances.ToSlice(),
|
|
||||||
)
|
|
||||||
|
|
||||||
JudgeConnPools = pools.NewConnPools(
|
// init influxdb
|
||||||
Config.MaxConns, Config.MaxIdle, Config.ConnTimeout, Config.CallTimeout, GetJudges(),
|
if cfg.Influxdb.Enabled {
|
||||||
)
|
influxdbDataSource = &influxdb.InfluxdbDataSource{
|
||||||
if Config.OpenTsdb.Enabled {
|
Section: cfg.Influxdb,
|
||||||
OpenTsdbConnPoolHelper = pools.NewOpenTsdbConnPoolHelper(Config.OpenTsdb.Address, Config.OpenTsdb.MaxConns, Config.OpenTsdb.MaxIdle, Config.OpenTsdb.ConnTimeout, Config.OpenTsdb.CallTimeout)
|
SendQueueMaxSize: DefaultSendQueueMaxSize,
|
||||||
}
|
SendTaskSleepInterval: DefaultSendTaskSleepInterval,
|
||||||
}
|
|
||||||
|
|
||||||
func initSendQueues() {
|
|
||||||
for node, item := range Config.ClusterList {
|
|
||||||
for _, addr := range item.Addrs {
|
|
||||||
TsdbQueues[node+addr] = list.NewSafeListLimited(DefaultSendQueueMaxSize)
|
|
||||||
}
|
}
|
||||||
}
|
influxdbDataSource.Init()
|
||||||
|
// register
|
||||||
|
RegisterDataSource(influxdbDataSource.Section.Name, influxdbDataSource)
|
||||||
|
|
||||||
JudgeQueues = cache.NewJudgeQueue()
|
|
||||||
judges := GetJudges()
|
|
||||||
for _, judge := range judges {
|
|
||||||
JudgeQueues.Set(judge, list.NewSafeListLimited(DefaultSendQueueMaxSize))
|
|
||||||
}
|
}
|
||||||
|
// init opentsdb
|
||||||
if Config.Influxdb.Enabled {
|
if cfg.OpenTsdb.Enabled {
|
||||||
InfluxdbQueue = list.NewSafeListLimited(DefaultSendQueueMaxSize)
|
openTSDBPushEndpoint = &OpenTsdbPushEndpoint{
|
||||||
|
Section: cfg.OpenTsdb,
|
||||||
|
}
|
||||||
|
openTSDBPushEndpoint.Init()
|
||||||
|
// register
|
||||||
|
RegisterPushEndpoint(openTSDBPushEndpoint.Section.Name, openTSDBPushEndpoint)
|
||||||
}
|
}
|
||||||
|
// init kafka
|
||||||
if Config.OpenTsdb.Enabled {
|
if cfg.Kafka.Enabled {
|
||||||
OpenTsdbQueue = list.NewSafeListLimited(DefaultSendQueueMaxSize)
|
kafkaPushEndpoint = &KafkaPushEndpoint{
|
||||||
|
Section: cfg.Kafka,
|
||||||
|
}
|
||||||
|
kafkaPushEndpoint.Init()
|
||||||
|
// register
|
||||||
|
RegisterPushEndpoint(kafkaPushEndpoint.Section.Name, kafkaPushEndpoint)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetJudges() []string {
|
|
||||||
var judgeInstances []string
|
|
||||||
instances, err := report.GetAlive("judge", Config.HbsMod)
|
|
||||||
if err != nil {
|
|
||||||
stats.Counter.Set("judge.get.err", 1)
|
|
||||||
return judgeInstances
|
|
||||||
}
|
|
||||||
for _, instance := range instances {
|
|
||||||
judgeInstance := instance.Identity + ":" + instance.RPCPort
|
|
||||||
judgeInstances = append(judgeInstances, judgeInstance)
|
|
||||||
}
|
|
||||||
return judgeInstances
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,191 @@
|
||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/didi/nightingale/src/dataobj"
|
||||||
|
"github.com/didi/nightingale/src/model"
|
||||||
|
"github.com/didi/nightingale/src/modules/transfer/cache"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/pools"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/report"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/stats"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/str"
|
||||||
|
"github.com/toolkits/pkg/concurrent/semaphore"
|
||||||
|
"github.com/toolkits/pkg/container/list"
|
||||||
|
"github.com/toolkits/pkg/logger"
|
||||||
|
)
|
||||||
|
|
||||||
|
type JudgeSection struct {
|
||||||
|
Batch int `yaml:"batch"`
|
||||||
|
ConnTimeout int `yaml:"connTimeout"`
|
||||||
|
CallTimeout int `yaml:"callTimeout"`
|
||||||
|
WorkerNum int `yaml:"workerNum"`
|
||||||
|
MaxConns int `yaml:"maxConns"`
|
||||||
|
MaxIdle int `yaml:"maxIdle"`
|
||||||
|
HbsMod string `yaml:"hbsMod"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
// config
|
||||||
|
Judge JudgeSection
|
||||||
|
|
||||||
|
// 连接池 node_address -> connection_pool
|
||||||
|
JudgeConnPools *pools.ConnPools
|
||||||
|
|
||||||
|
// queue
|
||||||
|
JudgeQueues = cache.SafeJudgeQueue{}
|
||||||
|
)
|
||||||
|
|
||||||
|
func InitJudge(section JudgeSection) {
|
||||||
|
Judge = section
|
||||||
|
|
||||||
|
judges := GetJudges()
|
||||||
|
|
||||||
|
// init connPool
|
||||||
|
JudgeConnPools = pools.NewConnPools(Judge.MaxConns, Judge.MaxIdle, Judge.ConnTimeout, Judge.CallTimeout, judges)
|
||||||
|
|
||||||
|
// init queue
|
||||||
|
JudgeQueues = cache.NewJudgeQueue()
|
||||||
|
for _, judgeNode := range judges {
|
||||||
|
JudgeQueues.Set(judgeNode, list.NewSafeListLimited(DefaultSendQueueMaxSize))
|
||||||
|
}
|
||||||
|
|
||||||
|
// start task
|
||||||
|
judgeConcurrent := Judge.WorkerNum
|
||||||
|
if judgeConcurrent < 1 {
|
||||||
|
judgeConcurrent = 1
|
||||||
|
}
|
||||||
|
judgeQueue := JudgeQueues.GetAll()
|
||||||
|
for instance, queue := range judgeQueue {
|
||||||
|
go Send2JudgeTask(queue, instance, judgeConcurrent)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func Send2JudgeTask(Q *list.SafeListLimited, addr string, concurrent int) {
|
||||||
|
batch := Judge.Batch
|
||||||
|
sema := semaphore.NewSemaphore(concurrent)
|
||||||
|
|
||||||
|
for {
|
||||||
|
items := Q.PopBackBy(batch)
|
||||||
|
count := len(items)
|
||||||
|
if count == 0 {
|
||||||
|
time.Sleep(DefaultSendTaskSleepInterval)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
judgeItems := make([]*dataobj.JudgeItem, count)
|
||||||
|
stats.Counter.Set("points.out.judge", count)
|
||||||
|
for i := 0; i < count; i++ {
|
||||||
|
judgeItems[i] = items[i].(*dataobj.JudgeItem)
|
||||||
|
logger.Debug("send to judge: ", judgeItems[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
sema.Acquire()
|
||||||
|
go func(addr string, judgeItems []*dataobj.JudgeItem, count int) {
|
||||||
|
defer sema.Release()
|
||||||
|
|
||||||
|
resp := &dataobj.SimpleRpcResponse{}
|
||||||
|
var err error
|
||||||
|
sendOk := false
|
||||||
|
for i := 0; i < MaxSendRetry; i++ {
|
||||||
|
err = JudgeConnPools.Call(addr, "Judge.Send", judgeItems, resp)
|
||||||
|
if err == nil {
|
||||||
|
sendOk = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
logger.Warningf("send judge %s fail: %v", addr, err)
|
||||||
|
time.Sleep(time.Millisecond * 10)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !sendOk {
|
||||||
|
stats.Counter.Set("points.out.err", count)
|
||||||
|
for _, item := range judgeItems {
|
||||||
|
logger.Errorf("send %v to judge %s fail: %v", item, addr, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}(addr, judgeItems, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Push2JudgeQueue(items []*dataobj.MetricValue) {
|
||||||
|
errCnt := 0
|
||||||
|
for _, item := range items {
|
||||||
|
key := str.PK(item.Metric, item.Endpoint)
|
||||||
|
stras := cache.StraMap.GetByKey(key)
|
||||||
|
|
||||||
|
for _, stra := range stras {
|
||||||
|
if !TagMatch(stra.Tags, item.TagsMap) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
judgeItem := &dataobj.JudgeItem{
|
||||||
|
Endpoint: item.Endpoint,
|
||||||
|
Metric: item.Metric,
|
||||||
|
Value: item.Value,
|
||||||
|
Timestamp: item.Timestamp,
|
||||||
|
DsType: item.CounterType,
|
||||||
|
Tags: item.Tags,
|
||||||
|
TagsMap: item.TagsMap,
|
||||||
|
Step: int(item.Step),
|
||||||
|
Sid: stra.Id,
|
||||||
|
Extra: item.Extra,
|
||||||
|
}
|
||||||
|
|
||||||
|
q, exists := JudgeQueues.Get(stra.JudgeInstance)
|
||||||
|
if exists {
|
||||||
|
if !q.PushFront(judgeItem) {
|
||||||
|
errCnt += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stats.Counter.Set("judge.queue.err", errCnt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func alignTs(ts int64, period int64) int64 {
|
||||||
|
return ts - ts%period
|
||||||
|
}
|
||||||
|
|
||||||
|
func TagMatch(straTags []model.Tag, tag map[string]string) bool {
|
||||||
|
for _, stag := range straTags {
|
||||||
|
if _, exists := tag[stag.Tkey]; !exists {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
var match bool
|
||||||
|
if stag.Topt == "=" { //当前策略 tagkey 对应的 tagv
|
||||||
|
for _, v := range stag.Tval {
|
||||||
|
if tag[stag.Tkey] == v {
|
||||||
|
match = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
match = true
|
||||||
|
for _, v := range stag.Tval {
|
||||||
|
if tag[stag.Tkey] == v {
|
||||||
|
match = false
|
||||||
|
return match
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !match {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetJudges() []string {
|
||||||
|
var judgeInstances []string
|
||||||
|
instances, err := report.GetAlive("judge", Judge.HbsMod)
|
||||||
|
if err != nil {
|
||||||
|
stats.Counter.Set("judge.get.err", 1)
|
||||||
|
return judgeInstances
|
||||||
|
}
|
||||||
|
for _, instance := range instances {
|
||||||
|
judgeInstance := instance.Identity + ":" + instance.RPCPort
|
||||||
|
judgeInstances = append(judgeInstances, judgeInstance)
|
||||||
|
}
|
||||||
|
return judgeInstances
|
||||||
|
}
|
|
@ -4,13 +4,81 @@ import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/Shopify/sarama"
|
|
||||||
"github.com/toolkits/pkg/logger"
|
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/Shopify/sarama"
|
||||||
|
"github.com/didi/nightingale/src/dataobj"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/stats"
|
||||||
|
"github.com/toolkits/pkg/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type KafkaSection struct {
|
||||||
|
Enabled bool `yaml:"enabled"`
|
||||||
|
Name string `yaml:"name"`
|
||||||
|
Topic string `yaml:"topic"`
|
||||||
|
BrokersPeers string `yaml:"brokersPeers"`
|
||||||
|
ConnTimeout int `yaml:"connTimeout"`
|
||||||
|
CallTimeout int `yaml:"callTimeout"`
|
||||||
|
MaxRetry int `yaml:"maxRetry"`
|
||||||
|
KeepAlive int64 `yaml:"keepAlive"`
|
||||||
|
SaslUser string `yaml:"saslUser"`
|
||||||
|
SaslPasswd string `yaml:"saslPasswd"`
|
||||||
|
}
|
||||||
|
type KafkaPushEndpoint struct {
|
||||||
|
// config
|
||||||
|
Section KafkaSection
|
||||||
|
|
||||||
|
// 发送缓存队列 node -> queue_of_data
|
||||||
|
KafkaQueue chan KafkaData
|
||||||
|
}
|
||||||
|
|
||||||
|
func (kafka *KafkaPushEndpoint) Init() {
|
||||||
|
|
||||||
|
// init queue
|
||||||
|
kafka.KafkaQueue = make(chan KafkaData, 10)
|
||||||
|
|
||||||
|
// start task
|
||||||
|
go kafka.send2KafkaTask()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (kafka *KafkaPushEndpoint) Push2Queue(items []*dataobj.MetricValue) {
|
||||||
|
for _, item := range items {
|
||||||
|
kafka.KafkaQueue <- kafka.convert2KafkaItem(item)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (kafka *KafkaPushEndpoint) convert2KafkaItem(d *dataobj.MetricValue) KafkaData {
|
||||||
|
m := make(KafkaData)
|
||||||
|
m["metric"] = d.Metric
|
||||||
|
m["value"] = d.Value
|
||||||
|
m["timestamp"] = d.Timestamp
|
||||||
|
m["value"] = d.Value
|
||||||
|
m["step"] = d.Step
|
||||||
|
m["endpoint"] = d.Endpoint
|
||||||
|
m["tags"] = d.Tags
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
func (kafka *KafkaPushEndpoint) send2KafkaTask() {
|
||||||
|
kf, err := NewKfClient(kafka.Section)
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("init kafka client fail: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer kf.Close()
|
||||||
|
for {
|
||||||
|
kafkaItem := <-kafka.KafkaQueue
|
||||||
|
stats.Counter.Set("points.out.kafka", 1)
|
||||||
|
err = kf.Send(kafkaItem)
|
||||||
|
if err != nil {
|
||||||
|
stats.Counter.Set("points.out.kafka.err", 1)
|
||||||
|
logger.Errorf("send %v to kafka %s fail: %v", kafkaItem, kafka.Section.BrokersPeers, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type KafkaData map[string]interface{}
|
type KafkaData map[string]interface{}
|
||||||
type KfClient struct {
|
type KfClient struct {
|
||||||
producer sarama.AsyncProducer
|
producer sarama.AsyncProducer
|
||||||
|
@ -45,11 +113,11 @@ func NewKfClient(c KafkaSection) (kafkaSender *KfClient, err error) {
|
||||||
cfg.Net.SASL.User = c.SaslUser
|
cfg.Net.SASL.User = c.SaslUser
|
||||||
cfg.Net.SASL.Password = c.SaslPasswd
|
cfg.Net.SASL.Password = c.SaslPasswd
|
||||||
}
|
}
|
||||||
if c.Retry > 0 {
|
if c.MaxRetry > 0 {
|
||||||
cfg.Producer.Retry.Max = c.Retry
|
cfg.Producer.Retry.Max = c.MaxRetry
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg.Net.DialTimeout = time.Duration(connTimeout) * time.Millisecond
|
cfg.Net.DialTimeout = time.Duration(c.ConnTimeout) * time.Millisecond
|
||||||
|
|
||||||
if c.KeepAlive > 0 {
|
if c.KeepAlive > 0 {
|
||||||
cfg.Net.KeepAlive = time.Duration(c.KeepAlive) * time.Millisecond
|
cfg.Net.KeepAlive = time.Duration(c.KeepAlive) * time.Millisecond
|
||||||
|
@ -58,10 +126,11 @@ func NewKfClient(c KafkaSection) (kafkaSender *KfClient, err error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
kafkaSender = newSender(brokers, topic, cfg, producer)
|
kafkaSender = newSender(brokers, topic, cfg, producer, c.CallTimeout)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
func newSender(brokers []string, topic string, cfg *sarama.Config, producer sarama.AsyncProducer) (kf *KfClient) {
|
func newSender(brokers []string, topic string, cfg *sarama.Config, producer sarama.AsyncProducer,
|
||||||
|
callTimeout int) (kf *KfClient) {
|
||||||
kf = &KfClient{
|
kf = &KfClient{
|
||||||
producer: producer,
|
producer: producer,
|
||||||
Topic: topic,
|
Topic: topic,
|
||||||
|
|
|
@ -0,0 +1,136 @@
|
||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/didi/nightingale/src/dataobj"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/pools"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/stats"
|
||||||
|
"github.com/toolkits/pkg/concurrent/semaphore"
|
||||||
|
"github.com/toolkits/pkg/container/list"
|
||||||
|
"github.com/toolkits/pkg/logger"
|
||||||
|
)
|
||||||
|
|
||||||
|
type OpenTsdbSection struct {
|
||||||
|
Enabled bool `yaml:"enabled"`
|
||||||
|
Name string `yaml:"name"`
|
||||||
|
Batch int `yaml:"batch"`
|
||||||
|
ConnTimeout int `yaml:"connTimeout"`
|
||||||
|
CallTimeout int `yaml:"callTimeout"`
|
||||||
|
WorkerNum int `yaml:"workerNum"`
|
||||||
|
MaxConns int `yaml:"maxConns"`
|
||||||
|
MaxIdle int `yaml:"maxIdle"`
|
||||||
|
MaxRetry int `yaml:"maxRetry"`
|
||||||
|
Address string `yaml:"address"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type OpenTsdbPushEndpoint struct {
|
||||||
|
// config
|
||||||
|
Section OpenTsdbSection
|
||||||
|
|
||||||
|
OpenTsdbConnPoolHelper *pools.OpenTsdbConnPoolHelper
|
||||||
|
|
||||||
|
// 发送缓存队列 node -> queue_of_data
|
||||||
|
OpenTsdbQueue *list.SafeListLimited
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opentsdb *OpenTsdbPushEndpoint) Init() {
|
||||||
|
// init connPool
|
||||||
|
if opentsdb.Section.Enabled {
|
||||||
|
opentsdb.OpenTsdbConnPoolHelper = pools.NewOpenTsdbConnPoolHelper(opentsdb.Section.Address,
|
||||||
|
opentsdb.Section.MaxConns, opentsdb.Section.MaxIdle, opentsdb.Section.ConnTimeout,
|
||||||
|
opentsdb.Section.CallTimeout)
|
||||||
|
}
|
||||||
|
|
||||||
|
// init queue
|
||||||
|
if opentsdb.Section.Enabled {
|
||||||
|
opentsdb.OpenTsdbQueue = list.NewSafeListLimited(DefaultSendQueueMaxSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
// start task
|
||||||
|
openTsdbConcurrent := opentsdb.Section.WorkerNum
|
||||||
|
if openTsdbConcurrent < 1 {
|
||||||
|
openTsdbConcurrent = 1
|
||||||
|
}
|
||||||
|
go opentsdb.send2OpenTsdbTask(openTsdbConcurrent)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将原始数据入到tsdb发送缓存队列
|
||||||
|
func (opentsdb *OpenTsdbPushEndpoint) Push2Queue(items []*dataobj.MetricValue) {
|
||||||
|
errCnt := 0
|
||||||
|
for _, item := range items {
|
||||||
|
tsdbItem := opentsdb.convert2OpenTsdbItem(item)
|
||||||
|
isSuccess := opentsdb.OpenTsdbQueue.PushFront(tsdbItem)
|
||||||
|
|
||||||
|
if !isSuccess {
|
||||||
|
errCnt += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stats.Counter.Set("opentsdb.queue.err", errCnt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opentsdb *OpenTsdbPushEndpoint) send2OpenTsdbTask(concurrent int) {
|
||||||
|
batch := opentsdb.Section.Batch // 一次发送,最多batch条数据
|
||||||
|
retry := opentsdb.Section.MaxRetry
|
||||||
|
addr := opentsdb.Section.Address
|
||||||
|
sema := semaphore.NewSemaphore(concurrent)
|
||||||
|
|
||||||
|
for {
|
||||||
|
items := opentsdb.OpenTsdbQueue.PopBackBy(batch)
|
||||||
|
count := len(items)
|
||||||
|
if count == 0 {
|
||||||
|
time.Sleep(DefaultSendTaskSleepInterval)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var openTsdbBuffer bytes.Buffer
|
||||||
|
|
||||||
|
for i := 0; i < count; i++ {
|
||||||
|
tsdbItem := items[i].(*dataobj.OpenTsdbItem)
|
||||||
|
openTsdbBuffer.WriteString(tsdbItem.OpenTsdbString())
|
||||||
|
openTsdbBuffer.WriteString("\n")
|
||||||
|
stats.Counter.Set("points.out.opentsdb", 1)
|
||||||
|
logger.Debug("send to opentsdb: ", tsdbItem)
|
||||||
|
}
|
||||||
|
// 同步Call + 有限并发 进行发送
|
||||||
|
sema.Acquire()
|
||||||
|
go func(addr string, openTsdbBuffer bytes.Buffer, count int) {
|
||||||
|
defer sema.Release()
|
||||||
|
|
||||||
|
var err error
|
||||||
|
sendOk := false
|
||||||
|
for i := 0; i < retry; i++ {
|
||||||
|
err = opentsdb.OpenTsdbConnPoolHelper.Send(openTsdbBuffer.Bytes())
|
||||||
|
if err == nil {
|
||||||
|
sendOk = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
logger.Warningf("send opentsdb %s fail: %v", addr, err)
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !sendOk {
|
||||||
|
stats.Counter.Set("points.out.opentsdb.err", count)
|
||||||
|
for _, item := range items {
|
||||||
|
logger.Errorf("send %v to opentsdb %s fail: %v", item, addr, err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
logger.Debugf("send to opentsdb %s ok", addr)
|
||||||
|
}
|
||||||
|
}(addr, openTsdbBuffer, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opentsdb *OpenTsdbPushEndpoint) convert2OpenTsdbItem(d *dataobj.MetricValue) *dataobj.OpenTsdbItem {
|
||||||
|
t := dataobj.OpenTsdbItem{Tags: make(map[string]string)}
|
||||||
|
|
||||||
|
for k, v := range d.TagsMap {
|
||||||
|
t.Tags[k] = v
|
||||||
|
}
|
||||||
|
t.Tags["endpoint"] = d.Endpoint
|
||||||
|
t.Metric = d.Metric
|
||||||
|
t.Timestamp = d.Timestamp
|
||||||
|
t.Value = d.Value
|
||||||
|
return &t
|
||||||
|
}
|
|
@ -1,535 +0,0 @@
|
||||||
package backend
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/didi/nightingale/src/dataobj"
|
|
||||||
"github.com/didi/nightingale/src/model"
|
|
||||||
"github.com/didi/nightingale/src/modules/transfer/cache"
|
|
||||||
"github.com/didi/nightingale/src/toolkits/stats"
|
|
||||||
"github.com/didi/nightingale/src/toolkits/str"
|
|
||||||
|
|
||||||
client "github.com/influxdata/influxdb/client/v2"
|
|
||||||
"github.com/toolkits/pkg/concurrent/semaphore"
|
|
||||||
"github.com/toolkits/pkg/container/list"
|
|
||||||
"github.com/toolkits/pkg/logger"
|
|
||||||
)
|
|
||||||
|
|
||||||
// send
|
|
||||||
const (
|
|
||||||
DefaultSendTaskSleepInterval = time.Millisecond * 50 //默认睡眠间隔为50ms
|
|
||||||
MaxSendRetry = 10
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
MinStep int //最小上报周期,单位sec
|
|
||||||
)
|
|
||||||
|
|
||||||
func startSendTasks() {
|
|
||||||
|
|
||||||
tsdbConcurrent := Config.WorkerNum
|
|
||||||
if tsdbConcurrent < 1 {
|
|
||||||
tsdbConcurrent = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
judgeConcurrent := Config.WorkerNum
|
|
||||||
if judgeConcurrent < 1 {
|
|
||||||
judgeConcurrent = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
influxdbConcurrent := Config.Influxdb.WorkerNum
|
|
||||||
if influxdbConcurrent < 1 {
|
|
||||||
influxdbConcurrent = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
openTsdbConcurrent := Config.OpenTsdb.WorkerNum
|
|
||||||
if openTsdbConcurrent < 1 {
|
|
||||||
openTsdbConcurrent = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
if Config.Enabled {
|
|
||||||
for node, item := range Config.ClusterList {
|
|
||||||
for _, addr := range item.Addrs {
|
|
||||||
queue := TsdbQueues[node+addr]
|
|
||||||
go Send2TsdbTask(queue, node, addr, tsdbConcurrent)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if Config.Enabled {
|
|
||||||
judgeQueue := JudgeQueues.GetAll()
|
|
||||||
for instance, queue := range judgeQueue {
|
|
||||||
go Send2JudgeTask(queue, instance, judgeConcurrent)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if Config.Influxdb.Enabled {
|
|
||||||
go send2InfluxdbTask(influxdbConcurrent)
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if Config.OpenTsdb.Enabled {
|
|
||||||
go send2OpenTsdbTask(openTsdbConcurrent)
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if Config.Kafka.Enabled {
|
|
||||||
go send2KafkaTask()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func Send2TsdbTask(Q *list.SafeListLimited, node, addr string, concurrent int) {
|
|
||||||
batch := Config.Batch // 一次发送,最多batch条数据
|
|
||||||
Q = TsdbQueues[node+addr]
|
|
||||||
|
|
||||||
sema := semaphore.NewSemaphore(concurrent)
|
|
||||||
|
|
||||||
for {
|
|
||||||
items := Q.PopBackBy(batch)
|
|
||||||
count := len(items)
|
|
||||||
if count == 0 {
|
|
||||||
time.Sleep(DefaultSendTaskSleepInterval)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
tsdbItems := make([]*dataobj.TsdbItem, count)
|
|
||||||
stats.Counter.Set("points.out.tsdb", count)
|
|
||||||
for i := 0; i < count; i++ {
|
|
||||||
tsdbItems[i] = items[i].(*dataobj.TsdbItem)
|
|
||||||
logger.Debug("send to tsdb->: ", tsdbItems[i])
|
|
||||||
}
|
|
||||||
|
|
||||||
//控制并发
|
|
||||||
sema.Acquire()
|
|
||||||
go func(addr string, tsdbItems []*dataobj.TsdbItem, count int) {
|
|
||||||
defer sema.Release()
|
|
||||||
|
|
||||||
resp := &dataobj.SimpleRpcResponse{}
|
|
||||||
var err error
|
|
||||||
sendOk := false
|
|
||||||
for i := 0; i < 3; i++ { //最多重试3次
|
|
||||||
err = TsdbConnPools.Call(addr, "Tsdb.Send", tsdbItems, resp)
|
|
||||||
if err == nil {
|
|
||||||
sendOk = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
time.Sleep(time.Millisecond * 10)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !sendOk {
|
|
||||||
stats.Counter.Set("points.out.tsdb.err", count)
|
|
||||||
logger.Errorf("send %v to tsdb %s:%s fail: %v", tsdbItems, node, addr, err)
|
|
||||||
} else {
|
|
||||||
logger.Debugf("send to tsdb %s:%s ok", node, addr)
|
|
||||||
}
|
|
||||||
}(addr, tsdbItems, count)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Push2TsdbSendQueue pushes data to a TSDB instance which depends on the consistent ring.
|
|
||||||
func Push2TsdbSendQueue(items []*dataobj.MetricValue) {
|
|
||||||
errCnt := 0
|
|
||||||
for _, item := range items {
|
|
||||||
tsdbItem := convert2TsdbItem(item)
|
|
||||||
stats.Counter.Set("tsdb.queue.push", 1)
|
|
||||||
|
|
||||||
node, err := TsdbNodeRing.GetNode(item.PK())
|
|
||||||
if err != nil {
|
|
||||||
logger.Warningf("get tsdb node error: %v", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
cnode := Config.ClusterList[node]
|
|
||||||
for _, addr := range cnode.Addrs {
|
|
||||||
Q := TsdbQueues[node+addr]
|
|
||||||
// 队列已满
|
|
||||||
if !Q.PushFront(tsdbItem) {
|
|
||||||
errCnt += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// statistics
|
|
||||||
if errCnt > 0 {
|
|
||||||
stats.Counter.Set("tsdb.queue.err", errCnt)
|
|
||||||
logger.Error("Push2TsdbSendQueue err num: ", errCnt)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func Send2JudgeTask(Q *list.SafeListLimited, addr string, concurrent int) {
|
|
||||||
batch := Config.Batch
|
|
||||||
sema := semaphore.NewSemaphore(concurrent)
|
|
||||||
|
|
||||||
for {
|
|
||||||
items := Q.PopBackBy(batch)
|
|
||||||
count := len(items)
|
|
||||||
if count == 0 {
|
|
||||||
time.Sleep(DefaultSendTaskSleepInterval)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
judgeItems := make([]*dataobj.JudgeItem, count)
|
|
||||||
stats.Counter.Set("points.out.judge", count)
|
|
||||||
for i := 0; i < count; i++ {
|
|
||||||
judgeItems[i] = items[i].(*dataobj.JudgeItem)
|
|
||||||
logger.Debug("send to judge: ", judgeItems[i])
|
|
||||||
}
|
|
||||||
|
|
||||||
sema.Acquire()
|
|
||||||
go func(addr string, judgeItems []*dataobj.JudgeItem, count int) {
|
|
||||||
defer sema.Release()
|
|
||||||
|
|
||||||
resp := &dataobj.SimpleRpcResponse{}
|
|
||||||
var err error
|
|
||||||
sendOk := false
|
|
||||||
for i := 0; i < MaxSendRetry; i++ {
|
|
||||||
err = JudgeConnPools.Call(addr, "Judge.Send", judgeItems, resp)
|
|
||||||
if err == nil {
|
|
||||||
sendOk = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
logger.Warningf("send judge %s fail: %v", addr, err)
|
|
||||||
time.Sleep(time.Millisecond * 10)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !sendOk {
|
|
||||||
stats.Counter.Set("points.out.judge.err", count)
|
|
||||||
for _, item := range judgeItems {
|
|
||||||
logger.Errorf("send %v to judge %s fail: %v", item, addr, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}(addr, judgeItems, count)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func Push2JudgeSendQueue(items []*dataobj.MetricValue) {
|
|
||||||
errCnt := 0
|
|
||||||
for _, item := range items {
|
|
||||||
key := str.PK(item.Metric, item.Endpoint)
|
|
||||||
stras := cache.StraMap.GetByKey(key)
|
|
||||||
|
|
||||||
for _, stra := range stras {
|
|
||||||
if !TagMatch(stra.Tags, item.TagsMap) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
judgeItem := &dataobj.JudgeItem{
|
|
||||||
Endpoint: item.Endpoint,
|
|
||||||
Metric: item.Metric,
|
|
||||||
Value: item.Value,
|
|
||||||
Timestamp: item.Timestamp,
|
|
||||||
DsType: item.CounterType,
|
|
||||||
Tags: item.Tags,
|
|
||||||
TagsMap: item.TagsMap,
|
|
||||||
Step: int(item.Step),
|
|
||||||
Sid: stra.Id,
|
|
||||||
Extra: item.Extra,
|
|
||||||
}
|
|
||||||
|
|
||||||
q, exists := JudgeQueues.Get(stra.JudgeInstance)
|
|
||||||
if exists {
|
|
||||||
if !q.PushFront(judgeItem) {
|
|
||||||
errCnt += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stats.Counter.Set("judge.queue.err", errCnt)
|
|
||||||
}
|
|
||||||
|
|
||||||
// 打到 Tsdb 的数据,要根据 rrdtool 的特定 来限制 step、counterType、timestamp
|
|
||||||
func convert2TsdbItem(d *dataobj.MetricValue) *dataobj.TsdbItem {
|
|
||||||
item := &dataobj.TsdbItem{
|
|
||||||
Endpoint: d.Endpoint,
|
|
||||||
Metric: d.Metric,
|
|
||||||
Value: d.Value,
|
|
||||||
Timestamp: d.Timestamp,
|
|
||||||
Tags: d.Tags,
|
|
||||||
TagsMap: d.TagsMap,
|
|
||||||
Step: int(d.Step),
|
|
||||||
Heartbeat: int(d.Step) * 2,
|
|
||||||
DsType: dataobj.GAUGE,
|
|
||||||
Min: "U",
|
|
||||||
Max: "U",
|
|
||||||
}
|
|
||||||
|
|
||||||
return item
|
|
||||||
}
|
|
||||||
|
|
||||||
// alignTs floors ts down to the nearest multiple of period
// (truncating toward zero, matching Go integer division).
func alignTs(ts int64, period int64) int64 {
	return (ts / period) * period
}
|
|
||||||
|
|
||||||
func TagMatch(straTags []model.Tag, tag map[string]string) bool {
|
|
||||||
for _, stag := range straTags {
|
|
||||||
if _, exists := tag[stag.Tkey]; !exists {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
var match bool
|
|
||||||
if stag.Topt == "=" { //当前策略 tagkey 对应的 tagv
|
|
||||||
for _, v := range stag.Tval {
|
|
||||||
if tag[stag.Tkey] == v {
|
|
||||||
match = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
match = true
|
|
||||||
for _, v := range stag.Tval {
|
|
||||||
if tag[stag.Tkey] == v {
|
|
||||||
match = false
|
|
||||||
return match
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !match {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
type InfluxClient struct {
|
|
||||||
Client client.Client
|
|
||||||
Database string
|
|
||||||
Precision string
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewInfluxdbClient() (*InfluxClient, error) {
|
|
||||||
c, err := client.NewHTTPClient(client.HTTPConfig{
|
|
||||||
Addr: Config.Influxdb.Address,
|
|
||||||
Username: Config.Influxdb.Username,
|
|
||||||
Password: Config.Influxdb.Password,
|
|
||||||
Timeout: time.Millisecond * time.Duration(Config.Influxdb.Timeout),
|
|
||||||
})
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return &InfluxClient{
|
|
||||||
Client: c,
|
|
||||||
Database: Config.Influxdb.Database,
|
|
||||||
Precision: Config.Influxdb.Precision,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *InfluxClient) Send(items []*dataobj.InfluxdbItem) error {
|
|
||||||
bp, err := client.NewBatchPoints(client.BatchPointsConfig{
|
|
||||||
Database: c.Database,
|
|
||||||
Precision: c.Precision,
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("create batch points error: ", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, item := range items {
|
|
||||||
pt, err := client.NewPoint(item.Measurement, item.Tags, item.Fields, time.Unix(item.Timestamp, 0))
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("create new points error: ", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
bp.AddPoint(pt)
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.Client.Write(bp)
|
|
||||||
}
|
|
||||||
|
|
||||||
// 将原始数据插入到influxdb缓存队列
|
|
||||||
func Push2InfluxdbSendQueue(items []*dataobj.MetricValue) {
|
|
||||||
errCnt := 0
|
|
||||||
for _, item := range items {
|
|
||||||
influxdbItem := convert2InfluxdbItem(item)
|
|
||||||
isSuccess := InfluxdbQueue.PushFront(influxdbItem)
|
|
||||||
|
|
||||||
if !isSuccess {
|
|
||||||
errCnt += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stats.Counter.Set("influxdb.queue.err", errCnt)
|
|
||||||
}
|
|
||||||
|
|
||||||
func convert2InfluxdbItem(d *dataobj.MetricValue) *dataobj.InfluxdbItem {
|
|
||||||
t := dataobj.InfluxdbItem{Tags: make(map[string]string), Fields: make(map[string]interface{})}
|
|
||||||
|
|
||||||
for k, v := range d.TagsMap {
|
|
||||||
t.Tags[k] = v
|
|
||||||
}
|
|
||||||
t.Tags["endpoint"] = d.Endpoint
|
|
||||||
t.Measurement = d.Metric
|
|
||||||
t.Fields["value"] = d.Value
|
|
||||||
t.Timestamp = d.Timestamp
|
|
||||||
|
|
||||||
return &t
|
|
||||||
}
|
|
||||||
|
|
||||||
func send2InfluxdbTask(concurrent int) {
|
|
||||||
batch := Config.Influxdb.Batch // 一次发送,最多batch条数据
|
|
||||||
retry := Config.Influxdb.MaxRetry
|
|
||||||
addr := Config.Influxdb.Address
|
|
||||||
sema := semaphore.NewSemaphore(concurrent)
|
|
||||||
|
|
||||||
var err error
|
|
||||||
c, err := NewInfluxdbClient()
|
|
||||||
defer c.Client.Close()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
logger.Errorf("init influxdb client fail: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
for {
|
|
||||||
items := InfluxdbQueue.PopBackBy(batch)
|
|
||||||
count := len(items)
|
|
||||||
if count == 0 {
|
|
||||||
time.Sleep(DefaultSendTaskSleepInterval)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
influxdbItems := make([]*dataobj.InfluxdbItem, count)
|
|
||||||
for i := 0; i < count; i++ {
|
|
||||||
influxdbItems[i] = items[i].(*dataobj.InfluxdbItem)
|
|
||||||
stats.Counter.Set("points.out.influxdb", 1)
|
|
||||||
logger.Debug("send to influxdb: ", influxdbItems[i])
|
|
||||||
}
|
|
||||||
|
|
||||||
// 同步Call + 有限并发 进行发送
|
|
||||||
sema.Acquire()
|
|
||||||
go func(addr string, influxdbItems []*dataobj.InfluxdbItem, count int) {
|
|
||||||
defer sema.Release()
|
|
||||||
sendOk := false
|
|
||||||
|
|
||||||
for i := 0; i < retry; i++ {
|
|
||||||
err = c.Send(influxdbItems)
|
|
||||||
if err == nil {
|
|
||||||
sendOk = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
logger.Warningf("send influxdb fail: %v", err)
|
|
||||||
time.Sleep(time.Millisecond * 10)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !sendOk {
|
|
||||||
stats.Counter.Set("points.out.influxdb.err", count)
|
|
||||||
logger.Errorf("send %v to influxdb %s fail: %v", influxdbItems, addr, err)
|
|
||||||
} else {
|
|
||||||
logger.Debugf("send to influxdb %s ok", addr)
|
|
||||||
}
|
|
||||||
}(addr, influxdbItems, count)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 将原始数据入到tsdb发送缓存队列
|
|
||||||
func Push2OpenTsdbSendQueue(items []*dataobj.MetricValue) {
|
|
||||||
errCnt := 0
|
|
||||||
for _, item := range items {
|
|
||||||
tsdbItem := convert2OpenTsdbItem(item)
|
|
||||||
isSuccess := OpenTsdbQueue.PushFront(tsdbItem)
|
|
||||||
|
|
||||||
if !isSuccess {
|
|
||||||
errCnt += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stats.Counter.Set("opentsdb.queue.err", errCnt)
|
|
||||||
}
|
|
||||||
|
|
||||||
func send2OpenTsdbTask(concurrent int) {
|
|
||||||
batch := Config.OpenTsdb.Batch // 一次发送,最多batch条数据
|
|
||||||
retry := Config.OpenTsdb.MaxRetry
|
|
||||||
addr := Config.OpenTsdb.Address
|
|
||||||
sema := semaphore.NewSemaphore(concurrent)
|
|
||||||
|
|
||||||
for {
|
|
||||||
items := OpenTsdbQueue.PopBackBy(batch)
|
|
||||||
count := len(items)
|
|
||||||
if count == 0 {
|
|
||||||
time.Sleep(DefaultSendTaskSleepInterval)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
var openTsdbBuffer bytes.Buffer
|
|
||||||
|
|
||||||
for i := 0; i < count; i++ {
|
|
||||||
tsdbItem := items[i].(*dataobj.OpenTsdbItem)
|
|
||||||
openTsdbBuffer.WriteString(tsdbItem.OpenTsdbString())
|
|
||||||
openTsdbBuffer.WriteString("\n")
|
|
||||||
stats.Counter.Set("points.out.opentsdb", 1)
|
|
||||||
logger.Debug("send to opentsdb: ", tsdbItem)
|
|
||||||
}
|
|
||||||
// 同步Call + 有限并发 进行发送
|
|
||||||
sema.Acquire()
|
|
||||||
go func(addr string, openTsdbBuffer bytes.Buffer, count int) {
|
|
||||||
defer sema.Release()
|
|
||||||
|
|
||||||
var err error
|
|
||||||
sendOk := false
|
|
||||||
for i := 0; i < retry; i++ {
|
|
||||||
err = OpenTsdbConnPoolHelper.Send(openTsdbBuffer.Bytes())
|
|
||||||
if err == nil {
|
|
||||||
sendOk = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
logger.Warningf("send opentsdb %s fail: %v", addr, err)
|
|
||||||
time.Sleep(100 * time.Millisecond)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !sendOk {
|
|
||||||
stats.Counter.Set("points.out.opentsdb.err", count)
|
|
||||||
for _, item := range items {
|
|
||||||
logger.Errorf("send %v to opentsdb %s fail: %v", item, addr, err)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
logger.Debugf("send to opentsdb %s ok", addr)
|
|
||||||
}
|
|
||||||
}(addr, openTsdbBuffer, count)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func convert2OpenTsdbItem(d *dataobj.MetricValue) *dataobj.OpenTsdbItem {
|
|
||||||
t := dataobj.OpenTsdbItem{Tags: make(map[string]string)}
|
|
||||||
|
|
||||||
for k, v := range d.TagsMap {
|
|
||||||
t.Tags[k] = v
|
|
||||||
}
|
|
||||||
t.Tags["endpoint"] = d.Endpoint
|
|
||||||
t.Metric = d.Metric
|
|
||||||
t.Timestamp = d.Timestamp
|
|
||||||
t.Value = d.Value
|
|
||||||
return &t
|
|
||||||
}
|
|
||||||
|
|
||||||
func Push2KafkaSendQueue(items []*dataobj.MetricValue) {
|
|
||||||
for _, item := range items {
|
|
||||||
KafkaQueue <- convert2KafkaItem(item)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func convert2KafkaItem(d *dataobj.MetricValue) KafkaData {
|
|
||||||
m := make(KafkaData)
|
|
||||||
m["metric"] = d.Metric
|
|
||||||
m["value"] = d.Value
|
|
||||||
m["timestamp"] = d.Timestamp
|
|
||||||
m["value"] = d.Value
|
|
||||||
m["step"] = d.Step
|
|
||||||
m["endpoint"] = d.Endpoint
|
|
||||||
m["tags"] = d.Tags
|
|
||||||
return m
|
|
||||||
}
|
|
||||||
|
|
||||||
func send2KafkaTask() {
|
|
||||||
kf, err := NewKfClient(Config.Kafka)
|
|
||||||
if err != nil {
|
|
||||||
logger.Errorf("init kafka client fail: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
defer kf.Close()
|
|
||||||
for {
|
|
||||||
kafkaItem := <-KafkaQueue
|
|
||||||
stats.Counter.Set("points.out.kafka", 1)
|
|
||||||
err = kf.Send(kafkaItem)
|
|
||||||
if err != nil {
|
|
||||||
stats.Counter.Set("points.out.kafka.err", 1)
|
|
||||||
logger.Errorf("send %v to kafka %s fail: %v", kafkaItem, Config.Kafka.BrokersPeers, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
package tsdb
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/didi/nightingale/src/toolkits/report"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/stats"
|
||||||
|
|
||||||
|
"github.com/toolkits/pkg/logger"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IndexList holds the currently-alive index instance addresses,
// refreshed periodically by GetIndexLoop.
var IndexList IndexAddrs

// IndexAddrs is a concurrency-safe container for index "host:port"
// address strings.
type IndexAddrs struct {
	sync.RWMutex
	Data []string
}

// Set replaces the address list under the write lock.
func (i *IndexAddrs) Set(addrs []string) {
	i.Lock()
	i.Data = addrs
	i.Unlock()
}

// Get returns the current address list under the read lock.
func (i *IndexAddrs) Get() []string {
	i.RLock()
	defer i.RUnlock()
	return i.Data
}
|
||||||
|
|
||||||
|
func GetIndexLoop() {
|
||||||
|
t1 := time.NewTicker(time.Duration(9) * time.Second)
|
||||||
|
GetIndex()
|
||||||
|
for {
|
||||||
|
<-t1.C
|
||||||
|
GetIndex()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetIndex() {
|
||||||
|
instances, err := report.GetAlive("index", "monapi")
|
||||||
|
if err != nil {
|
||||||
|
stats.Counter.Set("get.index.err", 1)
|
||||||
|
logger.Warningf("get index list err:%v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
activeIndexs := []string{}
|
||||||
|
for _, instance := range instances {
|
||||||
|
activeIndexs = append(activeIndexs, fmt.Sprintf("%s:%s", instance.Identity, instance.HTTPPort))
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexList.Set(activeIndexs)
|
||||||
|
return
|
||||||
|
}
|
|
@ -1,26 +1,24 @@
|
||||||
package backend
|
package tsdb
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"strings"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/toolkits/pkg/net/httplib"
|
||||||
|
|
||||||
"github.com/didi/nightingale/src/dataobj"
|
"github.com/didi/nightingale/src/dataobj"
|
||||||
"github.com/didi/nightingale/src/modules/transfer/calc"
|
"github.com/didi/nightingale/src/modules/transfer/calc"
|
||||||
"github.com/didi/nightingale/src/toolkits/address"
|
|
||||||
"github.com/didi/nightingale/src/toolkits/pools"
|
"github.com/didi/nightingale/src/toolkits/pools"
|
||||||
"github.com/didi/nightingale/src/toolkits/stats"
|
"github.com/didi/nightingale/src/toolkits/stats"
|
||||||
|
|
||||||
"github.com/toolkits/pkg/logger"
|
"github.com/toolkits/pkg/logger"
|
||||||
"github.com/toolkits/pkg/net/httplib"
|
|
||||||
"github.com/toolkits/pkg/pool"
|
"github.com/toolkits/pkg/pool"
|
||||||
)
|
)
|
||||||
|
|
||||||
func FetchData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse {
|
func (tsdb *TsdbDataSource) QueryData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse {
|
||||||
|
logger.Debugf("query data, inputs: %+v", inputs)
|
||||||
|
|
||||||
workerNum := 100
|
workerNum := 100
|
||||||
worker := make(chan struct{}, workerNum) // 控制 goroutine 并发数
|
worker := make(chan struct{}, workerNum) // 控制 goroutine 并发数
|
||||||
dataChan := make(chan *dataobj.TsdbQueryResponse, 20000)
|
dataChan := make(chan *dataobj.TsdbQueryResponse, 20000)
|
||||||
|
@ -38,7 +36,8 @@ func FetchData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse {
|
||||||
for _, endpoint := range input.Endpoints {
|
for _, endpoint := range input.Endpoints {
|
||||||
for _, counter := range input.Counters {
|
for _, counter := range input.Counters {
|
||||||
worker <- struct{}{}
|
worker <- struct{}{}
|
||||||
go fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker, dataChan)
|
go tsdb.fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker,
|
||||||
|
dataChan)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -55,7 +54,10 @@ func FetchData(inputs []dataobj.QueryData) []*dataobj.TsdbQueryResponse {
|
||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
|
|
||||||
func FetchDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse {
|
func (tsdb *TsdbDataSource) QueryDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse {
|
||||||
|
|
||||||
|
logger.Debugf("query data for ui, input: %+v", input)
|
||||||
|
|
||||||
workerNum := 100
|
workerNum := 100
|
||||||
worker := make(chan struct{}, workerNum) // 控制 goroutine 并发数
|
worker := make(chan struct{}, workerNum) // 控制 goroutine 并发数
|
||||||
dataChan := make(chan *dataobj.TsdbQueryResponse, 20000)
|
dataChan := make(chan *dataobj.TsdbQueryResponse, 20000)
|
||||||
|
@ -71,22 +73,24 @@ func FetchDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse {
|
||||||
|
|
||||||
for _, endpoint := range input.Endpoints {
|
for _, endpoint := range input.Endpoints {
|
||||||
if len(input.Tags) == 0 {
|
if len(input.Tags) == 0 {
|
||||||
counter, err := GetCounter(input.Metric, "", nil)
|
counter, err := dataobj.GetCounter(input.Metric, "", nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Warningf("get counter error: %+v", err)
|
logger.Warningf("get counter error: %+v", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
worker <- struct{}{}
|
worker <- struct{}{}
|
||||||
go fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker, dataChan)
|
go tsdb.fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker,
|
||||||
|
dataChan)
|
||||||
} else {
|
} else {
|
||||||
for _, tag := range input.Tags {
|
for _, tag := range input.Tags {
|
||||||
counter, err := GetCounter(input.Metric, tag, nil)
|
counter, err := dataobj.GetCounter(input.Metric, tag, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Warningf("get counter error: %+v", err)
|
logger.Warningf("get counter error: %+v", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
worker <- struct{}{}
|
worker <- struct{}{}
|
||||||
go fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker, dataChan)
|
go tsdb.fetchDataSync(input.Start, input.End, input.ConsolFunc, endpoint, counter, input.Step, worker,
|
||||||
|
dataChan)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -154,30 +158,16 @@ func FetchDataForUI(input dataobj.QueryDataForUI) []*dataobj.TsdbQueryResponse {
|
||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetCounter(metric, tag string, tagMap map[string]string) (counter string, err error) {
|
func (tsdb *TsdbDataSource) fetchDataSync(start, end int64, consolFun, endpoint, counter string, step int, worker chan struct{}, dataChan chan *dataobj.TsdbQueryResponse) {
|
||||||
if tagMap == nil {
|
|
||||||
tagMap, err = dataobj.SplitTagsString(tag)
|
|
||||||
if err != nil {
|
|
||||||
logger.Warningf("split tag string error: %+v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
tagStr := dataobj.SortedTags(tagMap)
|
|
||||||
counter = dataobj.PKWithTags(metric, tagStr)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func fetchDataSync(start, end int64, consolFun, endpoint, counter string, step int, worker chan struct{}, dataChan chan *dataobj.TsdbQueryResponse) {
|
|
||||||
defer func() {
|
defer func() {
|
||||||
<-worker
|
<-worker
|
||||||
}()
|
}()
|
||||||
stats.Counter.Set("query.tsdb", 1)
|
stats.Counter.Set("query.tsdb", 1)
|
||||||
|
|
||||||
data, err := fetchData(start, end, consolFun, endpoint, counter, step)
|
data, err := tsdb.fetchData(start, end, consolFun, endpoint, counter, step)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Warningf("fetch tsdb data error: %+v", err)
|
logger.Warningf("fetch tsdb data error: %+v", err)
|
||||||
stats.Counter.Set("query.data.err", 1)
|
stats.Counter.Set("query.tsdb.err", 1)
|
||||||
data.Endpoint = endpoint
|
data.Endpoint = endpoint
|
||||||
data.Counter = counter
|
data.Counter = counter
|
||||||
data.Step = step
|
data.Step = step
|
||||||
|
@ -185,11 +175,11 @@ func fetchDataSync(start, end int64, consolFun, endpoint, counter string, step i
|
||||||
dataChan <- data
|
dataChan <- data
|
||||||
}
|
}
|
||||||
|
|
||||||
func fetchData(start, end int64, consolFun, endpoint, counter string, step int) (*dataobj.TsdbQueryResponse, error) {
|
func (tsdb *TsdbDataSource) fetchData(start, end int64, consolFun, endpoint, counter string, step int) (*dataobj.TsdbQueryResponse, error) {
|
||||||
var resp *dataobj.TsdbQueryResponse
|
var resp *dataobj.TsdbQueryResponse
|
||||||
|
|
||||||
qparm := GenQParam(start, end, consolFun, endpoint, counter, step)
|
qparm := genQParam(start, end, consolFun, endpoint, counter, step)
|
||||||
resp, err := QueryOne(qparm)
|
resp, err := tsdb.QueryOne(qparm)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return resp, err
|
return resp, err
|
||||||
}
|
}
|
||||||
|
@ -200,12 +190,7 @@ func fetchData(start, end int64, consolFun, endpoint, counter string, step int)
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getCounterStep(endpoint, counter string) (step int, err error) {
|
func genQParam(start, end int64, consolFunc, endpoint, counter string, step int) dataobj.TsdbQueryParam {
|
||||||
//从内存中获取
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func GenQParam(start, end int64, consolFunc, endpoint, counter string, step int) dataobj.TsdbQueryParam {
|
|
||||||
return dataobj.TsdbQueryParam{
|
return dataobj.TsdbQueryParam{
|
||||||
Start: start,
|
Start: start,
|
||||||
End: end,
|
End: end,
|
||||||
|
@ -216,12 +201,12 @@ func GenQParam(start, end int64, consolFunc, endpoint, counter string, step int)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func QueryOne(para dataobj.TsdbQueryParam) (resp *dataobj.TsdbQueryResponse, err error) {
|
func (tsdb *TsdbDataSource) QueryOne(para dataobj.TsdbQueryParam) (resp *dataobj.TsdbQueryResponse, err error) {
|
||||||
start, end := para.Start, para.End
|
start, end := para.Start, para.End
|
||||||
resp = &dataobj.TsdbQueryResponse{}
|
resp = &dataobj.TsdbQueryResponse{}
|
||||||
|
|
||||||
pk := dataobj.PKWithCounter(para.Endpoint, para.Counter)
|
pk := dataobj.PKWithCounter(para.Endpoint, para.Counter)
|
||||||
ps, err := SelectPoolByPK(pk)
|
ps, err := tsdb.SelectPoolByPK(pk)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return resp, err
|
return resp, err
|
||||||
}
|
}
|
||||||
|
@ -259,7 +244,7 @@ func QueryOne(para dataobj.TsdbQueryParam) (resp *dataobj.TsdbQueryResponse, err
|
||||||
}()
|
}()
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-time.After(time.Duration(callTimeout) * time.Millisecond):
|
case <-time.After(time.Duration(tsdb.Section.CallTimeout) * time.Millisecond):
|
||||||
onePool.ForceClose(conn)
|
onePool.ForceClose(conn)
|
||||||
logger.Errorf("%s, call timeout. proc: %s", addr, onePool.Proc())
|
logger.Errorf("%s, call timeout. proc: %s", addr, onePool.Proc())
|
||||||
break
|
break
|
||||||
|
@ -297,20 +282,20 @@ type Pool struct {
|
||||||
Addr string
|
Addr string
|
||||||
}
|
}
|
||||||
|
|
||||||
func SelectPoolByPK(pk string) ([]Pool, error) {
|
func (tsdb *TsdbDataSource) SelectPoolByPK(pk string) ([]Pool, error) {
|
||||||
node, err := TsdbNodeRing.GetNode(pk)
|
node, err := tsdb.TsdbNodeRing.GetNode(pk)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return []Pool{}, err
|
return []Pool{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
nodeAddrs, found := Config.ClusterList[node]
|
nodeAddrs, found := tsdb.Section.ClusterList[node]
|
||||||
if !found {
|
if !found {
|
||||||
return []Pool{}, errors.New("node not found")
|
return []Pool{}, errors.New("node not found")
|
||||||
}
|
}
|
||||||
|
|
||||||
var ps []Pool
|
var ps []Pool
|
||||||
for _, addr := range nodeAddrs.Addrs {
|
for _, addr := range nodeAddrs.Addrs {
|
||||||
onePool, found := TsdbConnPools.Get(addr)
|
onePool, found := tsdb.TsdbConnPools.Get(addr)
|
||||||
if !found {
|
if !found {
|
||||||
logger.Errorf("addr %s not found", addr)
|
logger.Errorf("addr %s not found", addr)
|
||||||
continue
|
continue
|
||||||
|
@ -325,97 +310,113 @@ func SelectPoolByPK(pk string) ([]Pool, error) {
|
||||||
return ps, nil
|
return ps, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTags(counter string) (tags string) {
|
type IndexMetricsResp struct {
|
||||||
idx := strings.IndexAny(counter, "/")
|
Data *dataobj.MetricResp `json:"dat"`
|
||||||
if idx == -1 {
|
Err string `json:"err"`
|
||||||
return ""
|
|
||||||
}
|
|
||||||
return counter[idx+1:]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type Tagkv struct {
|
func (tsdb *TsdbDataSource) QueryMetrics(recv dataobj.EndpointsRecv) *dataobj.MetricResp {
|
||||||
TagK string `json:"tagk"`
|
var result IndexMetricsResp
|
||||||
TagV []string `json:"tagv"`
|
err := PostIndex("/api/index/metrics", int64(tsdb.Section.CallTimeout), recv, &result)
|
||||||
}
|
|
||||||
|
|
||||||
type SeriesReq struct {
|
|
||||||
Endpoints []string `json:"endpoints"`
|
|
||||||
Metric string `json:"metric"`
|
|
||||||
Tagkv []*Tagkv `json:"tagkv"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type SeriesResp struct {
|
|
||||||
Dat []Series `json:"dat"`
|
|
||||||
Err string `json:"err"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type Series struct {
|
|
||||||
Endpoints []string `json:"endpoints"`
|
|
||||||
Metric string `json:"metric"`
|
|
||||||
Tags []string `json:"tags"`
|
|
||||||
Step int `json:"step"`
|
|
||||||
DsType string `json:"dstype"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetSeries(start, end int64, req []SeriesReq) ([]dataobj.QueryData, error) {
|
|
||||||
var res SeriesResp
|
|
||||||
var queryDatas []dataobj.QueryData
|
|
||||||
|
|
||||||
if len(req) < 1 {
|
|
||||||
return queryDatas, fmt.Errorf("req length < 1")
|
|
||||||
}
|
|
||||||
|
|
||||||
addrs := address.GetHTTPAddresses("index")
|
|
||||||
|
|
||||||
if len(addrs) < 1 {
|
|
||||||
return queryDatas, fmt.Errorf("index addr is nil")
|
|
||||||
}
|
|
||||||
|
|
||||||
i := rand.Intn(len(addrs))
|
|
||||||
addr := fmt.Sprintf("http://%s/api/index/counter/fullmatch", addrs[i])
|
|
||||||
|
|
||||||
resp, code, err := httplib.PostJSON(addr, time.Duration(Config.IndexTimeout)*time.Millisecond, req, nil)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return queryDatas, err
|
logger.Errorf("post index failed, %+v", err)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if code != 200 {
|
if result.Err != "" {
|
||||||
return nil, fmt.Errorf("index response status code != 200")
|
logger.Errorf("index xclude failed, %+v", result.Err)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = json.Unmarshal(resp, &res); err != nil {
|
return result.Data
|
||||||
logger.Error(string(resp))
|
}
|
||||||
return queryDatas, err
|
|
||||||
}
|
type IndexTagPairsResp struct {
|
||||||
|
Data []dataobj.IndexTagkvResp `json:"dat"`
|
||||||
for _, item := range res.Dat {
|
Err string `json:"err"`
|
||||||
counters := make([]string, 0)
|
}
|
||||||
if len(item.Tags) == 0 {
|
|
||||||
counters = append(counters, item.Metric)
|
func (tsdb *TsdbDataSource) QueryTagPairs(recv dataobj.EndpointMetricRecv) []dataobj.IndexTagkvResp {
|
||||||
} else {
|
var result IndexTagPairsResp
|
||||||
for _, tag := range item.Tags {
|
err := PostIndex("/api/index/tagkv", int64(tsdb.Section.CallTimeout), recv, &result)
|
||||||
tagMap, err := dataobj.SplitTagsString(tag)
|
if err != nil {
|
||||||
if err != nil {
|
logger.Errorf("post index failed, %+v", err)
|
||||||
logger.Warning(err, tag)
|
return nil
|
||||||
continue
|
}
|
||||||
}
|
|
||||||
tagStr := dataobj.SortedTags(tagMap)
|
if result.Err != "" || len(result.Data) == 0 {
|
||||||
counter := dataobj.PKWithTags(item.Metric, tagStr)
|
logger.Errorf("index xclude failed, %+v", result.Err)
|
||||||
counters = append(counters, counter)
|
return nil
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
return result.Data
|
||||||
queryData := dataobj.QueryData{
|
}
|
||||||
Start: start,
|
|
||||||
End: end,
|
type IndexCludeResp struct {
|
||||||
Endpoints: item.Endpoints,
|
Data []dataobj.XcludeResp `json:"dat"`
|
||||||
Counters: counters,
|
Err string `json:"err"`
|
||||||
ConsolFunc: "AVERAGE",
|
}
|
||||||
DsType: item.DsType,
|
|
||||||
Step: item.Step,
|
func (tsdb *TsdbDataSource) QueryIndexByClude(recv []dataobj.CludeRecv) []dataobj.XcludeResp {
|
||||||
}
|
var result IndexCludeResp
|
||||||
queryDatas = append(queryDatas, queryData)
|
err := PostIndex("/api/index/counter/clude", int64(tsdb.Section.CallTimeout), recv, &result)
|
||||||
}
|
if err != nil {
|
||||||
|
logger.Errorf("post index failed, %+v", err)
|
||||||
return queryDatas, err
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if result.Err != "" || len(result.Data) == 0 {
|
||||||
|
logger.Errorf("index xclude failed, %+v", result.Err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.Data
|
||||||
|
}
|
||||||
|
|
||||||
|
type IndexByFullTagsResp struct {
|
||||||
|
Data []dataobj.IndexByFullTagsResp `json:"dat"`
|
||||||
|
Err string `json:"err"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tsdb *TsdbDataSource) QueryIndexByFullTags(recv []dataobj.IndexByFullTagsRecv) []dataobj.IndexByFullTagsResp {
|
||||||
|
var result IndexByFullTagsResp
|
||||||
|
err := PostIndex("/api/index/counter/fullmatch", int64(tsdb.Section.CallTimeout),
|
||||||
|
recv, &result)
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("post index failed, %+v", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if result.Err != "" || len(result.Data) == 0 {
|
||||||
|
logger.Errorf("index fullTags failed, %+v", result.Err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.Data
|
||||||
|
}
|
||||||
|
|
||||||
|
func PostIndex(url string, calltimeout int64, recv interface{}, resp interface{}) error {
|
||||||
|
addrs := IndexList.Get()
|
||||||
|
if len(addrs) == 0 {
|
||||||
|
logger.Errorf("empty index addr")
|
||||||
|
return errors.New("empty index addr")
|
||||||
|
}
|
||||||
|
|
||||||
|
perm := rand.Perm(len(addrs))
|
||||||
|
var err error
|
||||||
|
for i := range perm {
|
||||||
|
url := fmt.Sprintf("http://%s%s", addrs[perm[i]], url)
|
||||||
|
err = httplib.Post(url).JSONBodyQuiet(recv).SetTimeout(
|
||||||
|
time.Duration(calltimeout) * time.Millisecond).ToJSON(&resp)
|
||||||
|
if err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
logger.Warningf("index %s failed, error:%v, req:%+v", url, err, recv)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("index %s failed, error:%v, req:%+v", url, err, recv)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
|
@ -1,4 +1,4 @@
|
||||||
package backend
|
package tsdb
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
|
@ -0,0 +1,212 @@
|
||||||
|
package tsdb
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/toolkits/pkg/errors"
|
||||||
|
|
||||||
|
"github.com/didi/nightingale/src/dataobj"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/pools"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/stats"
|
||||||
|
"github.com/toolkits/pkg/concurrent/semaphore"
|
||||||
|
"github.com/toolkits/pkg/container/list"
|
||||||
|
"github.com/toolkits/pkg/container/set"
|
||||||
|
"github.com/toolkits/pkg/logger"
|
||||||
|
"github.com/toolkits/pkg/str"
|
||||||
|
)
|
||||||
|
|
||||||
|
type TsdbSection struct {
|
||||||
|
Enabled bool `yaml:"enabled"`
|
||||||
|
Name string `yaml:"name"`
|
||||||
|
Batch int `yaml:"batch"`
|
||||||
|
ConnTimeout int `yaml:"connTimeout"`
|
||||||
|
CallTimeout int `yaml:"callTimeout"`
|
||||||
|
WorkerNum int `yaml:"workerNum"`
|
||||||
|
MaxConns int `yaml:"maxConns"`
|
||||||
|
MaxIdle int `yaml:"maxIdle"`
|
||||||
|
IndexTimeout int `yaml:"indexTimeout"`
|
||||||
|
|
||||||
|
Replicas int `yaml:"replicas"`
|
||||||
|
Cluster map[string]string `yaml:"cluster"`
|
||||||
|
ClusterList map[string]*ClusterNode `json:"clusterList"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ClusterNode struct {
|
||||||
|
Addrs []string `json:"addrs"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type TsdbDataSource struct {
|
||||||
|
//config
|
||||||
|
Section TsdbSection
|
||||||
|
SendQueueMaxSize int
|
||||||
|
SendTaskSleepInterval time.Duration
|
||||||
|
|
||||||
|
// 服务节点的一致性哈希环 pk -> node
|
||||||
|
TsdbNodeRing *ConsistentHashRing
|
||||||
|
|
||||||
|
// 发送缓存队列 node -> queue_of_data
|
||||||
|
TsdbQueues map[string]*list.SafeListLimited
|
||||||
|
|
||||||
|
// 连接池 node_address -> connection_pool
|
||||||
|
TsdbConnPools *pools.ConnPools
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tsdb *TsdbDataSource) Init() {
|
||||||
|
|
||||||
|
// init hash ring
|
||||||
|
tsdb.TsdbNodeRing = NewConsistentHashRing(int32(tsdb.Section.Replicas),
|
||||||
|
str.KeysOfMap(tsdb.Section.Cluster))
|
||||||
|
|
||||||
|
// init connPool
|
||||||
|
tsdbInstances := set.NewSafeSet()
|
||||||
|
for _, item := range tsdb.Section.ClusterList {
|
||||||
|
for _, addr := range item.Addrs {
|
||||||
|
tsdbInstances.Add(addr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tsdb.TsdbConnPools = pools.NewConnPools(
|
||||||
|
tsdb.Section.MaxConns, tsdb.Section.MaxIdle, tsdb.Section.ConnTimeout, tsdb.Section.CallTimeout,
|
||||||
|
tsdbInstances.ToSlice(),
|
||||||
|
)
|
||||||
|
|
||||||
|
// init queues
|
||||||
|
tsdb.TsdbQueues = make(map[string]*list.SafeListLimited)
|
||||||
|
for node, item := range tsdb.Section.ClusterList {
|
||||||
|
for _, addr := range item.Addrs {
|
||||||
|
tsdb.TsdbQueues[node+addr] = list.NewSafeListLimited(tsdb.SendQueueMaxSize)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// start task
|
||||||
|
tsdbConcurrent := tsdb.Section.WorkerNum
|
||||||
|
if tsdbConcurrent < 1 {
|
||||||
|
tsdbConcurrent = 1
|
||||||
|
}
|
||||||
|
for node, item := range tsdb.Section.ClusterList {
|
||||||
|
for _, addr := range item.Addrs {
|
||||||
|
queue := tsdb.TsdbQueues[node+addr]
|
||||||
|
go tsdb.Send2TsdbTask(queue, node, addr, tsdbConcurrent)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
go GetIndexLoop()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Push2TsdbSendQueue pushes data to a TSDB instance which depends on the consistent ring.
|
||||||
|
func (tsdb *TsdbDataSource) Push2Queue(items []*dataobj.MetricValue) {
|
||||||
|
errCnt := 0
|
||||||
|
for _, item := range items {
|
||||||
|
tsdbItem := convert2TsdbItem(item)
|
||||||
|
stats.Counter.Set("tsdb.queue.push", 1)
|
||||||
|
|
||||||
|
node, err := tsdb.TsdbNodeRing.GetNode(item.PK())
|
||||||
|
if err != nil {
|
||||||
|
logger.Warningf("get tsdb node error: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
cnode := tsdb.Section.ClusterList[node]
|
||||||
|
for _, addr := range cnode.Addrs {
|
||||||
|
Q := tsdb.TsdbQueues[node+addr]
|
||||||
|
// 队列已满
|
||||||
|
if !Q.PushFront(tsdbItem) {
|
||||||
|
errCnt += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// statistics
|
||||||
|
if errCnt > 0 {
|
||||||
|
stats.Counter.Set("tsdb.queue.err", errCnt)
|
||||||
|
logger.Error("Push2TsdbSendQueue err num: ", errCnt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tsdb *TsdbDataSource) Send2TsdbTask(Q *list.SafeListLimited, node, addr string, concurrent int) {
|
||||||
|
batch := tsdb.Section.Batch // 一次发送,最多batch条数据
|
||||||
|
Q = tsdb.TsdbQueues[node+addr]
|
||||||
|
|
||||||
|
sema := semaphore.NewSemaphore(concurrent)
|
||||||
|
|
||||||
|
for {
|
||||||
|
items := Q.PopBackBy(batch)
|
||||||
|
count := len(items)
|
||||||
|
if count == 0 {
|
||||||
|
time.Sleep(tsdb.SendTaskSleepInterval)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
tsdbItems := make([]*dataobj.TsdbItem, count)
|
||||||
|
stats.Counter.Set("points.out.tsdb", count)
|
||||||
|
for i := 0; i < count; i++ {
|
||||||
|
tsdbItems[i] = items[i].(*dataobj.TsdbItem)
|
||||||
|
logger.Debug("send to tsdb->: ", tsdbItems[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
//控制并发
|
||||||
|
sema.Acquire()
|
||||||
|
go func(addr string, tsdbItems []*dataobj.TsdbItem, count int) {
|
||||||
|
defer sema.Release()
|
||||||
|
|
||||||
|
resp := &dataobj.SimpleRpcResponse{}
|
||||||
|
var err error
|
||||||
|
sendOk := false
|
||||||
|
for i := 0; i < 3; i++ { //最多重试3次
|
||||||
|
err = tsdb.TsdbConnPools.Call(addr, "Tsdb.Send", tsdbItems, resp)
|
||||||
|
if err == nil {
|
||||||
|
sendOk = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
time.Sleep(time.Millisecond * 10)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !sendOk {
|
||||||
|
stats.Counter.Set("points.out.tsdb.err", count)
|
||||||
|
logger.Errorf("send %v to tsdb %s:%s fail: %v", tsdbItems, node, addr, err)
|
||||||
|
} else {
|
||||||
|
logger.Debugf("send to tsdb %s:%s ok", node, addr)
|
||||||
|
}
|
||||||
|
}(addr, tsdbItems, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tsdb *TsdbDataSource) GetInstance(metric, endpoint string, tags map[string]string) []string {
|
||||||
|
counter, err := dataobj.GetCounter(metric, "", tags)
|
||||||
|
errors.Dangerous(err)
|
||||||
|
|
||||||
|
pk := dataobj.PKWithCounter(endpoint, counter)
|
||||||
|
pools, err := tsdb.SelectPoolByPK(pk)
|
||||||
|
addrs := make([]string, len(pools))
|
||||||
|
for i, pool := range pools {
|
||||||
|
addrs[i] = pool.Addr
|
||||||
|
}
|
||||||
|
return addrs
|
||||||
|
}
|
||||||
|
|
||||||
|
// 打到 Tsdb 的数据,要根据 rrdtool 的特定 来限制 step、counterType、timestamp
|
||||||
|
func convert2TsdbItem(d *dataobj.MetricValue) *dataobj.TsdbItem {
|
||||||
|
item := &dataobj.TsdbItem{
|
||||||
|
Endpoint: d.Endpoint,
|
||||||
|
Metric: d.Metric,
|
||||||
|
Value: d.Value,
|
||||||
|
Timestamp: d.Timestamp,
|
||||||
|
Tags: d.Tags,
|
||||||
|
TagsMap: d.TagsMap,
|
||||||
|
Step: int(d.Step),
|
||||||
|
Heartbeat: int(d.Step) * 2,
|
||||||
|
DsType: dataobj.GAUGE,
|
||||||
|
Min: "U",
|
||||||
|
Max: "U",
|
||||||
|
}
|
||||||
|
|
||||||
|
return item
|
||||||
|
}
|
||||||
|
|
||||||
|
func getTags(counter string) (tags string) {
|
||||||
|
idx := strings.IndexAny(counter, "/")
|
||||||
|
if idx == -1 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return counter[idx+1:]
|
||||||
|
}
|
|
@ -3,9 +3,12 @@ package config
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/identity"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/report"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/didi/nightingale/src/modules/transfer/backend"
|
"github.com/didi/nightingale/src/modules/transfer/backend"
|
||||||
|
"github.com/didi/nightingale/src/modules/transfer/backend/tsdb"
|
||||||
"github.com/didi/nightingale/src/toolkits/logger"
|
"github.com/didi/nightingale/src/toolkits/logger"
|
||||||
|
|
||||||
"github.com/spf13/viper"
|
"github.com/spf13/viper"
|
||||||
|
@ -13,12 +16,14 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type ConfYaml struct {
|
type ConfYaml struct {
|
||||||
Debug bool `yaml:"debug"`
|
Debug bool `yaml:"debug"`
|
||||||
MinStep int `yaml:"minStep"`
|
MinStep int `yaml:"minStep"`
|
||||||
Logger logger.LoggerSection `yaml:"logger"`
|
Logger logger.LoggerSection `yaml:"logger"`
|
||||||
Backend backend.BackendSection `yaml:"backend"`
|
Backend backend.BackendSection `yaml:"backend"`
|
||||||
HTTP HTTPSection `yaml:"http"`
|
HTTP HTTPSection `yaml:"http"`
|
||||||
RPC RPCSection `yaml:"rpc"`
|
RPC RPCSection `yaml:"rpc"`
|
||||||
|
Identity identity.IdentitySection `yaml:"identity"`
|
||||||
|
Report report.ReportSection `yaml:"report"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type IndexSection struct {
|
type IndexSection struct {
|
||||||
|
@ -45,13 +50,13 @@ var (
|
||||||
Config *ConfYaml
|
Config *ConfYaml
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewClusterNode(addrs []string) *backend.ClusterNode {
|
func NewClusterNode(addrs []string) *tsdb.ClusterNode {
|
||||||
return &backend.ClusterNode{Addrs: addrs}
|
return &tsdb.ClusterNode{Addrs: addrs}
|
||||||
}
|
}
|
||||||
|
|
||||||
// map["node"]="host1,host2" --> map["node"]=["host1", "host2"]
|
// map["node"]="host1,host2" --> map["node"]=["host1", "host2"]
|
||||||
func formatClusterItems(cluster map[string]string) map[string]*backend.ClusterNode {
|
func formatClusterItems(cluster map[string]string) map[string]*tsdb.ClusterNode {
|
||||||
ret := make(map[string]*backend.ClusterNode)
|
ret := make(map[string]*tsdb.ClusterNode)
|
||||||
for node, clusterStr := range cluster {
|
for node, clusterStr := range cluster {
|
||||||
items := strings.Split(clusterStr, ",")
|
items := strings.Split(clusterStr, ",")
|
||||||
nitems := make([]string, 0)
|
nitems := make([]string, 0)
|
||||||
|
@ -80,21 +85,36 @@ func Parse(conf string) error {
|
||||||
viper.SetDefault("minStep", 1)
|
viper.SetDefault("minStep", 1)
|
||||||
|
|
||||||
viper.SetDefault("backend", map[string]interface{}{
|
viper.SetDefault("backend", map[string]interface{}{
|
||||||
|
"datasource": "tsdb",
|
||||||
|
"straPath": "/api/portal/stras/effective?all=1",
|
||||||
|
})
|
||||||
|
|
||||||
|
viper.SetDefault("backend.judge", map[string]interface{}{
|
||||||
|
"batch": 200, //每次拉取文件的个数
|
||||||
|
"workerNum": 32,
|
||||||
|
"maxConns": 2000, //查询和推送数据的并发个数
|
||||||
|
"maxIdle": 32, //建立的连接池的最大空闲数
|
||||||
|
"connTimeout": 1000, //链接超时时间,单位毫秒
|
||||||
|
"callTimeout": 3000, //访问超时时间,单位毫秒
|
||||||
|
"hbsMod": "monapi",
|
||||||
|
})
|
||||||
|
|
||||||
|
viper.SetDefault("backend.tsdb", map[string]interface{}{
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
|
"name": "tsdb",
|
||||||
"batch": 200, //每次拉取文件的个数
|
"batch": 200, //每次拉取文件的个数
|
||||||
"replicas": 500, //一致性hash虚拟节点
|
|
||||||
"workerNum": 32,
|
"workerNum": 32,
|
||||||
"maxConns": 2000, //查询和推送数据的并发个数
|
"maxConns": 2000, //查询和推送数据的并发个数
|
||||||
"maxIdle": 32, //建立的连接池的最大空闲数
|
"maxIdle": 32, //建立的连接池的最大空闲数
|
||||||
"connTimeout": 1000, //链接超时时间,单位毫秒
|
"connTimeout": 1000, //链接超时时间,单位毫秒
|
||||||
"callTimeout": 3000, //访问超时时间,单位毫秒
|
"callTimeout": 3000, //访问超时时间,单位毫秒
|
||||||
"indexTimeout": 3000, //访问index超时时间,单位毫秒
|
"indexTimeout": 3000, //访问index超时时间,单位毫秒
|
||||||
"straPath": "/api/portal/stras/effective?all=1",
|
"replicas": 500, //一致性hash虚拟节点
|
||||||
"hbsMod": "monapi",
|
|
||||||
})
|
})
|
||||||
|
|
||||||
viper.SetDefault("backend.influxdb", map[string]interface{}{
|
viper.SetDefault("backend.influxdb", map[string]interface{}{
|
||||||
"enabled": false,
|
"enabled": false,
|
||||||
|
"name": "influxdb",
|
||||||
"batch": 200, //每次拉取文件的个数
|
"batch": 200, //每次拉取文件的个数
|
||||||
"maxRetry": 3, //重试次数
|
"maxRetry": 3, //重试次数
|
||||||
"workerNum": 32,
|
"workerNum": 32,
|
||||||
|
@ -104,6 +124,7 @@ func Parse(conf string) error {
|
||||||
|
|
||||||
viper.SetDefault("backend.opentsdb", map[string]interface{}{
|
viper.SetDefault("backend.opentsdb", map[string]interface{}{
|
||||||
"enabled": false,
|
"enabled": false,
|
||||||
|
"name": "opentsdb",
|
||||||
"batch": 200, //每次拉取文件的个数
|
"batch": 200, //每次拉取文件的个数
|
||||||
"maxRetry": 3, //重试次数
|
"maxRetry": 3, //重试次数
|
||||||
"workerNum": 32,
|
"workerNum": 32,
|
||||||
|
@ -113,12 +134,29 @@ func Parse(conf string) error {
|
||||||
"callTimeout": 3000, //访问超时时间,单位毫秒
|
"callTimeout": 3000, //访问超时时间,单位毫秒
|
||||||
})
|
})
|
||||||
|
|
||||||
|
viper.SetDefault("backend.kafka", map[string]interface{}{
|
||||||
|
"enabled": false,
|
||||||
|
"name": "kafka",
|
||||||
|
"maxRetry": 3, //重试次数
|
||||||
|
"connTimeout": 1000, //链接超时时间,单位毫秒
|
||||||
|
"callTimeout": 3000, //访问超时时间,单位毫秒
|
||||||
|
})
|
||||||
|
|
||||||
|
viper.SetDefault("report", map[string]interface{}{
|
||||||
|
"mod": "transfer",
|
||||||
|
"enabled": true,
|
||||||
|
"interval": 4000,
|
||||||
|
"timeout": 3000,
|
||||||
|
"api": "api/hbs/heartbeat",
|
||||||
|
"remark": "",
|
||||||
|
})
|
||||||
|
|
||||||
err = viper.Unmarshal(&Config)
|
err = viper.Unmarshal(&Config)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("cannot read yml[%s]: %v", conf, err)
|
return fmt.Errorf("cannot read yml[%s]: %v", conf, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
Config.Backend.ClusterList = formatClusterItems(Config.Backend.Cluster)
|
Config.Backend.Tsdb.ClusterList = formatClusterItems(Config.Backend.Tsdb.Cluster)
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,7 +26,7 @@ func updateJudgeQueue() {
|
||||||
if !backend.JudgeQueues.Exists(instance) {
|
if !backend.JudgeQueues.Exists(instance) {
|
||||||
q := list.NewSafeListLimited(backend.DefaultSendQueueMaxSize)
|
q := list.NewSafeListLimited(backend.DefaultSendQueueMaxSize)
|
||||||
backend.JudgeQueues.Set(instance, q)
|
backend.JudgeQueues.Set(instance, q)
|
||||||
go backend.Send2JudgeTask(q, instance, backend.Config.WorkerNum)
|
go backend.Send2JudgeTask(q, instance, backend.Judge.WorkerNum)
|
||||||
} else {
|
} else {
|
||||||
backend.JudgeQueues.UpdateTS(instance)
|
backend.JudgeQueues.UpdateTS(instance)
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,7 +41,7 @@ func getStrategy() {
|
||||||
perm := rand.Perm(len(addrs))
|
perm := rand.Perm(len(addrs))
|
||||||
var err error
|
var err error
|
||||||
for i := range perm {
|
for i := range perm {
|
||||||
url := fmt.Sprintf("http://%s%s", addrs[perm[i]], backend.Config.StraPath)
|
url := fmt.Sprintf("http://%s%s", addrs[perm[i]], backend.StraPath)
|
||||||
err = httplib.Get(url).SetTimeout(time.Duration(3000) * time.Millisecond).ToJSON(&stras)
|
err = httplib.Get(url).SetTimeout(time.Duration(3000) * time.Millisecond).ToJSON(&stras)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -4,7 +4,8 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/didi/nightingale/src/dataobj"
|
"github.com/toolkits/pkg/logger"
|
||||||
|
|
||||||
"github.com/didi/nightingale/src/modules/transfer/backend"
|
"github.com/didi/nightingale/src/modules/transfer/backend"
|
||||||
"github.com/didi/nightingale/src/modules/transfer/cache"
|
"github.com/didi/nightingale/src/modules/transfer/cache"
|
||||||
"github.com/didi/nightingale/src/toolkits/http/render"
|
"github.com/didi/nightingale/src/toolkits/http/render"
|
||||||
|
@ -50,16 +51,14 @@ func tsdbInstance(c *gin.Context) {
|
||||||
var input tsdbInstanceRecv
|
var input tsdbInstanceRecv
|
||||||
errors.Dangerous(c.ShouldBindJSON(&input))
|
errors.Dangerous(c.ShouldBindJSON(&input))
|
||||||
|
|
||||||
counter, err := backend.GetCounter(input.Metric, "", input.TagMap)
|
dataSource, err := backend.GetDataSourceFor("tsdb")
|
||||||
errors.Dangerous(err)
|
if err != nil {
|
||||||
|
logger.Warningf("could not find datasource")
|
||||||
pk := dataobj.PKWithCounter(input.Endpoint, counter)
|
render.Message(c, err)
|
||||||
pools, err := backend.SelectPoolByPK(pk)
|
return
|
||||||
addrs := make([]string, len(pools))
|
|
||||||
for i, pool := range pools {
|
|
||||||
addrs[i] = pool.Addr
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addrs := dataSource.GetInstance(input.Metric, input.Endpoint, input.TagMap)
|
||||||
render.Data(c, addrs, nil)
|
render.Data(c, addrs, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,24 +40,19 @@ func PushData(c *gin.Context) {
|
||||||
metricValues = append(metricValues, v)
|
metricValues = append(metricValues, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
if backend.Config.Enabled {
|
// send to judge
|
||||||
backend.Push2TsdbSendQueue(metricValues)
|
backend.Push2JudgeQueue(metricValues)
|
||||||
}
|
|
||||||
|
|
||||||
if backend.Config.Enabled {
|
// send to push endpoints
|
||||||
backend.Push2JudgeSendQueue(metricValues)
|
pushEndpoints, err := backend.GetPushEndpoints()
|
||||||
}
|
if err != nil {
|
||||||
|
logger.Errorf("could not find pushendpoint")
|
||||||
if backend.Config.Influxdb.Enabled {
|
render.Data(c, "error", err)
|
||||||
backend.Push2InfluxdbSendQueue(metricValues)
|
return
|
||||||
}
|
} else {
|
||||||
|
for _, pushendpoint := range pushEndpoints {
|
||||||
if backend.Config.OpenTsdb.Enabled {
|
pushendpoint.Push2Queue(metricValues)
|
||||||
backend.Push2OpenTsdbSendQueue(metricValues)
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if backend.Config.Kafka.Enabled {
|
|
||||||
backend.Push2KafkaSendQueue(metricValues)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if msg != "" {
|
if msg != "" {
|
||||||
|
|
|
@ -12,34 +12,22 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type QueryDataReq struct {
|
type QueryDataReq struct {
|
||||||
Start int64 `json:"start"`
|
queryData []dataobj.QueryData
|
||||||
End int64 `json:"end"`
|
|
||||||
Series []backend.SeriesReq `json:"series"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func QueryDataForJudge(c *gin.Context) {
|
|
||||||
var inputs []dataobj.QueryData
|
|
||||||
|
|
||||||
errors.Dangerous(c.ShouldBindJSON(&inputs))
|
|
||||||
resp := backend.FetchData(inputs)
|
|
||||||
render.Data(c, resp, nil)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func QueryData(c *gin.Context) {
|
func QueryData(c *gin.Context) {
|
||||||
stats.Counter.Set("data.api.qp10s", 1)
|
stats.Counter.Set("data.api.qp10s", 1)
|
||||||
|
|
||||||
var input QueryDataReq
|
dataSource, err := backend.GetDataSourceFor("")
|
||||||
|
|
||||||
errors.Dangerous(c.ShouldBindJSON(&input))
|
|
||||||
|
|
||||||
queryData, err := backend.GetSeries(input.Start, input.End, input.Series)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error(err, input)
|
logger.Warningf("could not find datasource")
|
||||||
render.Message(c, "query err")
|
render.Message(c, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
resp := backend.FetchData(queryData)
|
var queryDataReq QueryDataReq
|
||||||
|
errors.Dangerous(c.ShouldBindJSON(&queryDataReq))
|
||||||
|
resp := dataSource.QueryData(queryDataReq.queryData)
|
||||||
render.Data(c, resp, nil)
|
render.Data(c, resp, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,7 +39,13 @@ func QueryDataForUI(c *gin.Context) {
|
||||||
start := input.Start
|
start := input.Start
|
||||||
end := input.End
|
end := input.End
|
||||||
|
|
||||||
resp := backend.FetchDataForUI(input)
|
dataSource, err := backend.GetDataSourceFor("")
|
||||||
|
if err != nil {
|
||||||
|
logger.Warningf("could not find datasource")
|
||||||
|
render.Message(c, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp := dataSource.QueryDataForUI(input)
|
||||||
for _, d := range resp {
|
for _, d := range resp {
|
||||||
data := &dataobj.QueryDataForUIResp{
|
data := &dataobj.QueryDataForUIResp{
|
||||||
Start: d.Start,
|
Start: d.Start,
|
||||||
|
@ -70,7 +64,7 @@ func QueryDataForUI(c *gin.Context) {
|
||||||
comparison := input.Comparisons[i]
|
comparison := input.Comparisons[i]
|
||||||
input.Start = start - comparison
|
input.Start = start - comparison
|
||||||
input.End = end - comparison
|
input.End = end - comparison
|
||||||
res := backend.FetchDataForUI(input)
|
res := dataSource.QueryDataForUI(input)
|
||||||
for _, d := range res {
|
for _, d := range res {
|
||||||
for j := range d.Values {
|
for j := range d.Values {
|
||||||
d.Values[j].Timestamp += comparison
|
d.Values[j].Timestamp += comparison
|
||||||
|
@ -93,3 +87,68 @@ func QueryDataForUI(c *gin.Context) {
|
||||||
|
|
||||||
render.Data(c, respData, nil)
|
render.Data(c, respData, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetMetrics(c *gin.Context) {
|
||||||
|
stats.Counter.Set("metric.qp10s", 1)
|
||||||
|
recv := dataobj.EndpointsRecv{}
|
||||||
|
errors.Dangerous(c.ShouldBindJSON(&recv))
|
||||||
|
|
||||||
|
dataSource, err := backend.GetDataSourceFor("")
|
||||||
|
if err != nil {
|
||||||
|
logger.Warningf("could not find datasource")
|
||||||
|
render.Message(c, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := dataSource.QueryMetrics(recv)
|
||||||
|
|
||||||
|
render.Data(c, resp, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetTagPairs(c *gin.Context) {
|
||||||
|
stats.Counter.Set("tag.qp10s", 1)
|
||||||
|
recv := dataobj.EndpointMetricRecv{}
|
||||||
|
errors.Dangerous(c.ShouldBindJSON(&recv))
|
||||||
|
|
||||||
|
dataSource, err := backend.GetDataSourceFor("")
|
||||||
|
if err != nil {
|
||||||
|
logger.Warningf("could not find datasource")
|
||||||
|
render.Message(c, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := dataSource.QueryTagPairs(recv)
|
||||||
|
render.Data(c, resp, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetIndexByClude(c *gin.Context) {
|
||||||
|
stats.Counter.Set("xclude.qp10s", 1)
|
||||||
|
recvs := make([]dataobj.CludeRecv, 0)
|
||||||
|
errors.Dangerous(c.ShouldBindJSON(&recvs))
|
||||||
|
|
||||||
|
dataSource, err := backend.GetDataSourceFor("")
|
||||||
|
if err != nil {
|
||||||
|
logger.Warningf("could not find datasource")
|
||||||
|
render.Message(c, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := dataSource.QueryIndexByClude(recvs)
|
||||||
|
render.Data(c, resp, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetIndexByFullTags(c *gin.Context) {
|
||||||
|
stats.Counter.Set("counter.qp10s", 1)
|
||||||
|
recvs := make([]dataobj.IndexByFullTagsRecv, 0)
|
||||||
|
errors.Dangerous(c.ShouldBindJSON(&recvs))
|
||||||
|
|
||||||
|
dataSource, err := backend.GetDataSourceFor("")
|
||||||
|
if err != nil {
|
||||||
|
logger.Warningf("could not find datasource")
|
||||||
|
render.Message(c, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := dataSource.QueryIndexByFullTags(recvs)
|
||||||
|
render.Data(c, resp, nil)
|
||||||
|
}
|
||||||
|
|
|
@ -22,6 +22,14 @@ func Config(r *gin.Engine) {
|
||||||
sys.POST("/data/ui", QueryDataForUI)
|
sys.POST("/data/ui", QueryDataForUI)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
index := r.Group("/api/index")
|
||||||
|
{
|
||||||
|
index.POST("/metrics", GetMetrics)
|
||||||
|
index.POST("/tagkv", GetTagPairs)
|
||||||
|
index.POST("/counter/clude", GetIndexByClude)
|
||||||
|
index.POST("/counter/fullmatch", GetIndexByFullTags)
|
||||||
|
}
|
||||||
|
|
||||||
v2 := r.Group("/api/transfer/v2")
|
v2 := r.Group("/api/transfer/v2")
|
||||||
{
|
{
|
||||||
v2.POST("/data", QueryData)
|
v2.POST("/data", QueryData)
|
||||||
|
|
|
@ -36,25 +36,20 @@ func (t *Transfer) Push(args []*dataobj.MetricValue, reply *dataobj.TransferResp
|
||||||
items = append(items, v)
|
items = append(items, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
if backend.Config.Enabled {
|
// send to judge
|
||||||
backend.Push2TsdbSendQueue(items)
|
backend.Push2JudgeQueue(items)
|
||||||
|
|
||||||
|
// send to push endpoints
|
||||||
|
pushEndpoints, err := backend.GetPushEndpoints()
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("could not find pushendpoint")
|
||||||
|
return err
|
||||||
|
} else {
|
||||||
|
for _, pushendpoint := range pushEndpoints {
|
||||||
|
pushendpoint.Push2Queue(items)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if backend.Config.Enabled {
|
|
||||||
backend.Push2JudgeSendQueue(items)
|
|
||||||
}
|
|
||||||
|
|
||||||
if backend.Config.Influxdb.Enabled {
|
|
||||||
backend.Push2InfluxdbSendQueue(items)
|
|
||||||
}
|
|
||||||
|
|
||||||
if backend.Config.OpenTsdb.Enabled {
|
|
||||||
backend.Push2OpenTsdbSendQueue(items)
|
|
||||||
}
|
|
||||||
|
|
||||||
if backend.Config.Kafka.Enabled {
|
|
||||||
backend.Push2KafkaSendQueue(items)
|
|
||||||
}
|
|
||||||
if reply.Invalid == 0 {
|
if reply.Invalid == 0 {
|
||||||
reply.Msg = "ok"
|
reply.Msg = "ok"
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,9 +3,15 @@ package rpc
|
||||||
import (
|
import (
|
||||||
"github.com/didi/nightingale/src/dataobj"
|
"github.com/didi/nightingale/src/dataobj"
|
||||||
"github.com/didi/nightingale/src/modules/transfer/backend"
|
"github.com/didi/nightingale/src/modules/transfer/backend"
|
||||||
|
"github.com/toolkits/pkg/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (t *Transfer) Query(args []dataobj.QueryData, reply *dataobj.QueryDataResp) error {
|
func (t *Transfer) Query(args []dataobj.QueryData, reply *dataobj.QueryDataResp) error {
|
||||||
reply.Data = backend.FetchData(args)
|
dataSource, err := backend.GetDataSourceFor("")
|
||||||
|
if err != nil {
|
||||||
|
logger.Warningf("could not find datasource")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
reply.Data = dataSource.QueryData(args)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,7 +13,9 @@ import (
|
||||||
"github.com/didi/nightingale/src/modules/transfer/http/routes"
|
"github.com/didi/nightingale/src/modules/transfer/http/routes"
|
||||||
"github.com/didi/nightingale/src/modules/transfer/rpc"
|
"github.com/didi/nightingale/src/modules/transfer/rpc"
|
||||||
"github.com/didi/nightingale/src/toolkits/http"
|
"github.com/didi/nightingale/src/toolkits/http"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/identity"
|
||||||
tlogger "github.com/didi/nightingale/src/toolkits/logger"
|
tlogger "github.com/didi/nightingale/src/toolkits/logger"
|
||||||
|
"github.com/didi/nightingale/src/toolkits/report"
|
||||||
"github.com/didi/nightingale/src/toolkits/stats"
|
"github.com/didi/nightingale/src/toolkits/stats"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
@ -61,9 +63,11 @@ func main() {
|
||||||
tlogger.Init(cfg.Logger)
|
tlogger.Init(cfg.Logger)
|
||||||
go stats.Init("n9e.transfer")
|
go stats.Init("n9e.transfer")
|
||||||
|
|
||||||
|
identity.Init(cfg.Identity)
|
||||||
backend.Init(cfg.Backend)
|
backend.Init(cfg.Backend)
|
||||||
cron.Init()
|
cron.Init()
|
||||||
|
|
||||||
|
go report.Init(cfg.Report, "monapi")
|
||||||
go rpc.Start()
|
go rpc.Start()
|
||||||
|
|
||||||
r := gin.New()
|
r := gin.New()
|
||||||
|
|
Loading…
Reference in New Issue