Merge branch 'main' into feature_logs_agent

This commit is contained in:
kongfei 2022-06-02 21:41:21 +08:00
commit 4d90e43c1b
12 changed files with 2126 additions and 281 deletions

View File

@ -1,7 +1,7 @@
.PHONY: start build
APP = categraf
VER = 0.1.0
VER = 0.1.1
all: build

View File

@ -59,6 +59,7 @@ tar zcvf categraf.tar.gz categraf conf
- [x] prometheus
- [x] tomcat
- [x] nvidia_smi
- [x] nginx_upstream_check
- [ ] k8s solution
- [ ] nginx vts
- [ ] mongodb
@ -67,7 +68,7 @@ tar zcvf categraf.tar.gz categraf conf
- [ ] kafka
- [ ] elasticsearch
- [ ] prometheus discovery
- [ ] windows
- [x] windows
- [ ] mssql
- [ ] iis
- [ ] weblogic
@ -77,6 +78,7 @@ tar zcvf categraf.tar.gz categraf conf
- [ ] zookeeper
- [ ] statsd
- [ ] snmp
- [x] switch_legacy
- [ ] ipmi
- [ ] smartctl
- [ ] logging

View File

@ -27,6 +27,7 @@ import (
_ "flashcat.cloud/categraf/inputs/net"
_ "flashcat.cloud/categraf/inputs/net_response"
_ "flashcat.cloud/categraf/inputs/netstat"
_ "flashcat.cloud/categraf/inputs/nginx_upstream_check"
_ "flashcat.cloud/categraf/inputs/ntp"
_ "flashcat.cloud/categraf/inputs/nvidia_smi"
_ "flashcat.cloud/categraf/inputs/oracle"

View File

@ -186,7 +186,7 @@ func convert(item *types.Sample) *prompb.TimeSeries {
// add label: agent_hostname
if _, has := item.Labels[agentHostnameLabelKey]; !has {
if !config.Config.Global.OmitHostname {
item.Labels[agentHostnameLabelKey] = config.Config.Global.Hostname
item.Labels[agentHostnameLabelKey] = config.Config.GetHostname()
}
}

View File

@ -0,0 +1,44 @@
# # collect interval
# interval = 15
[[instances]]
targets = [
# "http://127.0.0.1/status?format=json",
# "http://10.2.3.56/status?format=json"
]
# # append some labels for series
# labels = { region="cloud", product="n9e" }
# # interval = global.interval * interval_times
# interval_times = 1
## Set http_proxy (categraf uses the system-wide proxy settings if it is not set)
# http_proxy = "http://localhost:8888"
## Interface to use when dialing an address
# interface = "eth0"
## HTTP Request Method
# method = "GET"
## Set timeout (default 5 seconds)
# timeout = "5s"
## Whether to follow redirects from the server (defaults to false)
# follow_redirects = false
## Optional HTTP Basic Auth Credentials
# username = "username"
# password = "pa$$word"
## Optional headers
# headers = ["X-From", "categraf", "X-Xyz", "abc"]
## Optional TLS Config
# use_tls = false
# tls_ca = "/etc/categraf/ca.pem"
# tls_cert = "/etc/categraf/cert.pem"
# tls_key = "/etc/categraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false

View File

@ -4,7 +4,6 @@ import (
"encoding/json"
"fmt"
"net"
"os"
"path"
"strings"
"time"
@ -16,6 +15,7 @@ import (
type Global struct {
PrintConfigs bool `toml:"print_configs"`
Hostname string `toml:"hostname"`
IP string `toml:"-"`
OmitHostname bool `toml:"omit_hostname"`
Labels map[string]string `toml:"labels"`
Precision string `toml:"precision"`
@ -68,7 +68,11 @@ func InitConfig(configDir string, debugMode bool, testMode bool) error {
return fmt.Errorf("failed to load configs of dir: %s", configDir)
}
if err := Config.fillHostname(); err != nil {
if err := Config.fillIP(); err != nil {
return err
}
if err := InitHostname(); err != nil {
return err
}
@ -80,38 +84,34 @@ func InitConfig(configDir string, debugMode bool, testMode bool) error {
return nil
}
// NOTE(review): this span is a rendered diff hunk. The removed fillHostname
// body and the added fillIP body are interleaved, so read it as two versions
// of the code rather than as a single function.
//
// fillHostname (removed version) filled an empty Global.Hostname from
// GetHostname and expanded "$hostname" and "$ip" inside it in place.
func (c *ConfigType) fillHostname() error {
if c.Global.Hostname == "" {
name, err := GetHostname()
if err != nil {
return err
}
c.Global.Hostname = name
// fillIP (added version) stores the address returned by GetOutboundIP in
// Global.IP, but only when Global.Hostname contains the "$ip" placeholder.
func (c *ConfigType) fillIP() error {
if !strings.Contains(c.Global.Hostname, "$ip") {
return nil
}
if strings.Contains(c.Global.Hostname, "$hostname") {
name, err := GetHostname()
if err != nil {
return err
}
c.Global.Hostname = strings.Replace(c.Global.Hostname, "$hostname", name, -1)
}
if strings.Contains(c.Global.Hostname, "$ip") {
ip, err := GetOutboundIP()
if err != nil {
return err
}
c.Global.Hostname = strings.Replace(c.Global.Hostname, "$ip", fmt.Sprint(ip), -1)
ip, err := GetOutboundIP()
if err != nil {
return err
}
c.Global.IP = fmt.Sprint(ip)
return nil
}
// GetHostname returns the host name to attach to reported series.
//
// When Global.Hostname is empty the cached OS host name is returned as is.
// Otherwise Global.Hostname is treated as a template: every "$hostname" is
// replaced by the cached OS host name first, then every "$ip" by Global.IP.
func (c *ConfigType) GetHostname() string {
	osName := Hostname.Get()

	template := c.Global.Hostname
	if template == "" {
		return osName
	}

	expanded := strings.ReplaceAll(template, "$hostname", osName)
	return strings.ReplaceAll(expanded, "$ip", c.Global.IP)
}
func GetInterval() time.Duration {
if Config.Global.Interval <= 0 {
return time.Second * 15
@ -120,10 +120,6 @@ func GetInterval() time.Duration {
return time.Duration(Config.Global.Interval)
}
// GetHostname returns the host name reported by the operating system, exactly
// as os.Hostname returns it (including its error).
func GetHostname() (string, error) {
return os.Hostname()
}
// Get preferred outbound ip of this machine
func GetOutboundIP() (net.IP, error) {
conn, err := net.Dial("udp", "8.8.8.8:80")

59
config/hostname.go Normal file
View File

@ -0,0 +1,59 @@
package config
import (
"log"
"os"
"sync"
"time"
)
// HostnameCache keeps a host name behind a read/write lock so that it can be
// read while another goroutine replaces it.
type HostnameCache struct {
	name string
	sync.RWMutex
}

// Hostname is the package-level cache. It is nil until InitHostname assigns it.
var Hostname *HostnameCache

// Get returns the cached host name, taking the read lock for the duration of
// the read.
func (c *HostnameCache) Get() string {
	c.RLock()
	defer c.RUnlock()
	return c.name
}

// Set replaces the cached host name. When name equals the current value the
// write lock is not taken at all.
func (c *HostnameCache) Set(name string) {
	if c.Get() == name {
		return
	}

	c.Lock()
	defer c.Unlock()
	c.name = name
}
func InitHostname() error {
hostname, err := os.Hostname()
if err != nil {
return err
}
Hostname = &HostnameCache{
name: hostname,
}
go Hostname.update()
return nil
}
// update re-reads the OS host name once per second, forever, and stores it in
// the cache it was called on. A failed lookup is logged and the previously
// cached name is kept.
//
// The result is written through the receiver rather than through the
// package-level Hostname variable, so the goroutine always refreshes the cache
// it belongs to even if Hostname is reassigned later.
func (c *HostnameCache) update() {
	for {
		time.Sleep(time.Second)

		name, err := os.Hostname()
		if err != nil {
			log.Println("E! failed to get hostname:", err)
			continue
		}
		c.Set(name)
	}
}

View File

@ -108,14 +108,20 @@ func (ins *Instance) createHTTPClient() (*http.Client, error) {
}
}
trans := &http.Transport{
Proxy: httpx.GetProxyFunc(ins.HTTPProxy),
DialContext: dialer.DialContext,
DisableKeepAlives: true,
TLSClientConfig: tlsCfg,
}
if ins.UseTLS {
trans.TLSClientConfig = tlsCfg
}
client := &http.Client{
Transport: &http.Transport{
Proxy: httpx.GetProxyFunc(ins.HTTPProxy),
DialContext: dialer.DialContext,
DisableKeepAlives: true,
TLSClientConfig: tlsCfg,
},
Timeout: time.Duration(ins.ResponseTimeout),
Transport: trans,
Timeout: time.Duration(ins.ResponseTimeout),
}
if !ins.FollowRedirects {

View File

@ -7,98 +7,110 @@ import (
"strings"
"flashcat.cloud/categraf/inputs"
"flashcat.cloud/categraf/pkg/tagx"
"github.com/toolkits/pkg/container/list"
)
// slaveStatusQueries lists the base statements that querySlaveStatus tries, in
// this order, until one of them succeeds.
var slaveStatusQueries = [2]string{"SHOW ALL SLAVES STATUS", "SHOW SLAVE STATUS"}
// slaveStatusQuerySuffixes lists the suffixes appended to a base statement when
// the bare statement failed; the empty suffix repeats the bare statement.
var slaveStatusQuerySuffixes = [3]string{" NONBLOCKING", " NOLOCK", ""}
// querySlaveStatus runs the first slave status statement the server accepts.
//
// For each entry of slaveStatusQueries the bare statement is tried first and
// then the statement with each of slaveStatusQuerySuffixes appended (the
// lock-free variants). The rows of the first successful query are returned;
// if every attempt fails, the result and error of the last attempt are
// returned.
func querySlaveStatus(db *sql.DB) (rows *sql.Rows, err error) {
	for _, base := range slaveStatusQueries {
		if rows, err = db.Query(base); err == nil {
			return rows, nil
		}

		// Leverage lock-free SHOW SLAVE STATUS by guessing the right suffix
		for _, suffix := range slaveStatusQuerySuffixes {
			if rows, err = db.Query(base + suffix); err == nil {
				return rows, nil
			}
		}
	}

	return rows, err
}
// gatherSlaveStatus queries the replication status of db and pushes the
// numeric columns listed in ins.validMetrics to slist as
// "slave_status_<column>" samples labelled with master_host, master_uuid and
// channel_name. It returns immediately unless ins.GatherSlaveStatus is set.
//
// NOTE(review): this span is a rendered diff hunk in which the removed and the
// added version of the function are interleaved, so it does not read as one
// compilable body. Confirm against the real file before editing the code.
func (m *MySQL) gatherSlaveStatus(slist *list.SafeList, ins *Instance, db *sql.DB, globalTags map[string]string) {
if !ins.GatherSlaveStatus {
return
}
var (
rows *sql.Rows
err error
)
// Try the both syntax for MySQL/Percona and MariaDB
for _, query := range slaveStatusQueries {
rows, err = db.Query(query)
if err != nil { // MySQL/Percona
// Leverage lock-free SHOW SLAVE STATUS by guessing the right suffix
for _, suffix := range slaveStatusQuerySuffixes {
rows, err = db.Query(fmt.Sprint(query, suffix))
if err == nil {
break
}
}
} else { // MariaDB
break
}
}
rows, err := querySlaveStatus(db)
if err != nil {
log.Println("E! failed to query slave status:", err)
return
}
if rows == nil {
log.Println("E! failed to query slave status: rows is nil")
return
}
defer rows.Close()
var (
tags = tagx.Copy(globalTags)
fields = make(map[string]interface{})
textItems = map[string]string{
"master_host": "",
"master_uuid": "",
"channel_name": "",
"connection_name": "",
}
)
slaveCols, err := rows.Columns()
if err != nil {
log.Println("E! failed to get columns of slave rows:", err)
return
}
for rows.Next() {
var key string
var val sql.RawBytes
// As the number of columns varies with mysqld versions,
// and sql.Scan requires []interface{}, we need to create a
// slice of pointers to the elements of slaveData.
scanArgs := make([]interface{}, len(slaveCols))
for i := range scanArgs {
scanArgs[i] = &sql.RawBytes{}
}
if err = rows.Scan(&key, &val); err != nil {
if err := rows.Scan(scanArgs...); err != nil {
continue
}
// key to lower
key = strings.ToLower(key)
masterUUID := columnValue(scanArgs, slaveCols, "Master_UUID")
masterHost := columnValue(scanArgs, slaveCols, "Master_Host")
channelName := columnValue(scanArgs, slaveCols, "Channel_Name") // MySQL & Percona
connectionName := columnValue(scanArgs, slaveCols, "Connection_name") // MariaDB
// collect some string fields
if _, has := textItems[key]; has {
textItems[key] = string(val)
continue
if connectionName != "" {
channelName = connectionName
}
// collect float fields
if _, has := ins.validMetrics[key]; !has {
continue
if channelName == "" {
channelName = "default"
}
if floatVal, ok := parseStatus(val); ok {
fields[key] = floatVal
continue
for i, col := range slaveCols {
key := strings.ToLower(col)
if _, has := ins.validMetrics[key]; !has {
continue
}
if value, ok := parseStatus(*scanArgs[i].(*sql.RawBytes)); ok {
slist.PushFront(inputs.NewSample("slave_status_"+key, value, globalTags, map[string]string{
"master_host": masterHost,
"master_uuid": masterUUID,
"channel_name": channelName,
}))
}
}
}
if textItems["connection_name"] != "" {
textItems["channel_name"] = textItems["connection_name"]
}
// default channel name is empty
if textItems["channel_name"] == "" {
textItems["channel_name"] = "default"
}
for k, v := range fields {
slist.PushFront(inputs.NewSample("slave_status_"+k, v, tags, map[string]string{
"master_host": textItems["master_host"],
"master_uuid": textItems["master_uuid"],
"channel_name": textItems["channel_name"],
}))
}
}
// columnIndex returns the position of the first element of slaveCols that is
// exactly equal to colName (case-sensitive), or -1 when there is none.
func columnIndex(slaveCols []string, colName string) int {
	for pos, col := range slaveCols {
		if col == colName {
			return pos
		}
	}
	return -1
}
// columnValue returns, as a string, the scanned value of the column named
// colName, or "" when slaveCols has no such column.
//
// scanArgs must be parallel to slaveCols and hold *sql.RawBytes elements; the
// type assertion panics for anything else.
func columnValue(scanArgs []interface{}, slaveCols []string, colName string) string {
	pos := columnIndex(slaveCols, colName)
	if pos == -1 {
		return ""
	}

	raw := scanArgs[pos].(*sql.RawBytes)
	return string(*raw)
}

View File

@ -0,0 +1,298 @@
package nginx_upstream_check
import (
"encoding/json"
"errors"
"fmt"
"io"
"log"
"net"
"net/http"
"net/url"
"strconv"
"sync"
"sync/atomic"
"time"
"flashcat.cloud/categraf/config"
"flashcat.cloud/categraf/inputs"
"flashcat.cloud/categraf/pkg/httpx"
"flashcat.cloud/categraf/pkg/netx"
"flashcat.cloud/categraf/pkg/tls"
"flashcat.cloud/categraf/types"
"github.com/toolkits/pkg/container/list"
)
// inputName is the name this plugin is registered under in init and the value
// returned by Prefix.
const inputName = "nginx_upstream_check"
// NginxUpstreamCheck is the plugin value created by the factory registered in
// init. It fans every Gather call out to its configured instances.
type NginxUpstreamCheck struct {
// NOTE(review): embedded interval settings; semantics are defined in package config.
config.Interval
// counter is incremented once per Gather call and compared against each
// instance's IntervalTimes to decide whether that instance runs this round.
counter uint64
// waitgrp lets Gather wait for the per-instance goroutines it started.
waitgrp sync.WaitGroup
// Instances is filled from the "instances" tables of the TOML configuration.
Instances []*Instance `toml:"instances"`
}
// init registers the plugin under inputName with a factory that hands out a
// fresh, zero-valued NginxUpstreamCheck on every call.
func init() {
	factory := func() inputs.Input {
		return new(NginxUpstreamCheck)
	}
	inputs.Add(inputName, factory)
}
// Prefix returns inputName.
// NOTE(review): presumably used by the framework as the metric name prefix; confirm in package inputs.
func (r *NginxUpstreamCheck) Prefix() string {
return inputName
}
// Init initialises every configured instance in order and stops at the first
// failure, returning that error. It returns types.ErrInstancesEmpty when no
// instance is configured.
func (r *NginxUpstreamCheck) Init() error {
	if len(r.Instances) == 0 {
		return types.ErrInstancesEmpty
	}

	for _, ins := range r.Instances {
		if err := ins.Init(); err != nil {
			return err
		}
	}

	return nil
}
// Drop is a no-op for this plugin.
func (r *NginxUpstreamCheck) Drop() {}
// Gather runs one collection round: it bumps the round counter, starts one
// goroutine per instance and blocks until all of them have finished.
//
// An instance with IntervalTimes > 0 only collects on rounds whose counter is
// a multiple of IntervalTimes; all other instances collect on every round.
func (r *NginxUpstreamCheck) Gather(slist *list.SafeList) {
	atomic.AddUint64(&r.counter, 1)

	for _, instance := range r.Instances {
		r.waitgrp.Add(1)
		go func(ins *Instance) {
			defer r.waitgrp.Done()

			if times := ins.IntervalTimes; times > 0 {
				round := atomic.LoadUint64(&r.counter)
				if round%uint64(times) != 0 {
					return
				}
			}

			ins.gatherOnce(slist)
		}(instance)
	}

	r.waitgrp.Wait()
}
// Instance is one "[[instances]]" table of the plugin configuration: a set of
// status URLs plus the HTTP options used to query them.
type Instance struct {
// Labels are copied onto every sample of this instance, next to the "target" label.
Labels map[string]string `toml:"labels"`
// IntervalTimes > 0 makes the instance collect only on every IntervalTimes-th round.
IntervalTimes int64 `toml:"interval_times"`
// Targets are the URLs to query; Init accepts only the http and https schemes.
Targets []string `toml:"targets"`
// HTTPProxy is handed to httpx.GetProxyFunc when the HTTP transport is built.
HTTPProxy string `toml:"http_proxy"`
// Interface, when set, names the network interface whose address is used as the local dial address.
Interface string `toml:"interface"`
// Method is the HTTP request method; Init defaults it to "GET".
Method string `toml:"method"`
// FollowRedirects false makes the client return the redirect response itself.
FollowRedirects bool `toml:"follow_redirects"`
// Username and Password enable HTTP basic auth when at least one of them is set.
Username string `toml:"username"`
Password string `toml:"password"`
// Headers is a flat name, value, name, value ... list; Init rejects an odd length.
Headers []string `toml:"headers"`
// Timeout is the client timeout; Init replaces values below one second with five seconds.
Timeout config.Duration `toml:"timeout"`
// Embedded TLS options; the resulting config is applied only when UseTLS is set.
tls.ClientConfig
// client is created by Init and used for every request.
client httpClient
}
// httpClient is the single method of an HTTP client that this plugin uses.
// The *http.Client built by createHTTPClient is stored behind it.
type httpClient interface {
Do(req *http.Request) (*http.Response, error)
}
// Init applies defaults, builds the HTTP client and validates the instance
// configuration.
//
// Defaults: a Timeout below one second becomes five seconds and an empty
// Method becomes "GET".
//
// It returns an error when no target is configured, when the HTTP client
// cannot be created, when a target cannot be parsed or uses a scheme other
// than http/https, or when Headers has an odd number of elements. Underlying
// errors are wrapped with %w so callers can inspect them with errors.Is and
// errors.As; the message text is the same as with %v.
func (ins *Instance) Init() error {
	if ins.Timeout < config.Duration(time.Second) {
		ins.Timeout = config.Duration(time.Second * 5)
	}

	if ins.Method == "" {
		ins.Method = "GET"
	}

	if len(ins.Targets) == 0 {
		return errors.New("nginx_upstream_check targets empty")
	}

	client, err := ins.createHTTPClient()
	if err != nil {
		return fmt.Errorf("failed to create http client: %w", err)
	}
	ins.client = client

	for _, target := range ins.Targets {
		addr, err := url.Parse(target)
		if err != nil {
			return fmt.Errorf("failed to parse target url: %s, error: %w", target, err)
		}

		if addr.Scheme != "http" && addr.Scheme != "https" {
			return fmt.Errorf("only http and https are supported, target: %s", target)
		}
	}

	// Headers are consumed pairwise (name, value) when a request is built.
	if len(ins.Headers)%2 != 0 {
		return errors.New("headers invalid")
	}

	return nil
}
// createHTTPClient builds the *http.Client for this instance.
//
// The transport uses the configured proxy, disables keep-alives and, when
// Interface is set, dials from that interface's local address. The TLS
// configuration is always built (so an invalid one is reported) but is only
// attached to the transport when UseTLS is set. Unless FollowRedirects is
// enabled the client returns a redirect response instead of following it.
func (ins *Instance) createHTTPClient() (*http.Client, error) {
	tlsCfg, err := ins.ClientConfig.TLSConfig()
	if err != nil {
		return nil, err
	}

	dialer := new(net.Dialer)
	if ins.Interface != "" {
		if dialer.LocalAddr, err = netx.LocalAddressByInterfaceName(ins.Interface); err != nil {
			return nil, err
		}
	}

	transport := &http.Transport{
		Proxy:             httpx.GetProxyFunc(ins.HTTPProxy),
		DialContext:       dialer.DialContext,
		DisableKeepAlives: true,
	}
	if ins.UseTLS {
		transport.TLSClientConfig = tlsCfg
	}

	client := &http.Client{
		Transport: transport,
		Timeout:   time.Duration(ins.Timeout),
	}
	if ins.FollowRedirects {
		return client, nil
	}

	// Hand the 3xx response back to the caller instead of following it.
	client.CheckRedirect = func(*http.Request, []*http.Request) error {
		return http.ErrUseLastResponse
	}
	return client, nil
}
// gatherOnce queries every target of the instance concurrently, one goroutine
// per target, and returns once all of them are done.
func (ins *Instance) gatherOnce(slist *list.SafeList) {
	var pending sync.WaitGroup

	for _, target := range ins.Targets {
		pending.Add(1)
		go func(t string) {
			defer pending.Done()
			ins.gather(slist, t)
		}(target)
	}

	pending.Wait()
}
// NginxUpstreamCheckData is the shape the JSON response of a target is decoded
// into by gather.
// NOTE(review): presumably the "status?format=json" output of the nginx upstream check module; confirm.
type NginxUpstreamCheckData struct {
Servers struct {
Total uint64 `json:"total"`
Generation uint64 `json:"generation"`
// Server holds one entry per reported upstream server; gather emits samples for each.
Server []NginxUpstreamCheckServer `json:"server"`
} `json:"servers"`
}
// NginxUpstreamCheckServer is one element of the "server" array in the decoded
// response. gather turns Upstream, Type, Name and Port into tags and Status,
// Rise and Fall into fields; Index is decoded but not reported.
type NginxUpstreamCheckServer struct {
Index uint64 `json:"index"`
Upstream string `json:"upstream"`
Name string `json:"name"`
// Status is mapped to a number by getStatusCode ("up" -> 1, "down" -> 2, else 0).
Status string `json:"status"`
Rise uint64 `json:"rise"`
Fall uint64 `json:"fall"`
Type string `json:"type"`
Port uint16 `json:"port"`
}
// gather queries a single target and pushes three samples (status_code, rise,
// fall) per reported upstream server to slist.
//
// Every sample carries the "target" label plus the instance labels (an
// instance label named "target" overrides the URL) and the per-server tags
// upstream, type, name and port. A failed request or decode is logged and
// yields no samples.
func (ins *Instance) gather(slist *list.SafeList, target string) {
	if config.Config.DebugMode {
		log.Println("D! nginx_upstream_check... target:", target)
	}

	labels := make(map[string]string, len(ins.Labels)+1)
	labels["target"] = target
	for name, value := range ins.Labels {
		labels[name] = value
	}

	var checkData NginxUpstreamCheckData
	if err := ins.gatherJSONData(target, &checkData); err != nil {
		log.Println("E! failed to gather json data:", err)
		return
	}

	servers := checkData.Servers.Server
	for i := range servers {
		srv := &servers[i]

		tags := map[string]string{
			"upstream": srv.Upstream,
			"type":     srv.Type,
			"name":     srv.Name,
			"port":     strconv.FormatUint(uint64(srv.Port), 10),
		}

		fields := map[string]interface{}{
			"status_code": getStatusCode(srv.Status),
			"rise":        srv.Rise,
			"fall":        srv.Fall,
		}

		inputs.PushSamples(slist, fields, tags, labels)
	}
}
// getStatusCode maps the textual server status to a number: "up" is 1, "down"
// is 2 and anything else (including the empty string) is 0. The comparison is
// case-sensitive.
func getStatusCode(status string) uint8 {
	if status == "up" {
		return 1
	}
	if status == "down" {
		return 2
	}
	return 0
}
// gatherJSONData sends one request to address and decodes the JSON response
// body into value.
//
// The request uses the instance's method, basic auth (when a username or a
// password is set) and headers; a "Host" header additionally overrides the
// request's Host. Any status other than 200 is returned as an error that
// quotes up to the first 200 bytes of the body.
func (ins *Instance) gatherJSONData(address string, value interface{}) error {
	req, err := http.NewRequest(ins.Method, address, nil)
	if err != nil {
		return err
	}

	if ins.Username != "" || ins.Password != "" {
		req.SetBasicAuth(ins.Username, ins.Password)
	}

	// Headers is a flat name/value list; Init has verified its length is even.
	for i := 0; i < len(ins.Headers); i += 2 {
		name, val := ins.Headers[i], ins.Headers[i+1]
		req.Header.Add(name, val)
		if name == "Host" {
			req.Host = val
		}
	}

	resp, err := ins.client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// The read error is ignored on purpose: the snippet is only there to
		// enrich the message.
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 200))
		return fmt.Errorf("%s returned HTTP status %s: %q", address, resp.Status, snippet)
	}

	return json.NewDecoder(resp.Body).Decode(value)
}

File diff suppressed because it is too large Load Diff

83
inputs/tpl/tpl.go Normal file
View File

@ -0,0 +1,83 @@
package tpl
import (
"sync"
"sync/atomic"
"flashcat.cloud/categraf/config"
"flashcat.cloud/categraf/inputs"
"flashcat.cloud/categraf/types"
"github.com/toolkits/pkg/container/list"
)
// inputName is the name this template plugin is registered under in init and
// the value returned by Prefix.
const inputName = "plugin_tpl"
// PluginTpl is the plugin value created by the factory registered in init. It
// fans every Gather call out to its configured instances.
type PluginTpl struct {
// NOTE(review): embedded interval settings; semantics are defined in package config.
config.Interval
// counter is incremented once per Gather call and compared against each
// instance's IntervalTimes to decide whether that instance runs this round.
counter uint64
// waitgrp lets Gather wait for the per-instance goroutines it started.
waitgrp sync.WaitGroup
// Instances is filled from the "instances" tables of the TOML configuration.
Instances []*Instance `toml:"instances"`
}
// init registers the plugin under inputName with a factory that hands out a
// fresh, zero-valued PluginTpl on every call.
func init() {
	factory := func() inputs.Input {
		return new(PluginTpl)
	}
	inputs.Add(inputName, factory)
}
// Prefix returns inputName.
// NOTE(review): presumably used by the framework as the metric name prefix; confirm in package inputs.
func (r *PluginTpl) Prefix() string {
return inputName
}
// Init initialises every configured instance in order and stops at the first
// failure, returning that error. It returns types.ErrInstancesEmpty when no
// instance is configured.
func (r *PluginTpl) Init() error {
	if len(r.Instances) == 0 {
		return types.ErrInstancesEmpty
	}

	for _, ins := range r.Instances {
		if err := ins.Init(); err != nil {
			return err
		}
	}

	return nil
}
// Drop is a no-op for this plugin.
func (r *PluginTpl) Drop() {}
// Gather runs one collection round: it bumps the round counter, starts one
// goroutine per instance and blocks until all of them have finished.
//
// An instance with IntervalTimes > 0 only collects on rounds whose counter is
// a multiple of IntervalTimes; all other instances collect on every round.
func (r *PluginTpl) Gather(slist *list.SafeList) {
	atomic.AddUint64(&r.counter, 1)

	for _, instance := range r.Instances {
		r.waitgrp.Add(1)
		go func(ins *Instance) {
			defer r.waitgrp.Done()

			if times := ins.IntervalTimes; times > 0 {
				round := atomic.LoadUint64(&r.counter)
				if round%uint64(times) != 0 {
					return
				}
			}

			ins.gatherOnce(slist)
		}(instance)
	}

	r.waitgrp.Wait()
}
// Instance is one "[[instances]]" table of the template plugin configuration.
type Instance struct {
// Labels is read from the "labels" key; this template does not use it yet.
Labels map[string]string `toml:"labels"`
// IntervalTimes > 0 makes the instance collect only on every IntervalTimes-th round.
IntervalTimes int64 `toml:"interval_times"`
}
// Init is the per-instance validation hook called by PluginTpl.Init. The
// template performs no checks and always returns nil.
func (ins *Instance) Init() error {
return nil
}
// gatherOnce is the per-instance collection hook called from Gather. The
// template body is empty, so nothing is pushed to slist.
func (ins *Instance) gatherOnce(slist *list.SafeList) {
}