diff --git a/etc/plugins/http_response.yml b/etc/plugins/http_response.yml new file mode 100644 index 00000000..c06f55ba --- /dev/null +++ b/etc/plugins/http_response.yml @@ -0,0 +1,8 @@ +mode: whitelist # whitelist(default),all +metrics: + - name: http_response_http_response_code + - name: http_response_content_length + - name: http_response_response_string_match + - name: http_response_response_status_code_match + - name: http_response_result_code + - name: http_response_response_time diff --git a/etc/plugins/net_response.yml b/etc/plugins/net_response.yml new file mode 100644 index 00000000..e3576247 --- /dev/null +++ b/etc/plugins/net_response.yml @@ -0,0 +1,5 @@ +mode: whitelist # whitelist(default),all +metrics: + - name: net_response_result_code + - name: net_response_response_time + diff --git a/etc/plugins/ping.yml b/etc/plugins/ping.yml new file mode 100644 index 00000000..0490ce0b --- /dev/null +++ b/etc/plugins/ping.yml @@ -0,0 +1,15 @@ +mode: whitelist # whitelist(default),all +metrics: + - name: ping_percentile95_ms + - name: ping_percentile99_ms + - name: ping_percentile50_ms + - name: ping_percent_packet_loss + - name: ping_ttl + - name: ping_maximum_response_ms + - name: ping_standard_deviation_ms + - name: ping_packets_received + - name: ping_packets_transmitted + - name: ping_minimum_response_ms + - name: ping_result_code + - name: ping_average_response_ms + diff --git a/go.mod b/go.mod index c452a0f2..63eb866d 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32 github.com/gin-contrib/pprof v1.3.0 github.com/gin-gonic/gin v1.6.3 + github.com/go-ping/ping v0.0.0-20201115131931-3300c582a663 github.com/go-sql-driver/mysql v1.5.0 github.com/google/uuid v1.1.2 github.com/gorilla/mux v1.7.3 diff --git a/go.sum b/go.sum index 540ab3c0..5919c8bc 100644 --- a/go.sum +++ b/go.sum @@ -346,6 +346,7 @@ github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/validate v0.18.0/go.mod h1:Uh4HdOzKt19xGIGm1qHf/ofbX1YQ4Y+MYsct2VUrAJ4= github.com/go-openapi/validate v0.19.2/go.mod h1:1tRCw7m3jtI8eNWEEliiAqUIcBztB2KDnRCRMUi7GTA= +github.com/go-ping/ping v0.0.0-20201115131931-3300c582a663 h1:jI2GiiRh+pPbey52EVmbU6kuLiXqwy4CXZ4gwUBj8Y0= github.com/go-ping/ping v0.0.0-20201115131931-3300c582a663/go.mod h1:35JbSyV/BYqHwwRA6Zr1uVDm1637YlNOU61wI797NPI= github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A= github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= diff --git a/src/modules/monapi/plugins/all/all.go b/src/modules/monapi/plugins/all/all.go index 84a0d0bc..7a733069 100644 --- a/src/modules/monapi/plugins/all/all.go +++ b/src/modules/monapi/plugins/all/all.go @@ -7,9 +7,12 @@ import ( _ "github.com/didi/nightingale/src/modules/monapi/plugins/elasticsearch" _ "github.com/didi/nightingale/src/modules/monapi/plugins/github" _ "github.com/didi/nightingale/src/modules/monapi/plugins/haproxy" + _ "github.com/didi/nightingale/src/modules/monapi/plugins/http_response" _ "github.com/didi/nightingale/src/modules/monapi/plugins/mongodb" _ "github.com/didi/nightingale/src/modules/monapi/plugins/mysql" + _ "github.com/didi/nightingale/src/modules/monapi/plugins/net_response" _ "github.com/didi/nightingale/src/modules/monapi/plugins/nginx" + _ "github.com/didi/nightingale/src/modules/monapi/plugins/ping" _ "github.com/didi/nightingale/src/modules/monapi/plugins/prometheus" _ "github.com/didi/nightingale/src/modules/monapi/plugins/rabbitmq" _ "github.com/didi/nightingale/src/modules/monapi/plugins/redis" diff --git a/src/modules/monapi/plugins/http_response/http_response.go b/src/modules/monapi/plugins/http_response/http_response.go new file mode 100644 index 00000000..181b4d76 --- /dev/null +++ b/src/modules/monapi/plugins/http_response/http_response.go @@ -0,0 +1,159 @@ +package http_response + +import ( + "errors" + "fmt" + "strings" + "time" + + "github.com/didi/nightingale/src/modules/monapi/collector" + "github.com/didi/nightingale/src/modules/monapi/plugins" + "github.com/didi/nightingale/src/toolkits/i18n" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs/http_response" +) + +func init() { + collector.CollectorRegister(NewCollector()) // for monapi + i18n.DictRegister(langDict) +} + +type Collector struct { + *collector.BaseCollector +} + +func NewCollector() *Collector { + return &Collector{BaseCollector: collector.NewBaseCollector( + "http_response", + collector.RemoteCategory, + func() collector.TelegrafPlugin { return &Rule{} }, + )} +} + +var ( + langDict = map[string]map[string]string{ + "zh": map[string]string{ + "URLS": "地址", + "Method": "方法", + "ResponseTimeout": "响应超时", + "Headers": "Headers", + "Username": "用户名", + "Password": "密码", + "Body": "Body", + "ResponseBodyMaxSize": "ResponseBodyMaxSize", + "ResponseStringMatch": "ResponseStringMatch", + "ResponseStatusCode": "ResponseStatusCode", + "Interface": "Interface", + "HTTPProxy": "HTTPProxy", + "FollowRedirects": "FollowRedirects", + "List of urls to query": "要监测的URL地址", + "HTTP Request Method, default GET": "HTTP 的请求方法,默认是 GET", + "HTTP Request Headers": "HTTP 请求的的 Headers", + "Optional HTTP Basic Auth Credentials, Username": "HTTP Basic 认证的用户名", + "Optional HTTP Basic Auth Credentials, Password": "HTTP Basic 认证的密码", + "Optional HTTP Request Body": "HTTP 请求的 Body", + "If the response body size exceeds this limit a body_read_error will be raised": "如果返回的 body 超过了限制,则会上报 body_read_error 对应的 result_code", + "Optional substring or regex match in body of the response": "返回的 Body 中匹配的字符串,可以部分匹配或者正则", + "Expected response status code, If match response_status_code_match will be 1": "期望返回的状态码,如果匹配则 response_status_code_match 回上报 1", + "Interface to use when dialing an address": "发起请求使用的接口", + "Set http_proxy (telegraf uses the system wide proxy settings if it's is not set)": "HTTP 代理的地址", + "Whether to follow redirects from the server (defaults to false)": "是否自动跳转", + }, + } +) + +type Rule struct { + URLs []string `label:"URLs" json:"urls,required" description:"List of urls to query" example:"https://github.com/didi/nightingale"` + Method string `label:"Method" json:"method" description:"HTTP Request Method, default GET" example:"GET"` + ResponseTimeout int `label:"ResponseTimeout" json:"response_timeout" default:"5" description:"Set response_timeout (default 5 seconds)"` + Headers []string `label:"Headers" json:"headers" description:"HTTP Request Headers" example:"Content-Type: application/json"` + Username string `label:"Username" json:"username" description:"Optional HTTP Basic Auth Credentials, Username" example:"username"` + Password string `label:"Password" json:"password" description:"Optional HTTP Basic Auth Credentials, Password" example:"password"` + Body string `label:"Body" json:"body" description:"Optional HTTP Request Body" example:"{'fake':'data'}"` + ResponseBodyMaxSize int `label:"ResponseBodyMaxSize" json:"response_body_max_size" default:"32" description:"If the response body size exceeds this limit a body_read_error will be raised"` + ResponseStringMatch string `label:"ResponseStringMatch" json:"response_string_match" description:"Optional substring or regex match in body of the response" example:"ok"` + ResponseStatusCode int `label:"ResponseStatusCode" json:"response_status_code" default:"200" description:"Expected response status code, If match response_status_code_match will be 1"` + Interface string `label:"Interface" json:"interface" description:"Interface to use when dialing an address" example:"eth0"` + HTTPProxy string `label:"HTTPProxy" json:"http_proxy" description:"Set http_proxy (telegraf uses the system wide proxy settings if it's is not set)" example:"http://localhost:8888"` + FollowRedirects bool `label:"FollowRedirects" json:"follow_redirects" description:"Whether to follow redirects from the server (defaults to false)"` + plugins.ClientConfig +} + +func checkHTTPMethod(method string) bool { + httpMethods := []string{"GET", "HEAD", "POST", "OPTIONS", "PUT", "DELETE", "TRACE", "CONNECT"} + for _, m := range httpMethods { + if m == method { + return true + } + } + return false +} + +func getHeaderMap(headers []string) (map[string]string, error) { + headerMap := make(map[string]string) + for _, header := range headers { + kv := strings.Split(header, ":") + if len(kv) != 2 { + err := errors.New("header is not valid") + return nil, err + } + k := strings.TrimSpace(kv[0]) + v := strings.TrimSpace(kv[1]) + headerMap[k] = v + } + return headerMap, nil +} + +func (p *Rule) Validate() error { + if len(p.URLs) == 0 || p.URLs[0] == "" { + return fmt.Errorf("http_response.rule.urls must be set") + } + if p.Method == "" { + p.Method = "GET" + } + if !checkHTTPMethod(p.Method) { + return fmt.Errorf("http_response.rule.method is not valid") + } + if p.ResponseTimeout == 0 { + p.ResponseTimeout = 5 + } + if p.ResponseBodyMaxSize == 0 { + p.ResponseBodyMaxSize = 32 + } + + return nil +} + +func (p *Rule) TelegrafInput() (telegraf.Input, error) { + if err := p.Validate(); err != nil { + return nil, err + } + + headerMap, err := getHeaderMap(p.Headers) + if err != nil { + return nil, err + } + + input := &http_response.HTTPResponse{ + URLs: p.URLs, + Method: p.Method, + Username: p.Username, + Password: p.Password, + Headers: headerMap, + Body: p.Body, + ResponseStringMatch: p.ResponseStringMatch, + ResponseStatusCode: p.ResponseStatusCode, + Interface: p.Interface, + HTTPProxy: p.HTTPProxy, + FollowRedirects: p.FollowRedirects, + Log: plugins.GetLogger(), + ClientConfig: p.ClientConfig.TlsClientConfig(), + } + if err := plugins.SetValue(&input.ResponseTimeout.Duration, time.Second*time.Duration(p.ResponseTimeout)); err != nil { + return nil, err + } + if err := plugins.SetValue(&input.ResponseBodyMaxSize.Size, int64(p.ResponseBodyMaxSize)*1024*1024); err != nil { + return nil, err + } + return input, nil +} diff --git a/src/modules/monapi/plugins/http_response/http_response_test.go b/src/modules/monapi/plugins/http_response/http_response_test.go new file mode 100644 index 00000000..da7faf19 --- /dev/null +++ b/src/modules/monapi/plugins/http_response/http_response_test.go @@ -0,0 +1,15 @@ +package http_response + +import ( + "testing" + + "github.com/didi/nightingale/src/modules/monapi/plugins" +) + +func TestCollect(t *testing.T) { + plugins.PluginTest(t, &Rule{ + URLs: []string{"https://github.com"}, + ResponseStatusCode: 200, + ResponseStringMatch: "github", + }) +} diff --git a/src/modules/monapi/plugins/net_response/net_response.go b/src/modules/monapi/plugins/net_response/net_response.go new file mode 100644 index 00000000..a9f1cbdc --- /dev/null +++ b/src/modules/monapi/plugins/net_response/net_response.go @@ -0,0 +1,98 @@ +package net_response + +import ( + "fmt" + "time" + + "github.com/didi/nightingale/src/modules/monapi/collector" + "github.com/didi/nightingale/src/modules/monapi/plugins" + "github.com/didi/nightingale/src/toolkits/i18n" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs/net_response" +) + +func init() { + collector.CollectorRegister(NewCollector()) // for monapi + i18n.DictRegister(langDict) +} + +type Collector struct { + *collector.BaseCollector +} + +func NewCollector() *Collector { + return &Collector{BaseCollector: collector.NewBaseCollector( + "net_response", + collector.RemoteCategory, + func() collector.TelegrafPlugin { return &Rule{} }, + )} +} + +var ( + langDict = map[string]map[string]string{ + "zh": map[string]string{ + "Address": "地址", + "Protocol": "协议", + "Timeout": "请求超时", + "ReadTimeout": "读取超时", + "Send": "Send", + "Expect": "Expect", + "readme - https://github.com/influxdata/telegraf/tree/master/plugins/inputs/net_response": "更多说明详细详见 https://github.com/influxdata/telegraf/tree/master/plugins/inputs/net_response", + "Protocol, must be tcp or udp": "请求协议,必须是 tcp 或 udp", + "Set timeout": "设置超时,单位是秒", + "Set read timeout (only used if expecting a response)": "设置读取的超时(仅当配置了 expect response 时使用),单位是秒", + "string sent to the server, udp required": "发送给服务器的字符串,udp 必须", + "expected string in answer, udp required": "期待服务器返回的字符串(部分),udp 必须", + }, + } +) + +type Rule struct { + Address string `label:"Address" json:"address,required" description:"readme - https://github.com/influxdata/telegraf/tree/master/plugins/inputs/net_response" example:"localhost:80"` + Protocol string `label:"Protocol" json:"protocol" description:"Protocol, must be tcp or udp" example:"tcp"` + Timeout int `label:"Timeout" json:"timeout" default:"1" description:"Set timeout"` + ReadTimeout int `label:"ReadTimeout" json:"read_timeout" default:"1" description:"Set read timeout (only used if expecting a response)"` + Send string `label:"Send" json:"send" description:"string sent to the server, udp required" example:"hello"` + Expect string `label:"Expect" json:"expect" description:"expected string in answer, udp required" example:"hello"` +} + +func (p *Rule) Validate() error { + if p.Address == "" { + return fmt.Errorf("net_response.rule.address must be set") + } + if p.Protocol == "" { + p.Protocol = "tcp" + } + if !(p.Protocol == "tcp" || p.Protocol == "udp") { + return fmt.Errorf("net_response.rule.protocol must be tcp or udp") + } + if p.Timeout == 0 { + p.Timeout = 5 + } + if p.ReadTimeout == 0 { + p.ReadTimeout = 5 + } + + return nil +} + +func (p *Rule) TelegrafInput() (telegraf.Input, error) { + if err := p.Validate(); err != nil { + return nil, err + } + + input := &net_response.NetResponse{ + Address: p.Address, + Protocol: p.Protocol, + Send: p.Send, + Expect: p.Expect, + } + if err := plugins.SetValue(&input.Timeout.Duration, time.Second*time.Duration(p.Timeout)); err != nil { + return nil, err + } + if err := plugins.SetValue(&input.ReadTimeout.Duration, time.Second*time.Duration(p.ReadTimeout)); err != nil { + return nil, err + } + + return input, nil +} diff --git a/src/modules/monapi/plugins/net_response/net_response_test.go b/src/modules/monapi/plugins/net_response/net_response_test.go new file mode 100644 index 00000000..6d20ec6e --- /dev/null +++ b/src/modules/monapi/plugins/net_response/net_response_test.go @@ -0,0 +1,13 @@ +package net_response + +import ( + "testing" + + "github.com/didi/nightingale/src/modules/monapi/plugins" +) + +func TestCollect(t *testing.T) { + plugins.PluginTest(t, &Rule{ + Address: "github.com:443", + }) +} diff --git a/src/modules/monapi/plugins/ping/ping.go b/src/modules/monapi/plugins/ping/ping.go new file mode 100644 index 00000000..cc784fe9 --- /dev/null +++ b/src/modules/monapi/plugins/ping/ping.go @@ -0,0 +1,121 @@ +package ping + +import ( + "fmt" + + "github.com/didi/nightingale/src/modules/monapi/collector" + "github.com/didi/nightingale/src/modules/monapi/plugins" + "github.com/didi/nightingale/src/modules/monapi/plugins/ping/ping" + "github.com/didi/nightingale/src/toolkits/i18n" + "github.com/influxdata/telegraf" +) + +func init() { + collector.CollectorRegister(NewCollector()) // for monapi + i18n.DictRegister(langDict) +} + +type Collector struct { + *collector.BaseCollector +} + +func NewCollector() *Collector { + return &Collector{BaseCollector: collector.NewBaseCollector( + "ping", + collector.RemoteCategory, + func() collector.TelegrafPlugin { return &Rule{} }, + )} +} + +var ( + langDict = map[string]map[string]string{ + "zh": map[string]string{ + "URLs": "地址", + "PingInterval": "Ping间隔", + "Count": "次数", + "Timeout": "单Ping超时", + "Deadline": "总体超时", + "Method": "模式", + "Interface": "Interface", + "Binary": "Binary", + "IPv6": "IPv6", + "URLs to ping": "要 Ping 的目标地址,IP或域名", + "Interval at which to ping (ping -i ), default 1": "Ping 包的间隔(ping -i ),默认是 1 秒", + "Number of pings to send (ping -c ), default 4": "Ping 包的次数(ping -c ),默认是 4 次", + "Per-ping timeout, in seconds. (ping -W ), default 1.0": "每个 Ping 的超时时间(ping -W ),默认是 1秒", + "Ping deadline, in seconds. (ping -w ), default 10": "整个 Ping 周期的超时时间(ping -w ),默认是 10 秒", + "Method defines how to ping (native or exec), default native": "ping的模式,命令行(exec)或原生(natvie), 默认是 native", + "Interface or source address to send ping from (ping -I/-S )": "Ping 发起的源接口(或源地址),(ping -I/-S )", + "Ping executable binary, default ping": "exec 模式时,ping 的命令。默认是 ping", + "Use only IPv6 addresses when resolving a hostname": "仅将目标域名解析为 IPv6 地址", + }, + } +) + +type Rule struct { + Urls []string `label:"Urls" json:"urls,required" description:"URLs to ping" example:"github.com"` + PingInterval int `label:"PingInterval" json:"ping_interval" default:"1" description:"Interval at which to ping (ping -i ), default 1"` + Count int `label:"Count" json:"count" default:"4" description:"Number of pings to send (ping -c ), default 4"` + Timeout int `label:"Timeout" json:"timeout" default:"1" description:"Per-ping timeout, in seconds. (ping -W ), default 1.0"` + Deadline int `label:"Deadline" json:"deadline" default:"10" description:"Ping deadline, in seconds. (ping -w ), default 10"` + Method string `label:"Method" json:"method" description:"Method defines how to ping (native or exec), default native" example:"native"` + Interface string `label:"Interface" json:"interface" description:"Interface or source address to send ping from (ping -I/-S )" example:"eth0"` + Binary string `label:"Binary" json:"binary" description:"Ping executable binary, default ping" example:"ping"` + IPv6 bool `label:"Ipv6" json:"ipv6" description:"Use only IPv6 addresses when resolving a hostname."` +} + +func (p *Rule) Validate() error { + if len(p.Urls) == 0 || p.Urls[0] == "" { + return fmt.Errorf("ping.rule.urls must be set") + } + if p.PingInterval == 0 { + p.PingInterval = 1 + } + if p.Count == 0 { + p.Count = 1 + } + if p.Timeout == 0 { + p.Timeout = 1 + } + if p.Deadline == 0 { + p.Deadline = 10 + } + if p.Method == "" { + p.Method = "native" + } + if !(p.Method == "exec" || p.Method == "native") { + return fmt.Errorf("ping.rule.method must be exec or native") + } + if p.Binary == "" { + p.Binary = "ping" + } + + return nil +} + +func (p *Rule) TelegrafInput() (telegraf.Input, error) { + if err := p.Validate(); err != nil { + return nil, err + } + + input := &ping.Ping{ + PingHost: ping.MyHostPinger, + PingInterval: float64(p.PingInterval), + Count: p.Count, + Timeout: float64(p.Timeout), + Deadline: p.Deadline, + Method: p.Method, + Interface: p.Interface, + Urls: p.Urls, + Binary: p.Binary, + IPv6: p.IPv6, + Log: plugins.GetLogger(), + Arguments: []string{}, + Percentiles: []int{50, 95, 99}, + } + input.NativePingFunc = input.NativePing + if err := input.Init(); err != nil { + return nil, err + } + return input, nil +} diff --git a/src/modules/monapi/plugins/ping/ping/exec.go b/src/modules/monapi/plugins/ping/ping/exec.go new file mode 100644 index 00000000..9e1d86f3 --- /dev/null +++ b/src/modules/monapi/plugins/ping/ping/exec.go @@ -0,0 +1,47 @@ +package ping + +import ( + "bytes" + "errors" + "os/exec" + "time" +) + +var ErrTimeout = errors.New("command timed out") + +// CombinedOutputTimeout runs the given command with the given timeout and +// returns the combined output of stdout and stderr. +// If the command times out, it attempts to kill the process. +func CombinedOutputTimeout(c *exec.Cmd, timeout time.Duration) ([]byte, error) { + var b bytes.Buffer + c.Stdout = &b + c.Stderr = &b + if err := c.Start(); err != nil { + return nil, err + } + err := WaitTimeout(c, timeout) + return b.Bytes(), err +} + +// StdOutputTimeout runs the given command with the given timeout and +// returns the output of stdout. +// If the command times out, it attempts to kill the process. +func StdOutputTimeout(c *exec.Cmd, timeout time.Duration) ([]byte, error) { + var b bytes.Buffer + c.Stdout = &b + c.Stderr = nil + if err := c.Start(); err != nil { + return nil, err + } + err := WaitTimeout(c, timeout) + return b.Bytes(), err +} + +// RunTimeout runs the given command with the given timeout. +// If the command times out, it attempts to kill the process. +func RunTimeout(c *exec.Cmd, timeout time.Duration) error { + if err := c.Start(); err != nil { + return err + } + return WaitTimeout(c, timeout) +} diff --git a/src/modules/monapi/plugins/ping/ping/exec_unix.go b/src/modules/monapi/plugins/ping/ping/exec_unix.go new file mode 100644 index 00000000..6a38011e --- /dev/null +++ b/src/modules/monapi/plugins/ping/ping/exec_unix.go @@ -0,0 +1,58 @@ +// +build !windows + +package ping + +import ( + "log" + "os/exec" + "syscall" + "time" +) + +// KillGrace is the amount of time we allow a process to shutdown before +// sending a SIGKILL. +const KillGrace = 5 * time.Second + +// WaitTimeout waits for the given command to finish with a timeout. +// It assumes the command has already been started. +// If the command times out, it attempts to kill the process. +func WaitTimeout(c *exec.Cmd, timeout time.Duration) error { + var kill *time.Timer + term := time.AfterFunc(timeout, func() { + err := c.Process.Signal(syscall.SIGTERM) + if err != nil { + log.Printf("E! [agent] Error terminating process: %s", err) + return + } + + kill = time.AfterFunc(KillGrace, func() { + err := c.Process.Kill() + if err != nil { + log.Printf("E! [agent] Error killing process: %s", err) + return + } + }) + }) + + err := c.Wait() + + // Shutdown all timers + if kill != nil { + kill.Stop() + } + termSent := !term.Stop() + + // If the process exited without error treat it as success. This allows a + // process to do a clean shutdown on signal. + if err == nil { + return nil + } + + // If SIGTERM was sent then treat any process error as a timeout. + if termSent { + return ErrTimeout + } + + // Otherwise there was an error unrelated to termination. + return err +} diff --git a/src/modules/monapi/plugins/ping/ping/exec_windows.go b/src/modules/monapi/plugins/ping/ping/exec_windows.go new file mode 100644 index 00000000..8efed399 --- /dev/null +++ b/src/modules/monapi/plugins/ping/ping/exec_windows.go @@ -0,0 +1,41 @@ +// +build windows + +package ping + +import ( + "log" + "os/exec" + "time" +) + +// WaitTimeout waits for the given command to finish with a timeout. +// It assumes the command has already been started. +// If the command times out, it attempts to kill the process. +func WaitTimeout(c *exec.Cmd, timeout time.Duration) error { + timer := time.AfterFunc(timeout, func() { + err := c.Process.Kill() + if err != nil { + log.Printf("E! [agent] Error killing process: %s", err) + return + } + }) + + err := c.Wait() + + // Shutdown all timers + termSent := !timer.Stop() + + // If the process exited without error treat it as success. This allows a + // process to do a clean shutdown on signal. + if err == nil { + return nil + } + + // If SIGTERM was sent then treat any process error as a timeout. + if termSent { + return ErrTimeout + } + + // Otherwise there was an error unrelated to termination. + return err +} diff --git a/src/modules/monapi/plugins/ping/ping/ping.go b/src/modules/monapi/plugins/ping/ping/ping.go new file mode 100644 index 00000000..0b50266b --- /dev/null +++ b/src/modules/monapi/plugins/ping/ping/ping.go @@ -0,0 +1,360 @@ +package ping + +import ( + "errors" + "fmt" + "math" + "net" + "os/exec" + "runtime" + "sort" + "strings" + "sync" + "time" + + "github.com/go-ping/ping" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +// HostPinger is a function that runs the "ping" function using a list of +// passed arguments. This can be easily switched with a mocked ping function +// for unit test purposes (see ping_test.go) +type HostPinger func(binary string, timeout float64, args ...string) (string, error) + +type Ping struct { + // wg is used to wait for ping with multiple URLs + wg sync.WaitGroup + + // Pre-calculated interval and timeout + calcInterval time.Duration + calcTimeout time.Duration + + sourceAddress string + + Log telegraf.Logger `toml:"-"` + + // Interval at which to ping (ping -i ) + PingInterval float64 `toml:"ping_interval"` + + // Number of pings to send (ping -c ) + Count int + + // Per-ping timeout, in seconds. 0 means no timeout (ping -W ) + Timeout float64 + + // Ping deadline, in seconds. 0 means no deadline. (ping -w ) + Deadline int + + // Interface or source address to send ping from (ping -I/-S ) + Interface string + + // URLs to ping + Urls []string + + // Method defines how to ping (native or exec) + Method string + + // Ping executable binary + Binary string + + // Arguments for ping command. When arguments is not empty, system binary will be used and + // other options (ping_interval, timeout, etc) will be ignored + Arguments []string + + // Whether to resolve addresses using ipv6 or not. + IPv6 bool + + // host ping function + PingHost HostPinger + + NativePingFunc NativePingFunc + + // Calculate the given percentiles when using native method + Percentiles []int +} + +func (*Ping) Description() string { + return "Ping given url(s) and return statistics" +} + +const sampleConfig = ` + ## Hosts to send ping packets to. + urls = ["example.org"] + + ## Method used for sending pings, can be either "exec" or "native". When set + ## to "exec" the systems ping command will be executed. When set to "native" + ## the plugin will send pings directly. + ## + ## While the default is "exec" for backwards compatibility, new deployments + ## are encouraged to use the "native" method for improved compatibility and + ## performance. + # method = "exec" + + ## Number of ping packets to send per interval. Corresponds to the "-c" + ## option of the ping command. + # count = 1 + + ## Time to wait between sending ping packets in seconds. Operates like the + ## "-i" option of the ping command. + # ping_interval = 1.0 + + ## If set, the time to wait for a ping response in seconds. Operates like + ## the "-W" option of the ping command. + # timeout = 1.0 + + ## If set, the total ping deadline, in seconds. Operates like the -w option + ## of the ping command. + # deadline = 10 + + ## Interface or source address to send ping from. Operates like the -I or -S + ## option of the ping command. + # interface = "" + + ## Percentiles to calculate. This only works with the native method. + # percentiles = [50, 95, 99] + + ## Specify the ping executable binary. + # binary = "ping" + + ## Arguments for ping command. When arguments is not empty, the command from + ## the binary option will be used and other options (ping_interval, timeout, + ## etc) will be ignored. + # arguments = ["-c", "3"] + + ## Use only IPv6 addresses when resolving a hostname. + # ipv6 = false +` + +func (*Ping) SampleConfig() string { + return sampleConfig +} + +func (p *Ping) Gather(acc telegraf.Accumulator) error { + for _, host := range p.Urls { + p.wg.Add(1) + go func(host string) { + defer p.wg.Done() + + switch p.Method { + case "native": + p.pingToURLNative(host, acc) + default: + p.pingToURL(host, acc) + } + }(host) + } + + p.wg.Wait() + + return nil +} + +type pingStats struct { + ping.Statistics + ttl int +} + +type NativePingFunc func(destination string) (*pingStats, error) + +func (p *Ping) NativePing(destination string) (*pingStats, error) { + ps := &pingStats{} + + pinger, err := ping.NewPinger(destination) + if err != nil { + return nil, fmt.Errorf("failed to create new pinger: %w", err) + } + + // Required for windows. Despite the method name, this should work without the need to elevate privileges and has been tested on Windows 10 + if runtime.GOOS == "windows" { + pinger.SetPrivileged(true) + } + + if p.IPv6 { + pinger.SetNetwork("ip6") + } + + pinger.Source = p.sourceAddress + pinger.Interval = p.calcInterval + + if p.Deadline > 0 { + pinger.Timeout = time.Duration(p.Deadline) * time.Second + } + + // Get Time to live (TTL) of first response, matching original implementation + once := &sync.Once{} + pinger.OnRecv = func(pkt *ping.Packet) { + once.Do(func() { + ps.ttl = pkt.Ttl + }) + } + + pinger.Count = p.Count + err = pinger.Run() + if err != nil { + return nil, fmt.Errorf("failed to run pinger: %w", err) + } + + ps.Statistics = *pinger.Statistics() + + return ps, nil +} + +func (p *Ping) pingToURLNative(destination string, acc telegraf.Accumulator) { + + tags := map[string]string{"url": destination} + fields := map[string]interface{}{} + + stats, err := p.NativePingFunc(destination) + if err != nil { + if strings.Contains(err.Error(), "unknown") { + fields["result_code"] = 1 + } else { + fields["result_code"] = 2 + } + acc.AddFields("ping", fields, tags) + return + } + + fields = map[string]interface{}{ + "result_code": 0, + "packets_transmitted": stats.PacketsSent, + "packets_received": stats.PacketsRecv, + } + + if stats.PacketsSent == 0 { + fields["result_code"] = 2 + acc.AddFields("ping", fields, tags) + return + } + + if stats.PacketsRecv == 0 { + fields["result_code"] = 1 + fields["percent_packet_loss"] = float64(100) + acc.AddFields("ping", fields, tags) + return + } + + sort.Sort(durationSlice(stats.Rtts)) + for _, perc := range p.Percentiles { + var value = percentile(durationSlice(stats.Rtts), perc) + var field = fmt.Sprintf("percentile%v_ms", perc) + fields[field] = float64(value.Nanoseconds()) / float64(time.Millisecond) + } + + // Set TTL only on supported platform. See golang.org/x/net/ipv4/payload_cmsg.go + switch runtime.GOOS { + case "aix", "darwin", "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "solaris": + fields["ttl"] = stats.ttl + } + + fields["percent_packet_loss"] = float64(stats.PacketLoss) + fields["minimum_response_ms"] = float64(stats.MinRtt) / float64(time.Millisecond) + fields["average_response_ms"] = float64(stats.AvgRtt) / float64(time.Millisecond) + fields["maximum_response_ms"] = float64(stats.MaxRtt) / float64(time.Millisecond) + fields["standard_deviation_ms"] = float64(stats.StdDevRtt) / float64(time.Millisecond) + + acc.AddFields("ping", fields, tags) +} + +type durationSlice []time.Duration + +func (p durationSlice) Len() int { return len(p) } +func (p durationSlice) Less(i, j int) bool { return p[i] < p[j] } +func (p durationSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +// R7 from Hyndman and Fan (1996), which matches Excel +func percentile(values durationSlice, perc int) time.Duration { + if len(values) == 0 { + return 0 + } + if perc < 0 { + perc = 0 + } + if perc > 100 { + perc = 100 + } + var percFloat = float64(perc) / 100.0 + + var count = len(values) + var rank = percFloat * float64(count-1) + var rankInteger = int(rank) + var rankFraction = rank - math.Floor(rank) + + if rankInteger >= count-1 { + return values[count-1] + } + + upper := values[rankInteger+1] + lower := values[rankInteger] + return lower + time.Duration(rankFraction*float64(upper-lower)) +} + +// Init ensures the plugin is configured correctly. +func (p *Ping) Init() error { + if p.Count < 1 { + return errors.New("bad number of packets to transmit") + } + + // The interval cannot be below 0.2 seconds, matching ping implementation: https://linux.die.net/man/8/ping + if p.PingInterval < 0.2 { + p.calcInterval = time.Duration(.2 * float64(time.Second)) + } else { + p.calcInterval = time.Duration(p.PingInterval * float64(time.Second)) + } + + // If no timeout is given default to 5 seconds, matching original implementation + if p.Timeout == 0 { + p.calcTimeout = time.Duration(5) * time.Second + } else { + p.calcTimeout = time.Duration(p.Timeout) * time.Second + } + + // Support either an IP address or interface name + if p.Interface != "" { + if addr := net.ParseIP(p.Interface); addr != nil { + p.sourceAddress = p.Interface + } else { + i, err := net.InterfaceByName(p.Interface) + if err != nil { + return fmt.Errorf("failed to get interface: %w", err) + } + addrs, err := i.Addrs() + if err != nil { + return fmt.Errorf("failed to get the address of interface: %w", err) + } + p.sourceAddress = addrs[0].(*net.IPNet).IP.String() + } + } + + return nil +} + +func MyHostPinger(binary string, timeout float64, args ...string) (string, error) { + bin, err := exec.LookPath(binary) + if err != nil { + return "", err + } + c := exec.Command(bin, args...) + out, err := CombinedOutputTimeout(c, + time.Second*time.Duration(timeout+5)) + return string(out), err +} + +func init() { + inputs.Add("ping", func() telegraf.Input { + p := &Ping{ + PingHost: MyHostPinger, + PingInterval: 1.0, + Count: 1, + Timeout: 1.0, + Deadline: 10, + Method: "exec", + Binary: "ping", + Arguments: []string{}, + Percentiles: []int{}, + } + p.NativePingFunc = p.NativePing + return p + }) +} diff --git a/src/modules/monapi/plugins/ping/ping/ping_notwindows.go b/src/modules/monapi/plugins/ping/ping/ping_notwindows.go new file mode 100644 index 00000000..48816cc5 --- /dev/null +++ b/src/modules/monapi/plugins/ping/ping/ping_notwindows.go @@ -0,0 +1,235 @@ +// +build !windows + +package ping + +import ( + "errors" + "fmt" + "os/exec" + "regexp" + "runtime" + "strconv" + "strings" + "syscall" + + "github.com/influxdata/telegraf" +) + +func (p *Ping) pingToURL(u string, acc telegraf.Accumulator) { + tags := map[string]string{"url": u} + fields := map[string]interface{}{"result_code": 0} + + out, err := p.PingHost(p.Binary, 60.0, p.args(u, runtime.GOOS)...) + if err != nil { + // Some implementations of ping return a non-zero exit code on + // timeout, if this occurs we will not exit and try to parse + // the output. + // Linux iputils-ping returns 1, BSD-derived ping returns 2. + status := -1 + if exitError, ok := err.(*exec.ExitError); ok { + if ws, ok := exitError.Sys().(syscall.WaitStatus); ok { + status = ws.ExitStatus() + fields["result_code"] = status + } + } + + var timeoutExitCode int + switch runtime.GOOS { + case "freebsd", "netbsd", "openbsd", "darwin": + timeoutExitCode = 2 + case "linux": + timeoutExitCode = 1 + default: + timeoutExitCode = 1 + } + + if status != timeoutExitCode { + // Combine go err + stderr output + out = strings.TrimSpace(out) + if len(out) > 0 { + acc.AddError(fmt.Errorf("host %s: %s, %s", u, out, err)) + } else { + acc.AddError(fmt.Errorf("host %s: %s", u, err)) + } + fields["result_code"] = 2 + acc.AddFields("ping", fields, tags) + return + } + } + trans, rec, ttl, min, avg, max, stddev, err := processPingOutput(out) + if err != nil { + // fatal error + acc.AddError(fmt.Errorf("%s: %s", err, u)) + fields["result_code"] = 2 + acc.AddFields("ping", fields, tags) + return + } + + // Calculate packet loss percentage + loss := float64(trans-rec) / float64(trans) * 100.0 + + fields["packets_transmitted"] = trans + fields["packets_received"] = rec + fields["percent_packet_loss"] = loss + if ttl >= 0 { + fields["ttl"] = ttl + } + if min >= 0 { + fields["minimum_response_ms"] = min + } + if avg >= 0 { + fields["average_response_ms"] = avg + } + if max >= 0 { + fields["maximum_response_ms"] = max + } + if stddev >= 0 { + fields["standard_deviation_ms"] = stddev + } + acc.AddFields("ping", fields, tags) +} + +// args returns the arguments for the 'ping' executable +func (p *Ping) args(url string, system string) []string { + if len(p.Arguments) > 0 { + return append(p.Arguments, url) + } + + // build the ping command args based on toml config + args := []string{"-c", strconv.Itoa(p.Count), "-n", "-s", "16"} + if p.PingInterval > 0 { + args = append(args, "-i", strconv.FormatFloat(p.PingInterval, 'f', -1, 64)) + } + if p.Timeout > 0 { + switch system { + case "darwin": + args = append(args, "-W", strconv.FormatFloat(p.Timeout*1000, 'f', -1, 64)) + case "freebsd": + if strings.Contains(p.Binary, "ping6") { + args = append(args, "-x", strconv.FormatFloat(p.Timeout*1000, 'f', -1, 64)) + } else { + args = append(args, "-W", strconv.FormatFloat(p.Timeout*1000, 'f', -1, 64)) + } + case "netbsd", "openbsd": + args = append(args, "-W", strconv.FormatFloat(p.Timeout*1000, 'f', -1, 64)) + case "linux": + args = append(args, "-W", strconv.FormatFloat(p.Timeout, 'f', -1, 64)) + default: + // Not sure the best option here, just assume GNU ping? + args = append(args, "-W", strconv.FormatFloat(p.Timeout, 'f', -1, 64)) + } + } + if p.Deadline > 0 { + switch system { + case "freebsd": + if strings.Contains(p.Binary, "ping6") { + args = append(args, "-X", strconv.Itoa(p.Deadline)) + } else { + args = append(args, "-t", strconv.Itoa(p.Deadline)) + } + case "darwin", "netbsd", "openbsd": + args = append(args, "-t", strconv.Itoa(p.Deadline)) + case "linux": + args = append(args, "-w", strconv.Itoa(p.Deadline)) + default: + // not sure the best option here, just assume gnu ping? + args = append(args, "-w", strconv.Itoa(p.Deadline)) + } + } + if p.Interface != "" { + switch system { + case "darwin": + args = append(args, "-I", p.Interface) + case "freebsd", "netbsd", "openbsd": + args = append(args, "-S", p.Interface) + case "linux": + args = append(args, "-I", p.Interface) + default: + // not sure the best option here, just assume gnu ping? + args = append(args, "-i", p.Interface) + } + } + args = append(args, url) + return args +} + +// processPingOutput takes in a string output from the ping command, like: +// +// ping www.google.com (173.194.115.84): 56 data bytes +// 64 bytes from 173.194.115.84: icmp_seq=0 ttl=54 time=52.172 ms +// 64 bytes from 173.194.115.84: icmp_seq=1 ttl=54 time=34.843 ms +// +// --- www.google.com ping statistics --- +// 2 packets transmitted, 2 packets received, 0.0% packet loss +// round-trip min/avg/max/stddev = 34.843/43.508/52.172/8.664 ms +// +// It returns (, , ) +func processPingOutput(out string) (int, int, int, float64, float64, float64, float64, error) { + var trans, recv, ttl int = 0, 0, -1 + var min, avg, max, stddev float64 = -1.0, -1.0, -1.0, -1.0 + // Set this error to nil if we find a 'transmitted' line + err := errors.New("Fatal error processing ping output") + lines := strings.Split(out, "\n") + for _, line := range lines { + // Reading only first TTL, ignoring other TTL messages + if ttl == -1 && (strings.Contains(line, "ttl=") || strings.Contains(line, "hlim=")) { + ttl, err = getTTL(line) + } else if strings.Contains(line, "transmitted") && + strings.Contains(line, "received") { + trans, recv, err = getPacketStats(line, trans, recv) + if err != nil { + return trans, recv, ttl, min, avg, max, stddev, err + } + } else if strings.Contains(line, "min/avg/max") { + min, avg, max, stddev, err = checkRoundTripTimeStats(line, min, avg, max, stddev) + if err != nil { + return trans, recv, ttl, min, avg, max, stddev, err + } + } + } + return trans, recv, ttl, min, avg, max, stddev, err +} + +func getPacketStats(line string, trans, recv int) (int, int, error) { + stats := strings.Split(line, ", ") + // Transmitted packets + trans, err := strconv.Atoi(strings.Split(stats[0], " ")[0]) + if err != nil { + return trans, recv, err + } + // Received packets + recv, err = strconv.Atoi(strings.Split(stats[1], " ")[0]) + return trans, recv, err +} + +func getTTL(line string) (int, error) { + ttlLine := regexp.MustCompile(`(ttl|hlim)=(\d+)`) + ttlMatch := ttlLine.FindStringSubmatch(line) + return strconv.Atoi(ttlMatch[2]) +} + +func checkRoundTripTimeStats(line string, min, avg, max, + stddev float64) (float64, float64, float64, float64, error) { + stats := strings.Split(line, " ")[3] + data := strings.Split(stats, "/") + + min, err := strconv.ParseFloat(data[0], 64) + if err != nil { + return min, avg, max, stddev, err + } + avg, err = strconv.ParseFloat(data[1], 64) + if err != nil { + return min, avg, max, stddev, err + } + max, err = strconv.ParseFloat(data[2], 64) + if err != nil { + return min, avg, max, stddev, err + } + if len(data) == 4 { + stddev, err = strconv.ParseFloat(data[3], 64) + if err != nil { + return min, avg, max, stddev, err + } + } + return min, avg, max, stddev, err +} diff --git a/src/modules/monapi/plugins/ping/ping/ping_test.go b/src/modules/monapi/plugins/ping/ping/ping_test.go new file mode 100644 index 00000000..f1e15d5b --- /dev/null +++ b/src/modules/monapi/plugins/ping/ping/ping_test.go @@ -0,0 +1,543 @@ +// +build !windows + +package ping + +import ( + "context" + "errors" + "fmt" + "net" + "reflect" + "sort" + "testing" + "time" + + "github.com/go-ping/ping" + "github.com/influxdata/telegraf/plugins/inputs" + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// BSD/Darwin ping output +var bsdPingOutput = ` +PING www.google.com (216.58.217.36): 56 data bytes +64 bytes from 216.58.217.36: icmp_seq=0 ttl=55 time=15.087 ms +64 bytes from 216.58.217.36: icmp_seq=1 ttl=55 time=21.564 ms +64 bytes from 216.58.217.36: icmp_seq=2 ttl=55 time=27.263 ms +64 bytes from 216.58.217.36: icmp_seq=3 ttl=55 time=18.828 ms +64 bytes from 216.58.217.36: icmp_seq=4 ttl=55 time=18.378 ms + +--- www.google.com ping statistics --- +5 packets transmitted, 5 packets received, 0.0% packet loss +round-trip min/avg/max/stddev = 15.087/20.224/27.263/4.076 ms +` + +// FreeBSD ping6 output +var freebsdPing6Output = ` +PING6(64=40+8+16 bytes) 2001:db8::1 --> 2a00:1450:4001:824::2004 +24 bytes from 2a00:1450:4001:824::2004, icmp_seq=0 hlim=117 time=93.870 ms +24 bytes from 2a00:1450:4001:824::2004, icmp_seq=1 hlim=117 time=40.278 ms +24 bytes from 2a00:1450:4001:824::2004, icmp_seq=2 hlim=120 time=59.077 ms +24 bytes from 2a00:1450:4001:824::2004, icmp_seq=3 hlim=117 time=37.102 ms +24 bytes from 2a00:1450:4001:824::2004, icmp_seq=4 hlim=117 time=35.727 ms + +--- www.google.com ping6 statistics --- +5 packets transmitted, 5 packets received, 0.0% packet loss +round-trip min/avg/max/std-dev = 35.727/53.211/93.870/22.000 ms +` + +// Linux ping output +var linuxPingOutput = ` +PING www.google.com (216.58.218.164) 56(84) bytes of data. +64 bytes from host.net (216.58.218.164): icmp_seq=1 ttl=63 time=35.2 ms +64 bytes from host.net (216.58.218.164): icmp_seq=2 ttl=63 time=42.3 ms +64 bytes from host.net (216.58.218.164): icmp_seq=3 ttl=63 time=45.1 ms +64 bytes from host.net (216.58.218.164): icmp_seq=4 ttl=63 time=43.5 ms +64 bytes from host.net (216.58.218.164): icmp_seq=5 ttl=63 time=51.8 ms + +--- www.google.com ping statistics --- +5 packets transmitted, 5 received, 0% packet loss, time 4010ms +rtt min/avg/max/mdev = 35.225/43.628/51.806/5.325 ms +` + +// BusyBox v1.24.1 (2017-02-28 03:28:13 CET) multi-call binary +var busyBoxPingOutput = ` +PING 8.8.8.8 (8.8.8.8): 56 data bytes +64 bytes from 8.8.8.8: seq=0 ttl=56 time=22.559 ms +64 bytes from 8.8.8.8: seq=1 ttl=56 time=15.810 ms +64 bytes from 8.8.8.8: seq=2 ttl=56 time=16.262 ms +64 bytes from 8.8.8.8: seq=3 ttl=56 time=15.815 ms + +--- 8.8.8.8 ping statistics --- +4 packets transmitted, 4 packets received, 0% packet loss +round-trip min/avg/max = 15.810/17.611/22.559 ms +` + +// Fatal ping output (invalid argument) +var fatalPingOutput = ` +ping: -i interval too short: Operation not permitted +` + +// Test that ping command output is processed properly +func TestProcessPingOutput(t *testing.T) { + trans, rec, ttl, min, avg, max, stddev, err := processPingOutput(bsdPingOutput) + assert.NoError(t, err) + assert.Equal(t, 55, ttl, "ttl value is 55") + assert.Equal(t, 5, trans, "5 packets were transmitted") + assert.Equal(t, 5, rec, "5 packets were received") + assert.InDelta(t, 15.087, min, 0.001) + assert.InDelta(t, 20.224, avg, 0.001) + assert.InDelta(t, 27.263, max, 0.001) + assert.InDelta(t, 4.076, stddev, 0.001) + + trans, rec, ttl, min, avg, max, stddev, err = processPingOutput(freebsdPing6Output) + assert.NoError(t, err) + assert.Equal(t, 117, ttl, "ttl value is 117") + assert.Equal(t, 5, trans, "5 packets were transmitted") + assert.Equal(t, 5, rec, "5 packets were received") + assert.InDelta(t, 35.727, min, 0.001) + assert.InDelta(t, 53.211, avg, 0.001) + assert.InDelta(t, 93.870, max, 0.001) + assert.InDelta(t, 22.000, stddev, 0.001) + + trans, rec, ttl, min, avg, max, stddev, err = processPingOutput(linuxPingOutput) + assert.NoError(t, err) + assert.Equal(t, 63, ttl, "ttl value is 63") + assert.Equal(t, 5, trans, "5 packets were transmitted") + assert.Equal(t, 5, rec, "5 packets were received") + assert.InDelta(t, 35.225, min, 0.001) + assert.InDelta(t, 43.628, avg, 0.001) + assert.InDelta(t, 51.806, max, 0.001) + assert.InDelta(t, 5.325, stddev, 0.001) + + trans, rec, ttl, min, avg, max, stddev, err = processPingOutput(busyBoxPingOutput) + assert.NoError(t, err) + assert.Equal(t, 56, ttl, "ttl value is 56") + assert.Equal(t, 4, trans, "4 packets were transmitted") + assert.Equal(t, 4, rec, "4 packets were received") + assert.InDelta(t, 15.810, min, 0.001) + assert.InDelta(t, 17.611, avg, 0.001) + assert.InDelta(t, 22.559, max, 0.001) + assert.InDelta(t, -1.0, stddev, 0.001) +} + +// Linux ping output with varying TTL +var linuxPingOutputWithVaryingTTL = ` +PING www.google.com (216.58.218.164) 56(84) bytes of data. +64 bytes from host.net (216.58.218.164): icmp_seq=1 ttl=63 time=35.2 ms +64 bytes from host.net (216.58.218.164): icmp_seq=2 ttl=255 time=42.3 ms +64 bytes from host.net (216.58.218.164): icmp_seq=3 ttl=64 time=45.1 ms +64 bytes from host.net (216.58.218.164): icmp_seq=4 ttl=64 time=43.5 ms +64 bytes from host.net (216.58.218.164): icmp_seq=5 ttl=255 time=51.8 ms + +--- www.google.com ping statistics --- +5 packets transmitted, 5 received, 0% packet loss, time 4010ms +rtt min/avg/max/mdev = 35.225/43.628/51.806/5.325 ms +` + +// Test that ping command output is processed properly +func TestProcessPingOutputWithVaryingTTL(t *testing.T) { + trans, rec, ttl, min, avg, max, stddev, err := processPingOutput(linuxPingOutputWithVaryingTTL) + assert.NoError(t, err) + assert.Equal(t, 63, ttl, "ttl value is 63") + assert.Equal(t, 5, trans, "5 packets were transmitted") + assert.Equal(t, 5, rec, "5 packets were transmitted") + assert.InDelta(t, 35.225, min, 0.001) + assert.InDelta(t, 43.628, avg, 0.001) + assert.InDelta(t, 51.806, max, 0.001) + assert.InDelta(t, 5.325, stddev, 0.001) +} + +// Test that processPingOutput returns an error when 'ping' fails to run, such +// as when an invalid argument is provided +func TestErrorProcessPingOutput(t *testing.T) { + _, _, _, _, _, _, _, err := processPingOutput(fatalPingOutput) + assert.Error(t, err, "Error was expected from processPingOutput") +} + +// Test that default arg lists are created correctly +func TestArgs(t *testing.T) { + p := Ping{ + Count: 2, + Interface: "eth0", + Timeout: 12.0, + Deadline: 24, + PingInterval: 1.2, + } + + var systemCases = []struct { + system string + output []string + }{ + {"darwin", []string{"-c", "2", "-n", "-s", "16", "-i", "1.2", "-W", "12000", "-t", "24", "-I", "eth0", "www.google.com"}}, + {"linux", []string{"-c", "2", "-n", "-s", "16", "-i", "1.2", "-W", "12", "-w", "24", "-I", "eth0", "www.google.com"}}, + {"anything else", []string{"-c", "2", "-n", "-s", "16", "-i", "1.2", "-W", "12", "-w", "24", "-i", "eth0", "www.google.com"}}, + } + for i := range systemCases { + actual := p.args("www.google.com", systemCases[i].system) + expected := systemCases[i].output + sort.Strings(actual) + sort.Strings(expected) + require.True(t, reflect.DeepEqual(expected, actual), + "Expected: %s Actual: %s", expected, actual) + } +} + +// Test that default arg lists for ping6 are created correctly +func TestArgs6(t *testing.T) { + p := Ping{ + Count: 2, + Interface: "eth0", + Timeout: 12.0, + Deadline: 24, + PingInterval: 1.2, + Binary: "ping6", + } + + var systemCases = []struct { + system string + output []string + }{ + {"freebsd", []string{"-c", "2", "-n", "-s", "16", "-i", "1.2", "-x", "12000", "-X", "24", "-S", "eth0", "www.google.com"}}, + {"linux", []string{"-c", "2", "-n", "-s", "16", "-i", "1.2", "-W", "12", "-w", "24", "-I", "eth0", "www.google.com"}}, + {"anything else", []string{"-c", "2", "-n", "-s", "16", "-i", "1.2", "-W", "12", "-w", "24", "-i", "eth0", "www.google.com"}}, + } + for i := range systemCases { + actual := p.args("www.google.com", systemCases[i].system) + expected := systemCases[i].output + sort.Strings(actual) + sort.Strings(expected) + require.True(t, reflect.DeepEqual(expected, actual), + "Expected: %s Actual: %s", expected, actual) + } +} + +func TestArguments(t *testing.T) { + arguments := []string{"-c", "3"} + expected := append(arguments, "www.google.com") + p := Ping{ + Count: 2, + Interface: "eth0", + Timeout: 12.0, + Deadline: 24, + PingInterval: 1.2, + Arguments: arguments, + } + + for _, system := range []string{"darwin", "linux", "anything else"} { + actual := p.args("www.google.com", system) + require.True(t, reflect.DeepEqual(actual, expected), "Expected: %s Actual: %s", expected, actual) + } +} + +func mockHostPinger(binary string, timeout float64, args ...string) (string, error) { + return linuxPingOutput, nil +} + +// Test that Gather function works on a normal ping +func TestPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"localhost", "influxdata.com"}, + PingHost: mockHostPinger, + } + + acc.GatherError(p.Gather) + tags := map[string]string{"url": "localhost"} + fields := map[string]interface{}{ + "packets_transmitted": 5, + "packets_received": 5, + "percent_packet_loss": 0.0, + "ttl": 63, + "minimum_response_ms": 35.225, + "average_response_ms": 43.628, + "maximum_response_ms": 51.806, + "standard_deviation_ms": 5.325, + "result_code": 0, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) + + tags = map[string]string{"url": "influxdata.com"} + acc.AssertContainsTaggedFields(t, "ping", fields, tags) +} + +func TestPingGatherIntegration(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode, retrieves systems ping utility") + } + + var acc testutil.Accumulator + p, ok := inputs.Inputs["ping"]().(*Ping) + require.True(t, ok) + p.Urls = []string{"localhost", "influxdata.com"} + err := acc.GatherError(p.Gather) + require.NoError(t, err) + require.Equal(t, 0, acc.Metrics[0].Fields["result_code"]) + require.Equal(t, 0, acc.Metrics[1].Fields["result_code"]) +} + +var lossyPingOutput = ` +PING www.google.com (216.58.218.164) 56(84) bytes of data. +64 bytes from host.net (216.58.218.164): icmp_seq=1 ttl=63 time=35.2 ms +64 bytes from host.net (216.58.218.164): icmp_seq=3 ttl=63 time=45.1 ms +64 bytes from host.net (216.58.218.164): icmp_seq=5 ttl=63 time=51.8 ms + +--- www.google.com ping statistics --- +5 packets transmitted, 3 received, 40% packet loss, time 4010ms +rtt min/avg/max/mdev = 35.225/44.033/51.806/5.325 ms +` + +func mockLossyHostPinger(binary string, timeout float64, args ...string) (string, error) { + return lossyPingOutput, nil +} + +// Test that Gather works on a ping with lossy packets +func TestLossyPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.google.com"}, + PingHost: mockLossyHostPinger, + } + + acc.GatherError(p.Gather) + tags := map[string]string{"url": "www.google.com"} + fields := map[string]interface{}{ + "packets_transmitted": 5, + "packets_received": 3, + "percent_packet_loss": 40.0, + "ttl": 63, + "minimum_response_ms": 35.225, + "average_response_ms": 44.033, + "maximum_response_ms": 51.806, + "standard_deviation_ms": 5.325, + "result_code": 0, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) +} + +var errorPingOutput = ` +PING www.amazon.com (176.32.98.166): 56 data bytes +Request timeout for icmp_seq 0 + +--- www.amazon.com ping statistics --- +2 packets transmitted, 0 packets received, 100.0% packet loss +` + +func mockErrorHostPinger(binary string, timeout float64, args ...string) (string, error) { + // This error will not trigger correct error paths + return errorPingOutput, nil +} + +// Test that Gather works on a ping with no transmitted packets, even though the +// command returns an error +func TestBadPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.amazon.com"}, + PingHost: mockErrorHostPinger, + } + + acc.GatherError(p.Gather) + tags := map[string]string{"url": "www.amazon.com"} + fields := map[string]interface{}{ + "packets_transmitted": 2, + "packets_received": 0, + "percent_packet_loss": 100.0, + "result_code": 0, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) +} + +func mockFatalHostPinger(binary string, timeout float64, args ...string) (string, error) { + return fatalPingOutput, errors.New("So very bad") +} + +// Test that a fatal ping command does not gather any statistics. +func TestFatalPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.amazon.com"}, + PingHost: mockFatalHostPinger, + } + + acc.GatherError(p.Gather) + assert.False(t, acc.HasMeasurement("packets_transmitted"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasMeasurement("packets_received"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasMeasurement("percent_packet_loss"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasMeasurement("ttl"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasMeasurement("minimum_response_ms"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasMeasurement("average_response_ms"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasMeasurement("maximum_response_ms"), + "Fatal ping should not have packet measurements") +} + +func TestErrorWithHostNamePingGather(t *testing.T) { + params := []struct { + out string + error error + }{ + {"", errors.New("host www.amazon.com: So very bad")}, + {"so bad", errors.New("host www.amazon.com: so bad, So very bad")}, + } + + for _, param := range params { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.amazon.com"}, + PingHost: func(binary string, timeout float64, args ...string) (string, error) { + return param.out, errors.New("So very bad") + }, + } + acc.GatherError(p.Gather) + assert.True(t, len(acc.Errors) > 0) + assert.Contains(t, acc.Errors, param.error) + } +} + +func TestPingBinary(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.google.com"}, + Binary: "ping6", + PingHost: func(binary string, timeout float64, args ...string) (string, error) { + assert.True(t, binary == "ping6") + return "", nil + }, + } + acc.GatherError(p.Gather) +} + +func mockHostResolver(ctx context.Context, ipv6 bool, host string) (*net.IPAddr, error) { + ipaddr := net.IPAddr{} + ipaddr.IP = net.IPv4(127, 0, 0, 1) + return &ipaddr, nil +} + +// Test that Gather function works using native ping +func TestPingGatherNative(t *testing.T) { + type test struct { + P *Ping + } + + fakePingFunc := func(destination string) (*pingStats, error) { + s := &pingStats{ + Statistics: ping.Statistics{ + PacketsSent: 5, + PacketsRecv: 5, + Rtts: []time.Duration{ + 3 * time.Millisecond, + 4 * time.Millisecond, + 1 * time.Millisecond, + 5 * time.Millisecond, + 2 * time.Millisecond, + }, + }, + ttl: 1, + } + + return s, nil + } + p1 := &Ping{ + Urls: []string{"localhost", "127.0.0.2"}, + Method: "native", + Count: 5, + PingInterval: 1, + Percentiles: []int{50, 95, 99}, + //NativePingFunc: fakePingFunc, + } + p1.NativePingFunc = p1.NativePing + + tests := []test{ + { + p1, + }, + { + P: &Ping{ + Urls: []string{"localhost", "127.0.0.2"}, + Method: "native", + Count: 5, + PingInterval: 1, + Percentiles: []int{50, 95, 99}, + NativePingFunc: fakePingFunc, + }, + }, + } + + for _, tc := range tests { + var acc testutil.Accumulator + err := tc.P.Init() + require.NoError(t, err) + require.NoError(t, acc.GatherError(tc.P.Gather)) + fmt.Println(acc.Metrics) + assert.True(t, acc.HasPoint("ping", map[string]string{"url": "localhost"}, "packets_transmitted", 5)) + assert.True(t, acc.HasPoint("ping", map[string]string{"url": "localhost"}, "packets_received", 5)) + assert.True(t, acc.HasField("ping", "percentile50_ms")) + assert.Equal(t, float64(3), acc.Metrics[0].Fields["percentile50_ms"]) + assert.True(t, acc.HasField("ping", "percentile95_ms")) + assert.Equal(t, float64(4.799999), acc.Metrics[0].Fields["percentile95_ms"]) + assert.True(t, acc.HasField("ping", "percentile99_ms")) + assert.Equal(t, float64(4.96), acc.Metrics[0].Fields["percentile99_ms"]) + assert.True(t, acc.HasField("ping", "percent_packet_loss")) + assert.True(t, acc.HasField("ping", "minimum_response_ms")) + assert.True(t, acc.HasField("ping", "average_response_ms")) + assert.True(t, acc.HasField("ping", "maximum_response_ms")) + assert.True(t, acc.HasField("ping", "standard_deviation_ms")) + } + +} + +func TestNoPacketsSent(t *testing.T) { + p := &Ping{ + Urls: []string{"localhost", "127.0.0.2"}, + Method: "native", + Count: 5, + Percentiles: []int{50, 95, 99}, + NativePingFunc: func(destination string) (*pingStats, error) { + s := &pingStats{ + Statistics: ping.Statistics{ + PacketsSent: 0, + PacketsRecv: 0, + }, + } + + return s, nil + }, + } + + var testAcc testutil.Accumulator + err := p.Init() + require.NoError(t, err) + p.pingToURLNative("localhost", &testAcc) + require.Zero(t, testAcc.Errors) + require.True(t, testAcc.HasField("ping", "result_code")) + require.Equal(t, 2, testAcc.Metrics[0].Fields["result_code"]) +} + +// Test failed DNS resolutions +func TestDNSLookupError(t *testing.T) { + p := &Ping{ + Count: 1, + Log: testutil.Logger{}, + Urls: []string{"localhost"}, + Method: "native", + IPv6: false, + NativePingFunc: func(destination string) (*pingStats, error) { + return nil, fmt.Errorf("unknown") + }, + } + + var testAcc testutil.Accumulator + err := p.Init() + require.NoError(t, err) + p.pingToURLNative("localhost", &testAcc) + require.Zero(t, testAcc.Errors) + require.True(t, testAcc.HasField("ping", "result_code")) + require.Equal(t, 1, testAcc.Metrics[0].Fields["result_code"]) +} diff --git a/src/modules/monapi/plugins/ping/ping/ping_windows.go b/src/modules/monapi/plugins/ping/ping/ping_windows.go new file mode 100644 index 00000000..f53d6f09 --- /dev/null +++ b/src/modules/monapi/plugins/ping/ping/ping_windows.go @@ -0,0 +1,151 @@ +// +build windows + +package ping + +import ( + "errors" + "fmt" + "regexp" + "strconv" + "strings" + + "github.com/influxdata/telegraf" +) + +func (p *Ping) pingToURL(u string, acc telegraf.Accumulator) { + tags := map[string]string{"url": u} + fields := map[string]interface{}{"result_code": 0} + + args := p.args(u) + totalTimeout := 60.0 + if len(p.Arguments) == 0 { + totalTimeout = p.timeout() * float64(p.Count) + } + + out, err := p.pingHost(p.Binary, totalTimeout, args...) + // ping host return exitcode != 0 also when there was no response from host + // but command was execute successfully + var pendingError error + if err != nil { + // Combine go err + stderr output + pendingError = errors.New(strings.TrimSpace(out) + ", " + err.Error()) + } + trans, recReply, receivePacket, avg, min, max, err := processPingOutput(out) + if err != nil { + // fatal error + if pendingError != nil { + acc.AddError(fmt.Errorf("%s: %s", pendingError, u)) + } else { + acc.AddError(fmt.Errorf("%s: %s", err, u)) + } + + fields["result_code"] = 2 + fields["errors"] = 100.0 + acc.AddFields("ping", fields, tags) + return + } + // Calculate packet loss percentage + lossReply := float64(trans-recReply) / float64(trans) * 100.0 + lossPackets := float64(trans-receivePacket) / float64(trans) * 100.0 + + fields["packets_transmitted"] = trans + fields["reply_received"] = recReply + fields["packets_received"] = receivePacket + fields["percent_packet_loss"] = lossPackets + fields["percent_reply_loss"] = lossReply + if avg >= 0 { + fields["average_response_ms"] = float64(avg) + } + if min >= 0 { + fields["minimum_response_ms"] = float64(min) + } + if max >= 0 { + fields["maximum_response_ms"] = float64(max) + } + acc.AddFields("ping", fields, tags) +} + +// args returns the arguments for the 'ping' executable +func (p *Ping) args(url string) []string { + if len(p.Arguments) > 0 { + return p.Arguments + } + + args := []string{"-n", strconv.Itoa(p.Count)} + + if p.Timeout > 0 { + args = append(args, "-w", strconv.FormatFloat(p.Timeout*1000, 'f', 0, 64)) + } + + args = append(args, url) + + return args +} + +// processPingOutput takes in a string output from the ping command +// based on linux implementation but using regex ( multilanguage support ) +// It returns (, , , , , ) +func processPingOutput(out string) (int, int, int, int, int, int, error) { + // So find a line contain 3 numbers except reply lines + var stats, aproxs []string = nil, nil + err := errors.New("Fatal error processing ping output") + stat := regexp.MustCompile(`=\W*(\d+)\D*=\W*(\d+)\D*=\W*(\d+)`) + aprox := regexp.MustCompile(`=\W*(\d+)\D*ms\D*=\W*(\d+)\D*ms\D*=\W*(\d+)\D*ms`) + tttLine := regexp.MustCompile(`TTL=\d+`) + lines := strings.Split(out, "\n") + var receivedReply int = 0 + for _, line := range lines { + if tttLine.MatchString(line) { + receivedReply++ + } else { + if stats == nil { + stats = stat.FindStringSubmatch(line) + } + if stats != nil && aproxs == nil { + aproxs = aprox.FindStringSubmatch(line) + } + } + } + + // stats data should contain 4 members: entireExpression + ( Send, Receive, Lost ) + if len(stats) != 4 { + return 0, 0, 0, -1, -1, -1, err + } + trans, err := strconv.Atoi(stats[1]) + if err != nil { + return 0, 0, 0, -1, -1, -1, err + } + receivedPacket, err := strconv.Atoi(stats[2]) + if err != nil { + return 0, 0, 0, -1, -1, -1, err + } + + // aproxs data should contain 4 members: entireExpression + ( min, max, avg ) + if len(aproxs) != 4 { + return trans, receivedReply, receivedPacket, -1, -1, -1, err + } + min, err := strconv.Atoi(aproxs[1]) + if err != nil { + return trans, receivedReply, receivedPacket, -1, -1, -1, err + } + max, err := strconv.Atoi(aproxs[2]) + if err != nil { + return trans, receivedReply, receivedPacket, -1, -1, -1, err + } + avg, err := strconv.Atoi(aproxs[3]) + if err != nil { + return 0, 0, 0, -1, -1, -1, err + } + + return trans, receivedReply, receivedPacket, avg, min, max, err +} + +func (p *Ping) timeout() float64 { + // According to MSDN, default ping timeout for windows is 4 second + // Add also one second interval + + if p.Timeout > 0 { + return p.Timeout + 1 + } + return 4 + 1 +} diff --git a/src/modules/monapi/plugins/ping/ping/ping_windows_test.go b/src/modules/monapi/plugins/ping/ping/ping_windows_test.go new file mode 100644 index 00000000..4618ec4d --- /dev/null +++ b/src/modules/monapi/plugins/ping/ping/ping_windows_test.go @@ -0,0 +1,362 @@ +// +build windows + +package ping + +import ( + "errors" + "reflect" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Windows ping format ( should support multilanguage ?) +var winPLPingOutput = ` +Badanie 8.8.8.8 z 32 bajtami danych: +Odpowiedz z 8.8.8.8: bajtow=32 czas=49ms TTL=43 +Odpowiedz z 8.8.8.8: bajtow=32 czas=46ms TTL=43 +Odpowiedz z 8.8.8.8: bajtow=32 czas=48ms TTL=43 +Odpowiedz z 8.8.8.8: bajtow=32 czas=57ms TTL=43 + +Statystyka badania ping dla 8.8.8.8: + Pakiety: Wyslane = 4, Odebrane = 4, Utracone = 0 + (0% straty), +Szacunkowy czas bladzenia pakietww w millisekundach: + Minimum = 46 ms, Maksimum = 57 ms, Czas sredni = 50 ms +` + +// Windows ping format ( should support multilanguage ?) +var winENPingOutput = ` +Pinging 8.8.8.8 with 32 bytes of data: +Reply from 8.8.8.8: bytes=32 time=52ms TTL=43 +Reply from 8.8.8.8: bytes=32 time=50ms TTL=43 +Reply from 8.8.8.8: bytes=32 time=50ms TTL=43 +Reply from 8.8.8.8: bytes=32 time=51ms TTL=43 + +Ping statistics for 8.8.8.8: + Packets: Sent = 4, Received = 4, Lost = 0 (0% loss), +Approximate round trip times in milli-seconds: + Minimum = 50ms, Maximum = 52ms, Average = 50ms +` + +func TestHost(t *testing.T) { + trans, recReply, recPacket, avg, min, max, err := processPingOutput(winPLPingOutput) + assert.NoError(t, err) + assert.Equal(t, 4, trans, "4 packets were transmitted") + assert.Equal(t, 4, recReply, "4 packets were reply") + assert.Equal(t, 4, recPacket, "4 packets were received") + assert.Equal(t, 50, avg, "Average 50") + assert.Equal(t, 46, min, "Min 46") + assert.Equal(t, 57, max, "max 57") + + trans, recReply, recPacket, avg, min, max, err = processPingOutput(winENPingOutput) + assert.NoError(t, err) + assert.Equal(t, 4, trans, "4 packets were transmitted") + assert.Equal(t, 4, recReply, "4 packets were reply") + assert.Equal(t, 4, recPacket, "4 packets were received") + assert.Equal(t, 50, avg, "Average 50") + assert.Equal(t, 50, min, "Min 50") + assert.Equal(t, 52, max, "Max 52") +} + +func mockHostPinger(binary string, timeout float64, args ...string) (string, error) { + return winENPingOutput, nil +} + +// Test that Gather function works on a normal ping +func TestPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.google.com", "www.reddit.com"}, + pingHost: mockHostPinger, + } + + acc.GatherError(p.Gather) + tags := map[string]string{"url": "www.google.com"} + fields := map[string]interface{}{ + "packets_transmitted": 4, + "packets_received": 4, + "reply_received": 4, + "percent_packet_loss": 0.0, + "percent_reply_loss": 0.0, + "average_response_ms": 50.0, + "minimum_response_ms": 50.0, + "maximum_response_ms": 52.0, + "result_code": 0, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) + + tags = map[string]string{"url": "www.reddit.com"} + acc.AssertContainsTaggedFields(t, "ping", fields, tags) +} + +var errorPingOutput = ` +Badanie nask.pl [195.187.242.157] z 32 bajtami danych: +Upłynął limit czasu żądania. +Upłynął limit czasu żądania. +Upłynął limit czasu żądania. +Upłynął limit czasu żądania. + +Statystyka badania ping dla 195.187.242.157: + Pakiety: Wysłane = 4, Odebrane = 0, Utracone = 4 + (100% straty), +` + +func mockErrorHostPinger(binary string, timeout float64, args ...string) (string, error) { + return errorPingOutput, errors.New("No packets received") +} + +// Test that Gather works on a ping with no transmitted packets, even though the +// command returns an error +func TestBadPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.amazon.com"}, + pingHost: mockErrorHostPinger, + } + + acc.GatherError(p.Gather) + tags := map[string]string{"url": "www.amazon.com"} + fields := map[string]interface{}{ + "packets_transmitted": 4, + "packets_received": 0, + "reply_received": 0, + "percent_packet_loss": 100.0, + "percent_reply_loss": 100.0, + "result_code": 0, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) +} + +func TestArguments(t *testing.T) { + arguments := []string{"-c", "3"} + p := Ping{ + Count: 2, + Timeout: 12.0, + Arguments: arguments, + } + + actual := p.args("www.google.com") + require.True(t, reflect.DeepEqual(actual, arguments), "Expected : %s Actual: %s", arguments, actual) +} + +var lossyPingOutput = ` +Badanie thecodinglove.com [66.6.44.4] z 9800 bajtami danych: +Upłynął limit czasu żądania. +Odpowiedź z 66.6.44.4: bajtów=9800 czas=114ms TTL=48 +Odpowiedź z 66.6.44.4: bajtów=9800 czas=114ms TTL=48 +Odpowiedź z 66.6.44.4: bajtów=9800 czas=118ms TTL=48 +Odpowiedź z 66.6.44.4: bajtów=9800 czas=114ms TTL=48 +Odpowiedź z 66.6.44.4: bajtów=9800 czas=114ms TTL=48 +Upłynął limit czasu żądania. +Odpowiedź z 66.6.44.4: bajtów=9800 czas=119ms TTL=48 +Odpowiedź z 66.6.44.4: bajtów=9800 czas=116ms TTL=48 + +Statystyka badania ping dla 66.6.44.4: + Pakiety: Wysłane = 9, Odebrane = 7, Utracone = 2 + (22% straty), +Szacunkowy czas błądzenia pakietów w millisekundach: + Minimum = 114 ms, Maksimum = 119 ms, Czas średni = 115 ms +` + +func mockLossyHostPinger(binary string, timeout float64, args ...string) (string, error) { + return lossyPingOutput, nil +} + +// Test that Gather works on a ping with lossy packets +func TestLossyPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.google.com"}, + pingHost: mockLossyHostPinger, + } + + acc.GatherError(p.Gather) + tags := map[string]string{"url": "www.google.com"} + fields := map[string]interface{}{ + "packets_transmitted": 9, + "packets_received": 7, + "reply_received": 7, + "percent_packet_loss": 22.22222222222222, + "percent_reply_loss": 22.22222222222222, + "average_response_ms": 115.0, + "minimum_response_ms": 114.0, + "maximum_response_ms": 119.0, + "result_code": 0, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) +} + +// Fatal ping output (invalid argument) +var fatalPingOutput = ` +Bad option -d. + + +Usage: ping [-t] [-a] [-n count] [-l size] [-f] [-i TTL] [-v TOS] + [-r count] [-s count] [[-j host-list] | [-k host-list]] + [-w timeout] [-R] [-S srcaddr] [-4] [-6] target_name + +Options: + -t Ping the specified host until stopped. + To see statistics and continue - type Control-Break; + To stop - type Control-C. + -a Resolve addresses to hostnames. + -n count Number of echo requests to send. + -l size Send buffer size. + -f Set Don't Fragment flag in packet (IPv4-only). + -i TTL Time To Live. + -v TOS Type Of Service (IPv4-only. This setting has been deprecated + and has no effect on the type of service field in the IP Header). + -r count Record route for count hops (IPv4-only). + -s count Timestamp for count hops (IPv4-only). + -j host-list Loose source route along host-list (IPv4-only). + -k host-list Strict source route along host-list (IPv4-only). + -w timeout Timeout in milliseconds to wait for each reply. + -R Use routing header to test reverse route also (IPv6-only). + -S srcaddr Source address to use. + -4 Force using IPv4. + -6 Force using IPv6. + +` + +func mockFatalHostPinger(binary string, timeout float64, args ...string) (string, error) { + return fatalPingOutput, errors.New("So very bad") +} + +// Test that a fatal ping command does not gather any statistics. +func TestFatalPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.amazon.com"}, + pingHost: mockFatalHostPinger, + } + + acc.GatherError(p.Gather) + assert.True(t, acc.HasFloatField("ping", "errors"), + "Fatal ping should have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "packets_transmitted"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "packets_received"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasFloatField("ping", "percent_packet_loss"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasFloatField("ping", "percent_reply_loss"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "average_response_ms"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "maximum_response_ms"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "minimum_response_ms"), + "Fatal ping should not have packet measurements") +} + +var UnreachablePingOutput = ` +Pinging www.google.pl [8.8.8.8] with 32 bytes of data: +Request timed out. +Request timed out. +Reply from 194.204.175.50: Destination net unreachable. +Request timed out. + +Ping statistics for 8.8.8.8: + Packets: Sent = 4, Received = 1, Lost = 3 (75% loss), +` + +func mockUnreachableHostPinger(binary string, timeout float64, args ...string) (string, error) { + return UnreachablePingOutput, errors.New("So very bad") +} + +//Reply from 185.28.251.217: TTL expired in transit. + +// in case 'Destination net unreachable' ping app return receive packet which is not what we need +// it's not contain valid metric so treat it as lost one +func TestUnreachablePingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.google.com"}, + pingHost: mockUnreachableHostPinger, + } + + acc.GatherError(p.Gather) + + tags := map[string]string{"url": "www.google.com"} + fields := map[string]interface{}{ + "packets_transmitted": 4, + "packets_received": 1, + "reply_received": 0, + "percent_packet_loss": 75.0, + "percent_reply_loss": 100.0, + "result_code": 0, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) + + assert.False(t, acc.HasFloatField("ping", "errors"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "average_response_ms"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "maximum_response_ms"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "minimum_response_ms"), + "Fatal ping should not have packet measurements") +} + +var TTLExpiredPingOutput = ` +Pinging www.google.pl [8.8.8.8] with 32 bytes of data: +Request timed out. +Request timed out. +Reply from 185.28.251.217: TTL expired in transit. +Request timed out. + +Ping statistics for 8.8.8.8: + Packets: Sent = 4, Received = 1, Lost = 3 (75% loss), +` + +func mockTTLExpiredPinger(binary string, timeout float64, args ...string) (string, error) { + return TTLExpiredPingOutput, errors.New("So very bad") +} + +// in case 'Destination net unreachable' ping app return receive packet which is not what we need +// it's not contain valid metric so treat it as lost one +func TestTTLExpiredPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.google.com"}, + pingHost: mockTTLExpiredPinger, + } + + acc.GatherError(p.Gather) + + tags := map[string]string{"url": "www.google.com"} + fields := map[string]interface{}{ + "packets_transmitted": 4, + "packets_received": 1, + "reply_received": 0, + "percent_packet_loss": 75.0, + "percent_reply_loss": 100.0, + "result_code": 0, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) + + assert.False(t, acc.HasFloatField("ping", "errors"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "average_response_ms"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "maximum_response_ms"), + "Fatal ping should not have packet measurements") + assert.False(t, acc.HasInt64Field("ping", "minimum_response_ms"), + "Fatal ping should not have packet measurements") +} + +func TestPingBinary(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.google.com"}, + Binary: "ping6", + pingHost: func(binary string, timeout float64, args ...string) (string, error) { + assert.True(t, binary == "ping6") + return "", nil + }, + } + acc.GatherError(p.Gather) +} diff --git a/src/modules/monapi/plugins/ping/ping_test.go b/src/modules/monapi/plugins/ping/ping_test.go new file mode 100644 index 00000000..4ce5e346 --- /dev/null +++ b/src/modules/monapi/plugins/ping/ping_test.go @@ -0,0 +1,12 @@ +package ping + +import ( + "github.com/didi/nightingale/src/modules/monapi/plugins" + "testing" +) + +func TestCollect(t *testing.T) { + plugins.PluginTest(t, &Rule{ + Urls: []string{"github.com", "n9e.didiyun.com"}, + }) +}