feat: support i18n metric desc (#1097)

* support i18n metric desc

* code refactor

* code refactor

Co-authored-by: ziv <xiaozheng@tuya.com>
This commit is contained in:
xiaoziv 2022-08-10 13:21:11 +08:00 committed by GitHub
parent 1a446f0749
commit fd93fd7182
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 282 additions and 164 deletions

View File

@ -1,3 +1,4 @@
zh:
cpu_usage_idle: CPU空闲率单位% cpu_usage_idle: CPU空闲率单位%
cpu_usage_active: CPU使用率单位% cpu_usage_active: CPU使用率单位%
cpu_usage_system: CPU内核态时间占比单位% cpu_usage_system: CPU内核态时间占比单位%
@ -127,6 +128,136 @@ http_response_http_response_code: http响应状态码
http_response_response_time: http响应用时 http_response_response_time: http响应用时
http_response_result_code: url探测结果0为正常否则url无法访问 http_response_result_code: url探测结果0为正常否则url无法访问
en:
cpu_usage_idle: "CPU idle rate(unit%)"
cpu_usage_active: "CPU usage rate(unit%)"
cpu_usage_system: "CPU kernel state time proportion(unit%)"
cpu_usage_user: "CPU user mode time proportion(unit%)"
cpu_usage_nice: "The proportion of CPU time spent on low-priority processes, that is, processes whose NICE value has been adjusted to between 1 and 19. Note that the value range of NICE is -20 to 19; the larger the value, the lower the priority(unit%)"
cpu_usage_iowait: "CPU waiting for I/O time proportion(unit%)"
cpu_usage_irq: "CPU processing hard interrupt time proportion(unit%)"
cpu_usage_softirq: "CPU processing soft interrupt time proportion(unit%)"
cpu_usage_steal: "In the virtual machine environment, there is this indicator, which means that the CPU is used by other virtual machines for the proportion of time.(unit%)"
cpu_usage_guest: "The time to run other operating systems by virtualization, that is, the proportion of CPU time running the virtual machine(unit%)"
cpu_usage_guest_nice: "The proportion of time to run the virtual machine at low priority(unit%)"
disk_free: "The remaining amount of the hard disk partition (unit: byte)"
disk_used: "Hard disk partitional use (unit: byte)"
disk_used_percent: "Hard disk partitional use rate (unit:%)"
disk_total: "Total amount of hard disk partition (unit: byte)"
disk_inodes_free: "Hard disk partition INODE remaining amount"
disk_inodes_used: "Hard disk partition INODE usage amount"
disk_inodes_total: "The total amount of hard disk partition INODE"
diskio_io_time: "From the perspective of the device perspective, the total time of I/O request, the I/O request in the queue is count (unit: millisecond), the counter type, you need to use the function to find the value"
diskio_iops_in_progress: "IO requests that have been assigned to device -driven and have not yet been completed, not included in the queue but not yet assigned to the device -driven IO request, Gauge type"
diskio_merged_reads: "The number of times of adjacent reading request Merge, the counter type"
diskio_merged_writes: "The number of times the request Merge writes, the counter type"
diskio_read_bytes: "The number of byte reads, the counter type, you need to use the function to find the Rate to use the value"
diskio_read_time: "The total time of reading request (unit: millisecond), the counter type, you need to use the function to find the Rate to have the value of use"
diskio_reads: "Read the number of requests, the counter type, you need to use the function to find the Rate to use the value"
diskio_weighted_io_time: "From the perspective of the I/O request perspective, I/O wait for the total time. If there are multiple I/O requests at the same time, the time will be superimposed (unit: millisecond)"
diskio_write_bytes: "The number of bytes written, the counter type, you need to use the function to find the Rate to use the value"
diskio_write_time: "The total time of the request (unit: millisecond), the counter type, you need to use the function to find the rate to have the value of use"
diskio_writes: "Write the number of requests, the counter type, you need to use the function to find the rate to use value"
kernel_boot_time: "Kernel startup time"
kernel_context_switches: "Number of kernel context switching times"
kernel_entropy_avail: "Entropy pool inside the Linux system"
kernel_interrupts: "Number of kernel interruption"
kernel_processes_forked: "Number of forked processes"
mem_active: "The total number of memory (including Cache and BUFFER memory)"
mem_available: "Application can use memory numbers"
mem_available_percent: "Memory remaining percentage (0 ~ 100)"
mem_buffered: "Used to make buffer size for the file"
mem_cached: "The size of the memory used by the cache memory (equal to diskcache minus Swap Cache )"
mem_commit_limit: "According to the overcommit ratio ('vm.overcommit_ratio'), this is the current total memory that can be allocated on the system."
mem_committed_as: "Currently allocated on the system. It is the sum of the memory of all process applications"
mem_dirty: "Waiting to be written back to the memory size of the disk"
mem_free: "Free memory size"
mem_high_free: "Unused high memory size"
mem_high_total: "The total memory size of the high memory (Highmem refers to all the physical memory that is higher than 860 MB of memory, the HighMem area is used for user programs, or for page cache. This area is not directly mapped to the kernel space. The kernels must use different methods to use this section of memory. )"
mem_huge_page_size: "The size of each big page"
mem_huge_pages_free: "The number of Huge Pages in the pool that have not been allocated"
mem_huge_pages_total: "Reserve the total number of Huge Pages"
mem_inactive: "Free memory (including the memory of free and available)"
mem_low_free: "Unused low size"
mem_low_total: "The total size of the low memory memory can achieve the same role of high memory, and it can be used by the kernel to record some of its own data structure"
mem_mapped: "The size of the mapping of equipment and files"
mem_page_tables: "The size of the index table of the management of the memory paging page"
mem_shared: "The total memory shared by multiple processes"
mem_slab: "The size of the kernel data structure cache can reduce the consumption of application and release memory"
mem_sreclaimable: "The size of the SLAB can be recovered"
mem_sunreclaim: "The size of the SLAB cannot be recovered(SUnreclaim+SReclaimableSlab)"
mem_swap_cached: "The size of the swap space used by the cache memory (cache memory), the memory that has been swapped out, but is still stored in the swapfile. Used to be quickly replaced when needed without opening the I/O port again"
mem_swap_free: "The size of the switching space is not used"
mem_swap_total: "The total size of the exchange space"
mem_total: "Total memory"
mem_used: "Memory number"
mem_used_percent: "The memory has been used by several percentage (0 ~ 100)"
mem_vmalloc_chunk: "The largest continuous unused vmalloc area"
mem_vmalloc_total: "You can vmalloc virtual memory size"
mem_vmalloc_used: "Vmalloc's virtual memory size"
mem_write_back: "The memory size of the disk is being written back to the disk"
mem_write_back_tmp: "Fuse is used to temporarily write back the memory of the buffer area"
net_bytes_recv: "The total number of packaging of the network card (bytes)"
net_bytes_sent: "Total number of network cards (bytes)"
net_drop_in: "The number of packets for network cards"
net_drop_out: "The number of packets issued by the network card"
net_err_in: "The number of incorrect packets of the network card"
net_err_out: "Number of incorrect number of network cards"
net_packets_recv: "Net card collection quantity"
net_packets_sent: "Number of network card issuance"
netstat_tcp_established: "ESTABLISHED status network link number"
netstat_tcp_fin_wait1: "FIN_WAIT1 status network link number"
netstat_tcp_fin_wait2: "FIN_WAIT2 status number of network links"
netstat_tcp_last_ack: "LAST_ACK status number of network links"
netstat_tcp_listen: "Number of network links in Listen status"
netstat_tcp_syn_recv: "SYN_RECV status number of network links"
netstat_tcp_syn_sent: "SYN_SENT status number of network links"
netstat_tcp_time_wait: "TIME_WAIT status network link number"
netstat_udp_socket: "Number of network links in UDP status"
processes_blocked: "The number of processes in the uninterruptible sleep state('U','D','L')"
processes_dead: "Number of processes in recycling('X')"
processes_idle: "Number of idle processes hanging('I')"
processes_paging: "Number of paging processes('P')"
processes_running: "Number of processes during operation('R')"
processes_sleeping: "Can interrupt the number of processes('S')"
processes_stopped: "Pushing status process number('T')"
processes_total: "Total process number"
processes_total_threads: "Number of threads"
processes_unknown: "Unknown status process number"
processes_zombies: "Number of zombies('Z')"
swap_used_percent: "SWAP space replace the data volume"
system_load1: "1 minute average load value"
system_load5: "5 minutes average load value"
system_load15: "15 minutes average load value"
system_n_users: "User number"
system_n_cpus: "Number of CPU cores"
system_uptime: "System startup time"
nginx_accepts: "Since Nginx started, the total number of connections has been established with the client"
nginx_active: "The current number of activity connections that Nginx is being processed is equal to Reading/Writing/Waiting"
nginx_handled: "Starting from Nginx, the total number of client connections that have been processed"
nginx_reading: "Reading the total number of connections on the http request header"
nginx_requests: "Since nginx is started, the total number of client requests processed, due to the existence of HTTP Keep-Alive requests, this value will be greater than the handled value"
nginx_upstream_check_fall: "Number of failed back-end checks detected by the upstream_check module"
nginx_upstream_check_rise: "Number of successful back-end checks detected by the upstream_check module"
nginx_upstream_check_status_code: "The state of the back-end: up is 1, and down is 0"
nginx_waiting: "When keep-alive is enabled, this value is equal to active - (reading+writing), which means that Nginx has processed the resident connection that is waiting for the next request command"
nginx_writing: "The total number of connections to send a response to the client"
http_response_content_length: "HTTP message entity transmission length"
http_response_http_response_code: "http response status code"
http_response_response_time: "http response time"
http_response_result_code: "URL detection result 0 is normal, otherwise the URL cannot be accessed"
# [mysqld_exporter] # [mysqld_exporter]
mysql_global_status_uptime: The number of seconds that the server has been up.(Gauge) mysql_global_status_uptime: The number of seconds that the server has been up.(Gauge)
mysql_global_status_uptime_since_flush_status: The number of seconds since the most recent FLUSH STATUS statement.(Gauge) mysql_global_status_uptime_since_flush_status: The number of seconds since the most recent FLUSH STATUS statement.(Gauge)
@ -370,8 +501,6 @@ node_load15: cpu load 15m
# MEM # MEM
# 内核态 # 内核态
# 用户追踪已从交换区获取但尚未修改的页面的内存
node_memory_SwapCached_bytes: Memory that keeps track of pages that have been fetched from swap but not yet been modified
# 内核用于缓存数据结构供自己使用的内存 # 内核用于缓存数据结构供自己使用的内存
node_memory_Slab_bytes: Memory used by the kernel to cache data structures for its own use node_memory_Slab_bytes: Memory used by the kernel to cache data structures for its own use
# slab中可回收的部分 # slab中可回收的部分
@ -433,7 +562,7 @@ node_memory_SwapTotal_bytes: Memory information field SwapTotal_bytes
node_memory_SwapFree_bytes: Memory information field SwapFree_bytes node_memory_SwapFree_bytes: Memory information field SwapFree_bytes
# DISK # DISK
node_filesystem_files_free: Filesystem space available to non-root users in byte node_filesystem_avail_bytes: Filesystem space available to non-root users in byte
node_filesystem_free_bytes: Filesystem free space in bytes node_filesystem_free_bytes: Filesystem free space in bytes
node_filesystem_size_bytes: Filesystem size in bytes node_filesystem_size_bytes: Filesystem size in bytes
node_filesystem_files_free: Filesystem total free file nodes node_filesystem_files_free: Filesystem total free file nodes

View File

@ -3,28 +3,43 @@ package config
import ( import (
"path" "path"
cmap "github.com/orcaman/concurrent-map"
"github.com/toolkits/pkg/file" "github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/runner" "github.com/toolkits/pkg/runner"
) )
var Metrics = cmap.New() // metricDesc stores the metric descriptions. Because the map is fully loaded before it is ever read, a concurrent map is not necessary.
type metricDesc struct {
// CommonDesc is the language-independent table that GetMetricDesc falls back
// to. It is tagged yaml:",inline", so presumably it receives the top-level
// YAML keys that are not nested under "zh" or "en" — TODO confirm against the
// YAML library in use. It is exposed as "common" in JSON.
CommonDesc map[string]string `yaml:",inline" json:"common"`
// Zh maps metric name to description; GetMetricDesc uses it when lang is "zh".
Zh map[string]string `yaml:"zh" json:"zh"`
// En maps metric name to description; GetMetricDesc uses it for every lang
// other than "zh".
En map[string]string `yaml:"en" json:"en"`
}
// MetricDesc is the package-level description store: loadMetricsYaml decodes
// the metrics YAML file into it and GetMetricDesc reads from it.
var MetricDesc metricDesc
// GetMetricDesc returns the description registered for metric in the given
// language. A lang of "zh" selects the Chinese table; every other value
// selects the English table. If the selected table has no entry for metric,
// the language-independent common table is consulted, so an unregistered
// metric yields the empty string.
func GetMetricDesc(lang, metric string) string {
	// Start from the English table and switch only for the one special case.
	table := MetricDesc.En
	if lang == "zh" {
		table = MetricDesc.Zh
	}

	// Indexing a nil map is safe in Go and reports "not found", so an
	// unloaded table simply falls through to the common lookup below.
	if text, ok := table[metric]; ok {
		return text
	}

	return MetricDesc.CommonDesc[metric]
}
// loadMetricsYaml is shown here as a flattened two-column diff: removed and
// added text share the same lines, so this span is not compilable as-is.
// NOTE(review): which statements belong to which side is inferred from the
// identifiers they use — confirm against the real file.
//
// Added version (presumably): the path is taken from C.MetricsYamlFile and
// defaults to <runner.Cwd>/etc/metrics.yaml when that is empty; if the file
// does not exist nil is returned; otherwise the result of decoding the YAML
// into MetricDesc via file.ReadYaml is returned.
//
// Removed version (presumably): the YAML was decoded into a
// map[string]string and every entry was copied into Metrics with Set.
func loadMetricsYaml() error { func loadMetricsYaml() error {
fp := path.Join(runner.Cwd, "etc", "metrics.yaml") fp := C.MetricsYamlFile
if fp == "" {
fp = path.Join(runner.Cwd, "etc", "metrics.yaml")
}
if !file.IsExist(fp) { if !file.IsExist(fp) {
return nil return nil
} }
return file.ReadYaml(fp, &MetricDesc)
nmap := make(map[string]string)
err := file.ReadYaml(fp, &nmap)
if err != nil {
return err
}
for key, val := range nmap {
Metrics.Set(key, val)
}
return nil
} }

View File

@ -1,35 +1,14 @@
package router package router
import ( import (
"path"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/ginx" "github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/runner"
"github.com/didi/nightingale/v5/src/webapi/config" "github.com/didi/nightingale/v5/src/webapi/config"
) )
// metricsDescGetFile is shown here as a flattened two-column diff: removed and
// added text share the same lines, so this span is not compilable as-is.
// NOTE(review): the side each statement belongs to is inferred — confirm
// against the real file.
//
// Added version (presumably): responds with status 200 and config.MetricDesc
// serialized as JSON.
//
// Removed version (presumably): resolved the YAML path on every request
// (config.C.MetricsYamlFile, else <runner.Cwd>/etc/metrics.yaml), answered 404
// when the file was missing, 500 when file.ReadYaml failed, and otherwise 200
// with the decoded map[string]string as JSON.
func metricsDescGetFile(c *gin.Context) { func metricsDescGetFile(c *gin.Context) {
fp := config.C.MetricsYamlFile c.JSON(200, config.MetricDesc)
if fp == "" {
fp = path.Join(runner.Cwd, "etc", "metrics.yaml")
}
if !file.IsExist(fp) {
c.String(404, "%s not found", fp)
return
}
ret := make(map[string]string)
err := file.ReadYaml(fp, &ret)
if err != nil {
c.String(500, err.Error())
return
}
c.JSON(200, ret)
} }
// 前端传过来一个metric数组后端去查询有没有对应的释义返回map // 前端传过来一个metric数组后端去查询有没有对应的释义返回map
@ -38,13 +17,8 @@ func metricsDescGetMap(c *gin.Context) {
ginx.BindJSON(c, &arr) ginx.BindJSON(c, &arr)
ret := make(map[string]string) ret := make(map[string]string)
for i := 0; i < len(arr); i++ { for _, key := range arr {
desc, has := config.Metrics.Get(arr[i]) ret[key] = config.GetMetricDesc(c.GetHeader("X-Language"), key)
if !has {
ret[arr[i]] = ""
} else {
ret[arr[i]] = desc.(string)
}
} }
ginx.NewRender(c).Data(ret, nil) ginx.NewRender(c).Data(ret, nil)