2022-06-13 23:58:57 +08:00
2022-06-15 11:37:08 +08:00
# ClusterRole "n9e-categraf": access to node subresources in the core ("")
# API group. It is bound to the n9e-categraf ServiceAccount by the
# ClusterRoleBinding of the same name further down in this file.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
annotations: {}
app: n9e
component: categraf
release: nightingale
name: n9e-categraf
- apiGroups:
- ""
# Node subresources covered by this role.
# NOTE(review): nodes/proxy allows proxied requests to the kubelet API, which is
# broader than nodes/metrics and nodes/stats — confirm it is actually required.
- nodes/metrics
- nodes/stats
- nodes/proxy
# "get" is the only entry after the resource list — presumably the sole verb granted.
- get
# ServiceAccount "n9e-categraf": referenced as the subject of the
# ClusterRoleBinding and as serviceAccountName of the categraf DaemonSet in
# this file.
apiVersion: v1
kind: ServiceAccount
annotations: {}
app: n9e
component: categraf
release: nightingale
name: n9e-categraf
# ClusterRoleBinding "n9e-categraf": binds the ClusterRole n9e-categraf to the
# ServiceAccount n9e-categraf.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
annotations: {}
app: n9e
component: categraf
release: nightingale
name: n9e-categraf
# Role being granted.
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: n9e-categraf
# Subject receiving the role.
# NOTE(review): no namespace is visible for this ServiceAccount subject —
# confirm it is set, since ServiceAccount subjects are namespaced.
- kind: ServiceAccount
name: n9e-categraf
2022-06-13 23:58:57 +08:00
# ConfigMap "categraf-config", key config.toml: main categraf agent settings
# (hostname label, precision, collect interval, batching, write URL and its
# auth/timeout options). Mounted at /etc/categraf/conf/config.toml by the
# DaemonSet in this file.
kind: ConfigMap
name: categraf-config
apiVersion: v1
config.toml: |
# whether print configs
print_configs = false
# add label(agent_hostname) to series
# "" -> auto detect hostname
# "xx" -> use specified string xx
# "$hostname" -> auto detect hostname
# "$ip" -> auto detect ip
# "$hostname-$ip" -> auto detect hostname and ip to replace the vars
2022-06-14 07:34:38 +08:00
# NOTE(review): "$HOSTNAME" is not one of the forms listed above; presumably it
# is expanded from the HOSTNAME env var, which the DaemonSet in this file sets
# from spec.nodeName — confirm.
hostname = "$HOSTNAME"
2022-06-13 23:58:57 +08:00
# will not add label(agent_hostname) if true
omit_hostname = false
# s | ms
precision = "ms"
# global collect interval
interval = 15
# [global.labels]
# region = "shanghai"
# env = "localhost"
# default: 2000
batch = 2000
# channel(as queue) size
chan_size = 10000
2022-06-15 11:37:08 +08:00
# NOTE(review): NSERVER_SERVICE_WITH_PORT is not among the container env
# variables visible in this file — confirm it is substituted before deployment.
url = "http://${NSERVER_SERVICE_WITH_PORT}/prometheus/v1/write"
2022-06-13 23:58:57 +08:00
# Basic auth username
basic_auth_user = ""
# Basic auth password
basic_auth_pass = ""
# timeout settings, unit: ms
timeout = 5000
dial_timeout = 2500
max_idle_conns_per_host = 100
# Key logs.toml of the categraf-config ConfigMap: log-collection settings.
# Collection is switched off here (enable = false). Mounted at
# /etc/categraf/conf/logs.toml by the DaemonSet in this file.
logs.toml: |
## key placeholder
# NOTE(review): per the comment above this is a placeholder — make sure a real
# key is never committed here.
api_key = "ef4ahfbwzwwtlwfpbertgq1i6mq0ab1q"
## whether to enable log collection
enable = false
## address of the server that receives the logs
send_to = ""
## protocol used to send logs: http/tcp
send_type = "http"
## whether to compress logs before sending
use_compress = false
## whether to use ssl
send_with_tls = false
batch_wait = 5
## directory where log offset information is stored
run_path = "/opt/categraf/run"
## maximum number of log files collected at the same time
open_files_limit = 100
## periodically scan the directory for newly added logs
scan_period = 10
## size of the udp read buffer
frame_size = 9000
collect_container_all = true
## global processing rules
## configuration of a single log collection item
## file/journald/tcp/udp
type = "file"
## path is required when type=file; port is required when type=journald/tcp/udp
path = "/opt/tomcat/logs/*.txt"
source = "tomcat"
service = "my_service"
# ConfigMap "input-cpu": cpu.toml, mounted at /etc/categraf/conf/input.cpu by
# the DaemonSet in this file. Every setting is commented out, so no option is
# set explicitly.
kind: ConfigMap
name: input-cpu
apiVersion: v1
cpu.toml: |
# # collect interval
# interval = 15
# # whether collect per cpu
# collect_per_cpu = false
# ConfigMap "input-disk": disk.toml, mounted at /etc/categraf/conf/input.disk by
# the DaemonSet in this file. Only the two ignore lists are set explicitly.
kind: ConfigMap
name: input-disk
apiVersion: v1
disk.toml: |
# # collect interval
# interval = 15
# # By default stats will be gathered for all mount points.
# # Set mount_points will restrict the stats to only the specified mount points.
# mount_points = ["/"]
# Ignore mount points by filesystem type.
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
# Ignore these mount points by path.
ignore_mount_points = ["/boot"]
# ConfigMap "input-diskio": diskio.toml, mounted at
# /etc/categraf/conf/input.diskio by the DaemonSet in this file. Every setting
# is commented out, so no option is set explicitly.
kind: ConfigMap
name: input-diskio
apiVersion: v1
diskio.toml: |-
# # collect interval
# interval = 15
# # By default, categraf will gather stats for all devices including disk partitions.
# # Setting devices will restrict the stats to the specified devices.
# devices = ["sda", "sdb", "vd*"]
# ConfigMap "input-docker": docker.toml, mounted at
# /etc/categraf/conf/input.docker by the DaemonSet in this file.
kind: ConfigMap
name: input-docker
apiVersion: v1
docker.toml: |
# # collect interval
# interval = 15
# # append some labels for series
# labels = { region="cloud", product="n9e" }
# # interval = global.interval * interval_times
# interval_times = 1
## Docker Endpoint
## To use TCP, set endpoint = "tcp://[ip]:[port]"
## To use environment variables (ie, docker-machine), set endpoint = "ENV"
# This socket path matches the docker-socket hostPath volume that the DaemonSet
# in this file mounts at /var/run/docker.sock.
endpoint = "unix:///var/run/docker.sock"
## Set to true to collect Swarm metrics(desired_replicas, running_replicas)
gather_services = false
gather_extend_memstats = false
container_id_label_enable = true
container_id_label_short_style = false
## Containers to include and exclude. Globs accepted.
## Note that an empty array for both will include all containers
container_name_include = []
container_name_exclude = []
## Container states to include and exclude. Globs accepted.
## When empty only containers in the "running" state will be captured.
## example: container_state_include = ["created", "restarting", "running", "removing", "paused", "exited", "dead"]
## example: container_state_exclude = ["created", "restarting", "running", "removing", "paused", "exited", "dead"]
# container_state_include = []
# container_state_exclude = []
## Timeout for docker list, info, and stats commands
timeout = "5s"
## Specifies for which classes a per-device metric should be issued
## Possible values are 'cpu' (cpu0, cpu1, ...), 'blkio' (8:0, 8:1, ...) and 'network' (eth0, eth1, ...)
## Please note that this setting has no effect if 'perdevice' is set to 'true'
perdevice_include = []
## Specifies for which classes a total metric should be issued. Total is an aggregated of the 'perdevice' values.
## Possible values are 'cpu', 'blkio' and 'network'
## Total 'cpu' is reported directly by Docker daemon, and 'network' and 'blkio' totals are aggregated by this plugin.
## Please note that this setting has no effect if 'total' is set to 'false'
total_include = ["cpu", "blkio", "network"]
## Which environment variables should we use as a tag
##tag_env = ["JAVA_HOME", "HEAP_SIZE"]
## docker labels to include and exclude as tags. Globs accepted.
## Note that an empty array for both will include all labels as tags
docker_label_include = []
docker_label_exclude = ["annotation*", "io.kubernetes*", "*description*", "*maintainer*", "*hash", "*author*"]
## Optional TLS Config
## NOTE(review): the commented example paths below point at /etc/telegraf,
## while the kubernetes input in this file uses /etc/categraf — confirm which
## directory is intended before enabling TLS.
# use_tls = false
# tls_ca = "/etc/telegraf/ca.pem"
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
# ConfigMap "input-kubernetes": kubernetes.toml, mounted at
# /etc/categraf/conf/input.kubernetes by the DaemonSet in this file. It
# configures collection from the kubelet (see "URL for the kubelet" below).
kind: ConfigMap
2022-06-15 11:37:08 +08:00
2022-06-15 11:38:59 +08:00
name: input-kubernetes
2022-06-15 11:37:08 +08:00
apiVersion: v1
kubernetes.toml: |-
# # collect interval
# interval = 15
# # append some labels for series
# labels = { region="cloud", product="n9e" }
# # interval = global.interval * interval_times
# interval_times = 1
# URL for the kubelet
# NOTE(review): url is an empty string here — confirm the kubelet endpoint is
# filled in before this config is used.
url = ""
gather_system_container_metrics = true
gather_node_metrics = true
gather_pod_container_metrics = true
gather_pod_volume_metrics = true
gather_pod_network_metrics = true
## Use bearer token for authorization. ('bearer_token' takes priority)
## If both of these are empty, we'll use the default serviceaccount:
## at: /var/run/secrets/kubernetes.io/serviceaccount/token
# bearer_token = "/path/to/bearer/token"
## OR
# bearer_token_string = "abc_123"
## Pod labels to be added as tags. An empty array for both include and
## exclude will include all labels.
# label_include = []
# label_exclude = ["*"]
## Set response_timeout (default 5 seconds)
# response_timeout = "5s"
## Optional TLS Config
use_tls = true
# tls_ca = "/etc/categraf/ca.pem"
# tls_cert = "/etc/categraf/cert.pem"
# tls_key = "/etc/categraf/key.pem"
## Use TLS but skip chain & host verification
# NOTE(review): verification is skipped here and no tls_ca is configured —
# confirm this is acceptable for the target cluster.
insecure_skip_verify = true
# ConfigMap "input-kubelet-metrics": prometheus.toml, mounted at
# /etc/categraf/conf/input.prometheus by the DaemonSet in this file. Requests
# use the pod's service-account token file as bearer token.
kind: ConfigMap
2022-07-01 17:34:37 +08:00
2022-07-04 18:20:48 +08:00
name: input-kubelet-metrics
2022-07-01 17:34:37 +08:00
apiVersion: v1
prometheus.toml: |
# # collect interval
# interval = 15
# kubelet metrics & cadvisor
# NOTE(review): both urls are empty strings here — confirm the two scrape
# endpoints are filled in before this config is used.
urls = ["", ""]
bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
# TLS is enabled with verification skipped.
# NOTE(review): confirm skipping verification is acceptable for the target cluster.
use_tls = true
insecure_skip_verify = true
# Adds a label named "instance" whose value comes from the {{.Host}} template.
url_label_key = "instance"
url_label_value = "{{.Host}}"
# if you use dashboards, do not delete this label
labels = {job="categraf"}
# ConfigMap "input-kernel": kernel.toml, mounted at
# /etc/categraf/conf/input.kernel by the DaemonSet in this file. The only
# setting is commented out, so no option is set explicitly.
kind: ConfigMap
2022-06-13 23:58:57 +08:00
name: input-kernel
apiVersion: v1
kernel.toml: |
# # collect interval
# interval = 15
# ConfigMap "input-kernel-vmstat": kernel_vmstat.toml, mounted at
# /etc/categraf/conf/input.kernel_vmstat by the DaemonSet in this file.
# One entry per /proc/vmstat field name; only oom_kill is set to 1, every other
# field is set to 0.
# NOTE(review): presumably 1 enables collection of a field and 0 disables it —
# confirm against the categraf kernel_vmstat input documentation.
kind: ConfigMap
name: input-kernel-vmstat
apiVersion: v1
kernel_vmstat.toml: |
# # collect interval
# interval = 15
# file: /proc/vmstat
oom_kill = 1
nr_free_pages = 0
nr_alloc_batch = 0
nr_inactive_anon = 0
nr_active_anon = 0
nr_inactive_file = 0
nr_active_file = 0
nr_unevictable = 0
nr_mlock = 0
nr_anon_pages = 0
nr_mapped = 0
nr_file_pages = 0
nr_dirty = 0
nr_writeback = 0
nr_slab_reclaimable = 0
nr_slab_unreclaimable = 0
nr_page_table_pages = 0
nr_kernel_stack = 0
nr_unstable = 0
nr_bounce = 0
nr_vmscan_write = 0
nr_vmscan_immediate_reclaim = 0
nr_writeback_temp = 0
nr_isolated_anon = 0
nr_isolated_file = 0
nr_shmem = 0
nr_dirtied = 0
nr_written = 0
numa_hit = 0
numa_miss = 0
numa_foreign = 0
numa_interleave = 0
numa_local = 0
numa_other = 0
workingset_refault = 0
workingset_activate = 0
workingset_nodereclaim = 0
nr_anon_transparent_hugepages = 0
nr_free_cma = 0
nr_dirty_threshold = 0
nr_dirty_background_threshold = 0
pgpgin = 0
pgpgout = 0
pswpin = 0
pswpout = 0
pgalloc_dma = 0
pgalloc_dma32 = 0
pgalloc_normal = 0
pgalloc_movable = 0
pgfree = 0
pgactivate = 0
pgdeactivate = 0
pgfault = 0
pgmajfault = 0
pglazyfreed = 0
pgrefill_dma = 0
pgrefill_dma32 = 0
pgrefill_normal = 0
pgrefill_movable = 0
pgsteal_kswapd_dma = 0
pgsteal_kswapd_dma32 = 0
pgsteal_kswapd_normal = 0
pgsteal_kswapd_movable = 0
pgsteal_direct_dma = 0
pgsteal_direct_dma32 = 0
pgsteal_direct_normal = 0
pgsteal_direct_movable = 0
pgscan_kswapd_dma = 0
pgscan_kswapd_dma32 = 0
pgscan_kswapd_normal = 0
pgscan_kswapd_movable = 0
pgscan_direct_dma = 0
pgscan_direct_dma32 = 0
pgscan_direct_normal = 0
pgscan_direct_movable = 0
pgscan_direct_throttle = 0
zone_reclaim_failed = 0
pginodesteal = 0
slabs_scanned = 0
kswapd_inodesteal = 0
kswapd_low_wmark_hit_quickly = 0
kswapd_high_wmark_hit_quickly = 0
pageoutrun = 0
allocstall = 0
pgrotated = 0
drop_pagecache = 0
drop_slab = 0
numa_pte_updates = 0
numa_huge_pte_updates = 0
numa_hint_faults = 0
numa_hint_faults_local = 0
numa_pages_migrated = 0
pgmigrate_success = 0
pgmigrate_fail = 0
compact_migrate_scanned = 0
compact_free_scanned = 0
compact_isolated = 0
compact_stall = 0
compact_fail = 0
compact_success = 0
htlb_buddy_alloc_success = 0
htlb_buddy_alloc_fail = 0
unevictable_pgs_culled = 0
unevictable_pgs_scanned = 0
unevictable_pgs_rescued = 0
unevictable_pgs_mlocked = 0
unevictable_pgs_munlocked = 0
unevictable_pgs_cleared = 0
unevictable_pgs_stranded = 0
thp_fault_alloc = 0
thp_fault_fallback = 0
thp_collapse_alloc = 0
thp_collapse_alloc_failed = 0
thp_split = 0
thp_zero_page_alloc = 0
thp_zero_page_alloc_failed = 0
balloon_inflate = 0
balloon_deflate = 0
balloon_migrate = 0
# ConfigMap "input-linux-sysctl-fs": linux_sysctl_fs.toml, mounted at
# /etc/categraf/conf/input.linux_sysctl_fs by the DaemonSet in this file. The
# only setting is commented out, so no option is set explicitly.
kind: ConfigMap
name: input-linux-sysctl-fs
apiVersion: v1
linux_sysctl_fs.toml: |
# # collect interval
# interval = 15
# ConfigMap "input-mem": mem.toml, mounted at /etc/categraf/conf/input.mem by
# the DaemonSet in this file. Only collect_platform_fields is set explicitly.
kind: ConfigMap
name: input-mem
apiVersion: v1
mem.toml: |
# # collect interval
# interval = 15
# # whether collect platform specified metrics
collect_platform_fields = true
# ConfigMap "input-net": net.toml, mounted at /etc/categraf/conf/input.net by
# the DaemonSet in this file. Every setting is commented out, so no option is
# set explicitly.
kind: ConfigMap
name: input-net
apiVersion: v1
net.toml: |-
# # collect interval
# interval = 15
# # whether collect protocol stats on Linux
# collect_protocol_stats = false
# # setting interfaces will tell categraf to gather these explicit interfaces
# interfaces = ["eth0"]
# ConfigMap "input-netstat": netstat.toml, mounted at
# /etc/categraf/conf/input.netstat by the DaemonSet in this file. The only
# setting is commented out, so no option is set explicitly.
kind: ConfigMap
name: input-netstat
apiVersion: v1
netstat.toml: |
# # collect interval
# interval = 15
# ConfigMap "input-processes": processes.toml, mounted at
# /etc/categraf/conf/input.processes by the DaemonSet in this file. Every
# setting is commented out, so no option is set explicitly.
kind: ConfigMap
name: input-processes
apiVersion: v1
processes.toml: |-
# # collect interval
# interval = 15
# # force use ps command to gather
# force_ps = false
# # force use /proc to gather
# force_proc = false
# ConfigMap "input-system": system.toml, mounted at
# /etc/categraf/conf/input.system by the DaemonSet in this file. Every setting
# is commented out, so no option is set explicitly.
kind: ConfigMap
name: input-system
apiVersion: v1
system.toml: |
# # collect interval
# interval = 15
# # whether collect metric: system_n_users
# collect_user_number = false
# DaemonSet "nightingale-categraf": runs the categraf container with the host
# network and the n9e-categraf ServiceAccount. Configuration comes from the
# ConfigMaps defined earlier in this file (one volume per ConfigMap), plus
# hostPath mounts of the host root, utmp and the Docker socket.
apiVersion: apps/v1
kind: DaemonSet
annotations: {}
app: n9e
component: categraf
release: nightingale
name: nightingale-categraf
app: n9e
component: categraf
release: nightingale
creationTimestamp: null
app: n9e
component: categraf
release: nightingale
# Node match expression: kubernetes.io/os In [linux].
- matchExpressions:
- key: kubernetes.io/os
operator: In
- linux
- env:
- name: TZ
value: Asia/Shanghai
# HOSTNAME and HOSTIP are taken from pod fields (spec.nodeName, status.hostIP).
- name: HOSTNAME
apiVersion: v1
fieldPath: spec.nodeName
- name: HOSTIP
apiVersion: v1
fieldPath: status.hostIP
# Host /proc and /sys are addressed through the /hostfs mount declared below.
- name: HOST_PROC
value: /hostfs/proc
- name: HOST_SYS
value: /hostfs/sys
# NOTE(review): no `name:` is visible for this value — confirm which variable it sets.
value: /hostfs
2022-06-15 11:37:08 +08:00
image: flashcatcloud/categraf:v0.1.5
2022-06-13 23:58:57 +08:00
imagePullPolicy: IfNotPresent
name: categraf
# NOTE(review): resources is empty, so no CPU/memory requests or limits are
# set for this container — confirm that is intended for a per-node agent.
resources: {}
# config.toml and logs.toml are mounted as single files via subPath from the
# categraf-config volume.
- mountPath: /etc/categraf/conf/config.toml
name: categraf-config
subPath: config.toml
- mountPath: /etc/categraf/conf/logs.toml
name: categraf-config
subPath: logs.toml
# Each input.<plugin> directory below is backed by its own ConfigMap volume.
- mountPath: /etc/categraf/conf/input.cpu
name: input-cpu
- mountPath: /etc/categraf/conf/input.disk
name: input-disk
- mountPath: /etc/categraf/conf/input.diskio
name: input-diskio
- mountPath: /etc/categraf/conf/input.docker
name: input-docker
2022-06-15 11:37:08 +08:00
- mountPath: /etc/categraf/conf/input.kubernetes
name: input-kubernetes
2022-07-01 17:34:37 +08:00
- mountPath: /etc/categraf/conf/input.prometheus
name: input-kubelet-metrics
2022-06-13 23:58:57 +08:00
- mountPath: /etc/categraf/conf/input.kernel
name: input-kernel
- mountPath: /etc/categraf/conf/input.kernel_vmstat
name: input-kernel-vmstat
- mountPath: /etc/categraf/conf/input.linux_sysctl_fs
name: input-linux-sysctl-fs
- mountPath: /etc/categraf/conf/input.mem
name: input-mem
- mountPath: /etc/categraf/conf/input.net
name: input-net
- mountPath: /etc/categraf/conf/input.netstat
name: input-netstat
- mountPath: /etc/categraf/conf/input.processes
name: input-processes
- mountPath: /etc/categraf/conf/input.system
name: input-system
# Host mounts: utmp and the host root (at /hostfs) are read-only.
- mountPath: /var/run/utmp
name: hostroutmp
readOnly: true
- mountPath: /hostfs
name: hostrofs
readOnly: true
# NOTE(review): unlike the two host mounts above, the Docker socket mount has
# no readOnly flag — confirm whether write access is needed.
- mountPath: /var/run/docker.sock
name: docker-socket
# ClusterFirstWithHostNet is paired with hostNetwork: true below.
dnsPolicy: ClusterFirstWithHostNet
2022-06-17 10:42:14 +08:00
serviceAccountName: n9e-categraf
2022-06-13 23:58:57 +08:00
hostNetwork: true
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
# Toleration with effect NoSchedule and operator Exists (no key is visible).
- effect: NoSchedule
operator: Exists
# Volumes. defaultMode 420 is decimal for octal 0644.
# categraf-config exposes only the config.toml and logs.toml keys.
- configMap:
defaultMode: 420
- key: config.toml
path: config.toml
- key: logs.toml
path: logs.toml
name: categraf-config
name: categraf-config
# One ConfigMap volume per input plugin; volume name and ConfigMap name match.
- configMap:
defaultMode: 420
name: input-cpu
name: input-cpu
- configMap:
defaultMode: 420
name: input-disk
name: input-disk
- configMap:
defaultMode: 420
name: input-diskio
name: input-diskio
- configMap:
defaultMode: 420
name: input-docker
name: input-docker
2022-06-15 11:37:08 +08:00
- configMap:
defaultMode: 420
name: input-kubernetes
name: input-kubernetes
2022-07-01 17:34:37 +08:00
- configMap:
defaultMode: 420
2022-07-04 18:20:48 +08:00
name: input-kubelet-metrics
2022-07-13 16:18:44 +08:00
name: input-kubelet-metrics
2022-06-13 23:58:57 +08:00
- configMap:
defaultMode: 420
name: input-kernel
name: input-kernel
- configMap:
defaultMode: 420
name: input-kernel-vmstat
name: input-kernel-vmstat
- configMap:
defaultMode: 420
name: input-linux-sysctl-fs
name: input-linux-sysctl-fs
- configMap:
defaultMode: 420
name: input-mem
name: input-mem
- configMap:
defaultMode: 420
name: input-net
name: input-net
- configMap:
defaultMode: 420
name: input-netstat
name: input-netstat
- configMap:
defaultMode: 420
name: input-processes
name: input-processes
- configMap:
defaultMode: 420
name: input-system
name: input-system
# hostPath volumes: the host root filesystem, the host utmp file, and the
# Docker socket (type Socket).
- hostPath:
path: /
type: ""
name: hostrofs
- hostPath:
path: /var/run/utmp
type: ""
name: hostroutmp
- hostPath:
path: /var/run/docker.sock
type: Socket
name: docker-socket