Merge pull request #97 from kongfei605/scrape_k8s

add deployment.yaml for scrape k8s apiserver & coredns
This commit is contained in:
kongfei605 2022-07-15 18:33:11 +08:00 committed by GitHub
commit 571b68425b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 421 additions and 8 deletions

View File

@ -54,16 +54,19 @@ nohup ./categraf &> stdout.log &
```
## Deploy categraf as daemonset
## Deploy categraf as daemonset, deployment or sidecar
Edit k8s/daemonset.yaml: replace NSERVER_SERVICE_WITH_PORT with the service ip:port of nserver in your cluster and CATEGRAF_NAMESPACE with your namespace, then run:
```shell
kubectl apply -n monitoring -f k8s/daemonset.yaml
kubectl apply -n monitoring -f k8s/sidecar.yaml
kubectl apply -n monitoring -f k8s/daemonset.yaml # collect metrics, metrics/cadvisor of node
kubectl apply -n monitoring -f k8s/sidecar.yaml # collect service metrics
kubectl apply -n monitoring -f k8s/deployment.yaml # collect metrics of apiserver, coredns, etc.
```
Notice: k8s/sidecar.yaml is a demo, replace mock with your own image.
Notice: k8s/sidecar.yaml is a demo; replace the mock image with the image of your own service.
## Scrape like prometheus
see detail [here](https://github.com/flashcatcloud/categraf/blob/main/prometheus/README.md)
## Plugin

View File

@ -11,10 +11,11 @@ func (a *Agent) startPrometheusScrape() {
if coreconfig.Config == nil ||
coreconfig.Config.Prometheus == nil ||
!coreconfig.Config.Prometheus.Enable {
log.Println("I! prometheus scraping disabled!")
return
}
log.Println("I! prometheus scraping started!")
go prometheus.Start()
log.Println("I! prometheus scraping started!")
}
func (a *Agent) stopPrometheusScrape() {

215
k8s/deployment.yaml Normal file
View File

@ -0,0 +1,215 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
annotations: {}
labels:
app: n9e
component: categraf
name: categraf-role
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/metrics
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups:
- extensions
- networking.k8s.io
resources:
- ingresses
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics", "/metrics/cadvisor"]
verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
annotations: {}
labels:
app: n9e
component: categraf
name: categraf-serviceaccount
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
annotations: {}
labels:
app: n9e
component: categraf
name: categraf-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: categraf-role
subjects:
- kind: ServiceAccount
name: categraf-serviceaccount
namespace: ${CATEGRAF_NAMESPACE}
---
kind: ConfigMap
metadata:
name: categraf-config
apiVersion: v1
data:
config.toml: |
[global]
# whether print configs
print_configs = true
# add label(agent_hostname) to series
# "" -> auto detect hostname
# "xx" -> use specified string xx
# "$hostname" -> auto detect hostname
# "$ip" -> auto detect ip
# "$hostname-$ip" -> auto detect hostname and ip to replace the vars
hostname = "$HOSTNAME"
# will not add label(agent_hostname) if true
omit_hostname = false
# s | ms
precision = "ms"
# global collect interval
interval = 15
# [global.labels]
# region = "shanghai"
# env = "localhost"
[writer_opt]
# default: 2000
batch = 2000
# channel(as queue) size
chan_size = 10000
[[writers]]
url = "http://${NSERVER_SERVICE_WITH_PORT}/prometheus/v1/write"
# Basic auth username
basic_auth_user = ""
# Basic auth password
basic_auth_pass = ""
# timeout settings, unit: ms
timeout = 5000
dial_timeout = 2500
max_idle_conns_per_host = 100
prometheus.toml: |
[prometheus]
enable = true
scrape_config_file="/opt/categraf/conf/in_cluster_scrape.yaml"
## log level, debug warn info error
log_level="info"
## WAL file storage path, default ./data-agent
wal_storage_path="/"
## WAL retention duration, default value is 2 hours
# wal_min_duration=2
in_cluster_scrape.yaml: |
global:
scrape_interval: 15s
#external_labels:
# cluster: test
# replica: 0
scrape_configs:
- job_name: "categraf-apiserver"
metrics_path: "/metrics"
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
insecure_skip_verify: true
authorization:
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels:
[
__meta_kubernetes_namespace,
__meta_kubernetes_service_name,
__meta_kubernetes_endpoint_port_name,
]
action: keep
regex: default;kubernetes;https
- job_name: "coredns"
metrics_path: "/metrics"
kubernetes_sd_configs:
- role: endpoints
scheme: http
relabel_configs:
- source_labels:
[
__meta_kubernetes_namespace,
__meta_kubernetes_service_name,
__meta_kubernetes_endpoint_port_name,
]
action: keep
regex: kube-system;kube-dns;metrics
remote_write:
- url: 'http://${NSERVER_SERVICE_WITH_PORT}/prometheus/v1/write'
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: n9e
component: categraf
name: nightingale-categraf
spec:
replicas: 1
selector:
matchLabels:
app: n9e
component: categraf
template:
metadata:
labels:
app: n9e
component: categraf
spec:
containers:
- env:
- name: TZ
value: Asia/Shanghai
- name: HOSTNAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
- name: HOSTIP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.hostIP
image: flashcatcloud/categraf:latest
imagePullPolicy: IfNotPresent
name: categraf
command: ["/usr/bin/categraf"]
args: ["-configs", "/opt/categraf/conf"]
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /opt/categraf/conf
name: categraf-config
dnsPolicy: ClusterFirst
hostNetwork: false
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
serviceAccountName: categraf-serviceaccount
terminationGracePeriodSeconds: 30
tolerations:
- effect: NoSchedule
operator: Exists
volumes:
- configMap:
defaultMode: 420
name: categraf-config
name: categraf-config

View File

@ -0,0 +1,69 @@
global:
scrape_interval: 15s
external_labels:
cluster: test
replica: 0
scrape_configs:
- job_name: "categraf-out"
static_configs:
- targets: ["172.16.6.171:8080"]
- job_name: "apiserver"
metrics_path: "/metrics"
kubernetes_sd_configs:
- role: endpoints
# apiserver LVS address or service ip:port, e.g. https://172.31.0.1:443
api_server: "https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}"
tls_config:
#eg. /etc/kubernetes/pki/ca.crt
ca_file: /path/to/apiserver/ca.cert
#eg /etc/kubernetes/pki/apiserver-kubelet-client.crt
cert_file: /path/to/apiserver/client.crt
# /etc/kubernetes/pki/apiserver-kubelet-client.key
key_file: /path/to/apiserver/client.key
insecure_skip_verify: true
scheme: https
tls_config:
#eg. /etc/kubernetes/pki/ca.crt
ca_file: /path/to/apiserver/ca.cert
#eg /etc/kubernetes/pki/apiserver-kubelet-client.crt
cert_file: /path/to/apiserver/client.crt
# /etc/kubernetes/pki/apiserver-kubelet-client.key
key_file: /path/to/apiserver/client.key
insecure_skip_verify: true
relabel_configs:
- source_labels:
[
__meta_kubernetes_namespace,
__meta_kubernetes_service_name,
__meta_kubernetes_endpoint_port_name,
]
action: keep
regex: default;kubernetes;https
- job_name: "coredns"
metrics_path: "/metrics"
kubernetes_sd_configs:
- role: endpoints
# apiserver LVS address or service ip:port, e.g. https://172.31.0.1:443
api_server: "https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}"
tls_config:
#eg. /etc/kubernetes/pki/ca.crt
ca_file: /path/to/apiserver/ca.cert
#eg /etc/kubernetes/pki/apiserver-kubelet-client.crt
cert_file: /path/to/apiserver/client.crt
# /etc/kubernetes/pki/apiserver-kubelet-client.key
key_file: /path/to/apiserver/client.key
insecure_skip_verify: true
scheme: http
relabel_configs:
- source_labels:
[
__meta_kubernetes_namespace,
__meta_kubernetes_service_name,
__meta_kubernetes_endpoint_port_name,
]
action: keep
regex: kube-system;kube-dns;metrics
remote_write:
- url: 'http://172.31.62.213/prometheus/v1/write'

View File

@ -8,7 +8,7 @@ scrape_configs:
- job_name: "categraf-out"
static_configs:
- targets: ["172.16.6.171:8080"]
- job_name: "categraf-out-apiserver"
- job_name: "apiserver"
metrics_path: "/metrics"
kubernetes_sd_configs:
- role: endpoints
@ -27,7 +27,7 @@ scrape_configs:
]
action: keep
regex: default;kubernetes;https
- job_name: "categraf-out-coredns"
- job_name: "coredns"
metrics_path: "/metrics"
kubernetes_sd_configs:
- role: endpoints

View File

@ -0,0 +1,53 @@
global:
scrape_interval: 15s
external_labels:
cluster: test
replica: 0
scrape_configs:
- job_name: "categraf-out"
static_configs:
- targets: ["172.16.6.171:8080"]
- job_name: "apiserver"
metrics_path: "/metrics"
kubernetes_sd_configs:
- role: endpoints
# apiserver LVS address or service ip:port, e.g. https://172.31.0.1:443
api_server: "https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}"
bearer_token_file: /path/to/token
tls_config:
insecure_skip_verify: true
scheme: https
tls_config:
insecure_skip_verify: true
bearer_token_file: /path/to/token
relabel_configs:
- source_labels:
[
__meta_kubernetes_namespace,
__meta_kubernetes_service_name,
__meta_kubernetes_endpoint_port_name,
]
action: keep
regex: default;kubernetes;https
- job_name: "coredns"
metrics_path: "/metrics"
kubernetes_sd_configs:
- role: endpoints
# apiserver LVS address or service ip:port, e.g. https://172.31.0.1:443
api_server: "https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}"
tls_config:
bearer_token_file: /path/to/token
insecure_skip_verify: true
scheme: http
relabel_configs:
- source_labels:
[
__meta_kubernetes_namespace,
__meta_kubernetes_service_name,
__meta_kubernetes_endpoint_port_name,
]
action: keep
regex: kube-system;kube-dns;metrics
remote_write:
- url: 'http://172.31.62.213/prometheus/v1/write'

View File

@ -8,3 +8,75 @@ For more details, see the official docs:
An [example](../conf/in_cluster_scrape.yaml) to scrape kube-apiserver and core-dns metrics.
For more examples, click [here](https://github.com/prometheus/prometheus/tree/main/documentation/examples)
## How to create token
1. Create the token: replace CATEGRAF_NAMESPACE in auth.yaml with your own namespace, then run ```kubectl apply -f auth.yaml```
```
### auth.yaml
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
annotations: {}
labels:
app: n9e
component: categraf
name: categraf-role
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/metrics
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups:
- extensions
- networking.k8s.io
resources:
- ingresses
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics", "/metrics/cadvisor"]
verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
annotations: {}
labels:
app: n9e
component: categraf
name: categraf-serviceaccount
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
annotations: {}
labels:
app: n9e
component: categraf
name: categraf-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: categraf-role
subjects:
- kind: ServiceAccount
name: categraf-serviceaccount
namespace: ${CATEGRAF_NAMESPACE}
---
```
2. get token
Strongly recommended: scrape from inside the cluster. The token is automatically mounted into the pod at ```/var/run/secrets/kubernetes.io/serviceaccount/token```, so you do not need to manage it yourself. Replace all variables in `k8s/in_cluster_scrape.yaml` with your own values.
When scraping from outside the cluster, obtain the token as shown below, save it to a file, and set `bearer_token_file` in `k8s/scrape_with_token.yaml` to that file's path.
```
secrets=$(kubectl get serviceaccount categraf-serviceaccount -o jsonpath={.secrets[].name})
kubectl get secrets ${secrets} -o jsonpath={.data.token} | base64 -d
```
`k8s/scrape_with_cafile.yaml` and `k8s/scrape_with_kubeconfig.yaml` are recommended only if you are proficient in
X509 client certs.