安装node-exporter
所有节点导入镜像
[root@k8sm1 ~]# ctr -n k8s.io image import node-exporter.tar.gz
创建node采集Pod
apiVersion: v1
kind: Namespace
metadata:
name: monitor-sa
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: node-exporter
namespace: monitor-sa
labels:
name: node-exporter
spec:
selector:
matchLabels:
name: node-exporter
template:
metadata:
labels:
name: node-exporter
spec:
hostIPC: true
hostNetwork: true
hostPID: true
containers:
- name: node-exporter
image: docker.io/prom/node-exporter:v0.16.0
imagePullPolicy: IfNotPresent
ports:
- containerPort: 9100
resources:
requests:
cpu: 0.15
securityContext:
privileged: true
volumeMounts:
- name: dev
mountPath: /host/dev
- name: proc
mountPath: /host/proc
- name: sys
mountPath: /host/sys
args:
- --path.procfs
- /host/proc
- --path.sysfs
- /host/sys
- --collector.filesystem.ignored-mount-points
- '""^/(sys|proc|dev|host|etc)($|/)"'
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: "Exists"
effect: "NoSchedule"
volumes:
- name: proc
hostPath:
path: /proc
- name: dev
hostPath:
path: /dev
- name: sys
hostPath:
path: /sys
- name: rootfs
hostPath:
path: /
部署完成,本机测试
[root@k8sm1 prometheus]# curl http://192.168.1.181:9100/metrics | grep node_cpu_seconds
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100# HELP node_cpu_seconds_total Seconds the cpus spent in each mode.
62# TYPE node_cpu_seconds_total counter
259node_cpu_seconds_total{cpu="0",mode="idle"} 429035.72
node_cpu_seconds_total{cpu="0",mode="iowait"} 28.4
10node_cpu_seconds_total{cpu="0",mode="irq"} 0
0 node_cpu_seconds_total{cpu="0",mode="nice"} 0.92
622node_cpu_seconds_total{cpu="0",mode="softirq"} 36.76
安装Prometheus
创建SA并授权
[root@k8sm1 prometheus]# cat sa.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
namespace: monitor-sa
name: monitor
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
namespace: monitor-sa
name: monitor-clusterrolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- kind: ServiceAccount
namespace: monitor-sa
name: monitor
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: monitor-clusterrolebinding1
namespace: monitor-sa
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- kind: User
namespace: monitor-sa
name: system:serviceaccount:monitor
宿主机创建数据目录
[root@k8sm1 prometheus]# mkdir -p /prometheus/data
[root@k8sm1 prometheus]# chmod 777 /prometheus/data/
解压镜像
[root@k8sm1 prometheus]# crictl images list TAG IMAGE ID SIZE
docker.io/prom/prometheus v2.33.5 7ae78dc7a8114 77.6MB
创建prometheus配置文件
[root@k8sm1 prometheus]# cat prometheus-cfg.yaml
kind: ConfigMap
apiVersion: v1
metadata:
labels:
app: prometheus
name: prometheus-config
namespace: monitor-sa
data:
prometheus.yml: |
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 1m
scrape_configs:
- job_name: 'kubernetes-node'
kubernetes_sd_configs:
- role: node
relabel_configs:
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:9100'
target_label: __address__
action: replace
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- job_name: 'kubernetes-node-cadvisor'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
- job_name: 'kubernetes-apiserver'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
部署Prometheus 的 Pod节点
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus-server
namespace: monitor-sa
labels:
app: prometheus
spec:
replicas: 1
selector:
matchLabels:
app: prometheus
component: server
#matchExpressions:
#- {key: app, operator: In, values: [prometheus]}
#- {key: component, operator: In, values: [server]}
template:
metadata:
labels:
app: prometheus
component: server
annotations:
prometheus.io/scrape: 'false'
spec:
nodeName: k8sm1
serviceAccountName: monitor
containers:
- name: prometheus
image: prom/prometheus:v2.33.5
imagePullPolicy: IfNotPresent
command:
- prometheus
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.path=/prometheus
- --storage.tsdb.retention=720h
- --web.enable-lifecycle
ports:
- containerPort: 9090
protocol: TCP
volumeMounts:
- mountPath: /etc/prometheus
name: prometheus-config
- mountPath: /prometheus/
name: prometheus-storage-volume
volumes:
- name: prometheus-config
configMap:
name: prometheus-config
- name: prometheus-storage-volume
hostPath:
path: /prometheus/data
type: Directory
创建prometheus的Service服务
apiVersion: v1
kind: Service
metadata:
name: prometheus
namespace: monitor-sa
labels:
app: prometheus
spec:
selector:
app: prometheus
component: server
type: NodePort
ports:
- port: 9090
targetPort: 9090
nodePort: 30268
安装Grafana
导入新版镜像
[root@k8sm1 ~]# ctr -n k8s.io image import grafana_8.4.5.tar.gz
宿主机创建grafana持久化存储目录
[root@k8sm1 data]# mkdir -p /var/lib/grafana/
[root@k8sm1 data]# chmod 777 /var/lib/grafana/
创建grafana 的Pod 文件
[root@k8sm1 prometheus]# vim grafana.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: monitoring-grafana
namespace: kube-system
spec:
replicas: 1
selector:
matchLabels:
task: monitoring
k8s-app: grafana
template:
metadata:
labels:
task: monitoring
k8s-app: grafana
spec:
nodeName: k8sm1
containers:
- name: grafana
image: grafana/grafana:8.4.5
imagePullPolicy: IfNotPresent
ports:
- containerPort: 3000
protocol: TCP
volumeMounts:
- mountPath: /etc/ssl/certs
name: ca-certificates
readOnly: true
- mountPath: /var
name: grafana-storage
- mountPath: /var/lib/grafana/
name: lib
env:
- name: INFLUXDB_HOST
value: monitoring-influxdb
- name: GF_SERVER_HTTP_PORT
value: "3000"
# The following env variables are required to make Grafana accessible via
# the kubernetes api-server proxy. On production clusters, we recommend
# removing these env variables, setup auth for grafana, and expose the grafana
# service using a LoadBalancer or a public IP.
- name: GF_AUTH_BASIC_ENABLED
value: "false"
- name: GF_AUTH_ANONYMOUS_ENABLED
value: "true"
- name: GF_AUTH_ANONYMOUS_ORG_ROLE
value: Admin
- name: GF_SERVER_ROOT_URL
# If you're only using the API Server proxy, set this value instead:
# value: /api/v1/namespaces/kube-system/services/monitoring-grafana/proxy
value: /
volumes:
- name: ca-certificates
hostPath:
path: /etc/ssl/certs
- name: grafana-storage
emptyDir: {}
- name: lib
hostPath:
path: /var/lib/grafana/
type: DirectoryOrCreate
创建grafana 的Service服务
apiVersion: v1
kind: Service
metadata:
labels:
kubernetes.io/name: monitoring-grafana
name: monitoring-grafana
namespace: kube-system
spec:
# In a production setup, we recommend accessing Grafana through an external Loadbalancer
# or through a public IP.
# type: LoadBalancer
# You could also use NodePort to expose the service at a randomly-generated port
# type: NodePort
ports:
- port: 80
targetPort: 3000
selector:
k8s-app: grafana
type: NodePort