prometheus 配置
  TEZNKK3IfmPf 2023年11月15日 19 0


---
# ConfigMap carrying the Prometheus server configuration together with its
# alert and rule files (keys: alerts, prometheus.yml, rules).
apiVersion: v1
data:
  # Alerts file: an empty mapping, so it contributes no rule groups.
  alerts: |
    {}
  # Main Prometheus server configuration.
  prometheus.yml: |
    global:
      # Targets are scraped every 30s; a scrape may use the whole interval
      # before it times out.
      scrape_interval: 30s
      scrape_timeout: 30s
    # NOTE(review): presumably this ConfigMap is mounted at /etc/config so
    # these paths resolve to its `rules` and `alerts` keys - confirm against
    # the server Deployment.
    rule_files:
    - /etc/config/rules
    - /etc/config/alerts
    scrape_configs:
    # Prometheus scraping its own endpoint on localhost:9090.
    - job_name: prometheus
      static_configs:
      - targets:
        - localhost:9090
    - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      job_name: kubernetes-apiservers
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - action: keep
        regex: default;kubernetes;https
        source_labels:
        - __meta_kubernetes_namespace
        - __meta_kubernetes_service_name
        - __meta_kubernetes_endpoint_port_name
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
    - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      job_name: kubernetes-nodes
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - replacement: kubernetes.default.svc:443
        target_label: __address__
      - regex: (.+)
        replacement: /api/v1/nodes//proxy/metrics
        source_labels:
        - __meta_kubernetes_node_name
        target_label: __metrics_path__
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
    - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      job_name: kubernetes-nodes-cadvisor
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - replacement: kubernetes.default.svc:443
        target_label: __address__
      - regex: (.+)
        replacement: /api/v1/nodes//proxy/metrics/cadvisor
        source_labels:
        - __meta_kubernetes_node_name
        target_label: __metrics_path__
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
    - job_name: kubernetes-service-endpoints
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - action: keep
        regex: true
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scrape
      - action: replace
        regex: (https?)
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scheme
        target_label: __scheme__
      - action: replace
        regex: (.+)
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_path
        target_label: __metrics_path__
      - action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        source_labels:
        - __address__
        - __meta_kubernetes_service_annotation_prometheus_io_port
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: kubernetes_namespace
      - action: replace
        source_labels:
        - __meta_kubernetes_service_name
        target_label: kubernetes_name
    - honor_labels: true
      job_name: prometheus-pushgateway
      kubernetes_sd_configs:
      - role: service
      relabel_configs:
      - action: keep
        regex: pushgateway
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_probe
    - job_name: kubernetes-services
      kubernetes_sd_configs:
      - role: service
      metrics_path: /probe
      params:
        module:
        - http_2xx
      relabel_configs:
      - action: keep
        regex: true
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_probe
      - source_labels:
        - __address__
        target_label: __param_target
      - replacement: blackbox
        target_label: __address__
      - source_labels:
        - __param_target
        target_label: instance
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels:
        - __meta_kubernetes_namespace
        target_label: kubernetes_namespace
      - source_labels:
        - __meta_kubernetes_service_name
        target_label: kubernetes_name
    - job_name: kubernetes-pods
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - action: keep
        regex: true
        source_labels:
        - __meta_kubernetes_pod_annotation_prometheus_io_scrape
      - action: replace
        regex: (.+)
        source_labels:
        - __meta_kubernetes_pod_annotation_prometheus_io_path
        target_label: __metrics_path__
      - action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        source_labels:
        - __address__
        - __meta_kubernetes_pod_annotation_prometheus_io_port
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: kubernetes_namespace
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_name
        target_label: kubernetes_pod_name
    alerting:
      alertmanagers:
      - kubernetes_sd_configs:
          - role: pod
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
        - source_labels: [__meta_kubernetes_namespace]
          regex: monitor
          action: keep
        - source_labels: [__meta_kubernetes_pod_label_app]
          regex: prometheus
          action: keep
        - source_labels: [__meta_kubernetes_pod_label_component]
          regex: alertmanager
          action: keep
        - source_labels: [__meta_kubernetes_pod_container_port_number]
          regex:
          action: drop
  rules: |
    groups:
    - name: example
      rules:
      - alert: NodeCPUUsage
        expr: 100 - (avg by (instance) (irate(node_cpu{component="node-exporter",mode="idle"}[5m])) * 100)>1
        for: 2m
        labels:
          severity: critical
          service: cpus
        annotations:
          summary: "{{$labels.instance}}: High CPU usage detected"
          description: "{{$labels.instance}}: CPU usage is above 75% (current value is: {{ $value }})"
      - alert: InstanceDown
        expr: up == 0
        for: 5m
        labels:
          severity: critical
          service: node
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
      - alert: APIHighRequestLatency
        expr: api_http_request_latencies_second{quantile="0.5"} > 1
        for: 10m
        labels:
          severity: critical
          service: node
        annotations:
          summary: "High request latency on {{ $labels.instance }}"
          description: "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)"

# Resource identity: ConfigMap `prometheus-server` in namespace `monitor`.
kind: ConfigMap
metadata:
  name: prometheus-server
  namespace: monitor
  labels:
    app: prometheus
    component: server
    release: prometheus
    chart: prometheus-6.2.1
    heritage: Tiller

 

---
# ConfigMap carrying the Alertmanager configuration and its e-mail template.
# The document start marker above keeps this manifest separate from the one
# before it; without it the top-level keys would be duplicates.
apiVersion: v1
data:
  alertmanager.yml: |
    global:
      resolve_timeout: 5m
      # SMTP relay and sender used for every e-mail notification.
      smtp_smarthost: 'smtp.163.com:25'
      smtp_from: 'hxp195446040@163.com'
      smtp_auth_username: 'hxp195446040@163.com'
      # NOTE(review): placeholder value. A real password should not be kept
      # in a ConfigMap - consider supplying it from a Secret.
      smtp_auth_password: 'xxxxxx'
      smtp_require_tls: false
    # Both receivers mail the same address with the same HTML template and
    # fixed subject; they differ only in name.
    receivers:
    - name: default-receiver
      email_configs:
      - to: 'hxp195446040@163.com'
        html: '{{ template "email.hxp.html" .}}'
        headers:
          Subject: "[WARN] 报警邮件test"
    - name: admin-receiver
      email_configs:
      - to: 'hxp195446040@163.com'
        html: '{{ template "email.hxp.html" .}}'
        headers:
          Subject: "[WARN] 报警邮件test"
    # Template files defining the names used in `html` above.
    templates:
    - '/etc/config/*.tmpl'
    inhibit_rules:
    - source_match:
        severity: 'critical'
      target_match:
        severity: 'warning'
      equal: ['alertname', 'cluster', 'service']
    route:
      group_interval: 5m
      group_wait: 30s
      receiver: default-receiver
      repeat_interval: 3h
      group_by: ['alertname', 'cluster', 'service']
      routes:
      - match_re:
          service: ^(foo1|foo2|baz)$
        receiver: default-receiver
        routes:
        - match:
            severity: critical
          receiver: admin-receiver
      - match:
          service: cpus
        receiver: default-receiver
        routes:
        - match:
            severity: critical
          receiver: admin-receiver
      - match:
          service: node
        receiver: default-receiver
        group_by: [alertname, cluster]
        routes:
        - match:
            owner: team-X
          receiver: default-receiver
        - match:
            owner: team-Y
          receiver: admin-receiver

  hxp.tmpl: |
    {{ define "email.hxp.html" }}
    
        报警名实例开始时间摘要详情
        {{ range $i, $alert := .Alerts }}
            {{ index $alert.Labels "alertname" }}{{index $alert.Labels "instance"}}{{ $alert.StartsAt }}{{index $alert.Annotations "summary"}}{{index $alert.Annotations "description"}}
        {{ end }}
    
    {{ end }}
    {{ define "email.hxp.title.html" }}
        {{ range $i, $alert := .Alerts }}
            {{index $alert.Annotations "summary"}}|
        {{ end }}
    {{ end }}
# Resource identity: ConfigMap `prometheus-alertmanager` in namespace `monitor`.
kind: ConfigMap
metadata:
  name: prometheus-alertmanager
  namespace: monitor
  labels:
    app: prometheus
    component: alertmanager
    release: prometheus
    chart: prometheus-6.2.1
    heritage: Tiller


【版权声明】本文内容来自摩杜云社区用户原创、第三方投稿、转载，内容版权归原作者所有。本网站的目的在于传递更多信息，不拥有版权，亦不承担相应法律责任。如果您发现本社区中有涉嫌抄袭的内容，欢迎发送邮件进行举报，并提供相关证据；一经查实，本社区将立刻删除涉嫌侵权内容。举报邮箱：cloudbbs@moduyun.com

  1. 分享:
最后一次编辑于 2023年11月15日 0

暂无评论

推荐阅读
  TEZNKK3IfmPf   2024年03月30日   49   0   0 htmlhtml5
  TEZNKK3IfmPf   2024年03月22日   105   0   0 html框架
  TEZNKK3IfmPf   2024年04月26日   36   0   0 htmlScala
  TEZNKK3IfmPf   2024年03月29日   50   0   0 htmlhtml5
  TEZNKK3IfmPf   2024年03月29日   90   0   0 htmlhtml5
  TEZNKK3IfmPf   2024年03月22日   96   0   0 htmljava
  TEZNKK3IfmPf   2024年03月29日   53   0   0 htmlhtml5
  TEZNKK3IfmPf   2024年03月29日   53   0   0 htmljQuery
TEZNKK3IfmPf