首页 > 其他分享 >ESXI与PVE监控告警

ESXI与PVE监控告警

时间:2022-12-05 19:58:12 浏览次数:59
标签:exporter name ESXI labels value instance pve PVE 告警

ESXI

配置文件

[root@192 exporter]# cat vmware_exporter/config.env
# Connection settings for vmware_exporter, loaded through `docker run --env-file`.
# NOTE(review): the user line in the published article was mangled by the site's
# e-mail obfuscation; the account below is a placeholder and must be replaced
# with the real vSphere monitoring account.
VSPHERE_USER=administrator@vsphere.local
VSPHERE_PASSWORD=P@sswor
VSPHERE_HOST=192.168.0.20
VSPHERE_IGNORE_SSL=TRUE
VSPHERE_SPECS_SIZE=2000

启动脚本

[root@192 exporter]# cat vmware_exporter/start.sh
# Launch the vmware_exporter container detached, publishing port 9272 and
# reading its settings from config.env (path is relative to the current directory).
# --rm removes the container once it stops.
docker run \
  --interactive --tty --detach --rm \
  --publish 9272:9272 \
  --name vmware_exporter \
  --env-file config.env \
  pryorda/vmware_exporter

prometheus配置

  # Scrape job for 192.168.0.20, collected through the exporter at 192.168.0.241:9272.
  - job_name: vmware_vcenter
    metrics_path: /metrics
    static_configs:
      - targets: ['192.168.0.20']
    relabel_configs:
      # Hand the configured target to the exporter as the `target` URL parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the same value as the `instance` label.
      - source_labels: [__param_target]
        target_label: instance
      # Point the actual scrape request at the exporter.
      - target_label: __address__
        replacement: '192.168.0.241:9272'

Grafana 仪表盘(Dashboard ID):11243

告警规则

[root@192 data]# cat /home/prometheus/rules/vmware-exporter.rules
groups:
- name: vmwarestatus
  rules:
  # Fires when vmware_vm_snapshots stays above 5 for 30 minutes.
  - alert: HighNumberOfSnapshots
    for: 30m
    expr: vmware_vm_snapshots > 5
    annotations:
      description: "High snapshots number on {{ $labels.instance }}: {{ $value }}\n  Num = {{ $value }}\n  VMware_Name = {{ $labels.vm_name }}"
      summary: "High Number of Snapshots (instance {{ $labels.instance }})"
    labels:
      department: 大学城
      severity: critical
  # Fires when vmware_vm_mem_usage_average / 100 stays at or above 90 for 30 minutes.
  - alert: VirtualMachineMemoryCritical
    for: 30m
    expr: vmware_vm_mem_usage_average / 100 >= 90
    annotations:
      description: "High memory usage on {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      summary: "Virtual Machine Memory Critical (instance {{ $labels.instance }})"
    labels:
      department: 大学城
      severity: critical
  # Fires when host CPU usage (usage / max * 100) stays above 90 for 5 minutes.
  # The annotation text now matches the alert: it is a critical alert, and the
  # previous "Outdated ..." wording was a copy-paste leftover unrelated to CPU.
  - alert: EsxiHostCPUCritical
    expr: ((vmware_host_cpu_usage / vmware_host_cpu_max) * 100) > 90
    for: 5m
    labels:
      severity: critical
      department: 大学城
    annotations:
      summary: Esxi Host CPU Critical (instance {{ $labels.instance }})
      description: "ESXi host CPU usage above 90% on {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  # Fires when a datastore's used share ((capacity - free) / capacity * 100)
  # stays above 99 for 5 minutes.
  # NOTE(review): the alert name is misspelled ("Uages"); it is kept unchanged so
  # existing Alertmanager routes/silences that match on it keep working.
  - alert: EsxiDiskUages
    expr: ((vmware_datastore_capacity_size - vmware_datastore_freespace_size) / vmware_datastore_capacity_size) * 100 > 99
    for: 5m
    labels:
      severity: critical
      department: 大学城
    annotations:
      summary: Esxi Datastore Usage Critical (instance {{ $labels.instance }})
      description: "Datastore usage above 99% on {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  # Fires when vmware_host_power_state has been 0 for 5 minutes.
  # The value is a state flag, so the description no longer formats it as a
  # percentage (the old text was copied from the usage alerts).
  # NOTE(review): 0 is presumed to mean "powered off" - confirm with the exporter docs.
  - alert: EsxiHostStatus
    expr: vmware_host_power_state == 0
    for: 5m
    labels:
      severity: critical
      department: 大学城
    annotations:
      summary: Esxi Host Status Critical (instance {{ $labels.instance }})
      description: "ESXi host power state is 0 (not powered on), reported by {{ $labels.instance }}\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

PVE

配置文件

说明:需要先在 PVE 中创建监控用户(即下方配置文件中的 prometheus@pve),并为其授予监控所需的只读权限。参考:https://github.com/nbuchwitz/check_pve

[root@192 exporter]# cat pve_exporter/pve.yml
# Settings for the `default` module (the module name the scrape job passes as
# its `module` parameter).
default:
  user: "prometheus@pve"
  password: "P@ssword7"
  # TLS certificate verification is switched off.
  verify_ssl: false

启动脚本

[root@192 exporter]# cat pve_exporter/start.sh
# Launch prometheus-pve-exporter detached, publishing port 9221 and mounting
# the host's pve.yml at /etc/pve.yml inside the container.
docker run \
  --name prometheus-pve-exporter \
  --detach \
  --publish 9221:9221 \
  --volume /data/exporter/pve_exporter/pve.yml:/etc/pve.yml \
  prompve/prometheus-pve-exporter

prometheus配置

  # Scrape job for 192.168.0.150, collected through the exporter at 192.168.0.241:9221.
  - job_name: pve
    metrics_path: /pve
    params:
      module:
        - default
    static_configs:
      - targets: ['192.168.0.150']
    relabel_configs:
      # Hand the configured target to the exporter as the `target` URL parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the same value as the `instance` label.
      - source_labels: [__param_target]
        target_label: instance
      # Point the actual scrape request at the exporter.
      - target_label: __address__
        replacement: '192.168.0.241:9221'

Grafana 仪表盘(Dashboard ID):10347

告警规则

[root@192 exporter]# cat /home/prometheus/rules/pve-exporter.rules
groups:
- name: pvestatus
  rules:
  # Fires when memory usage (used / size * 100) of an entry that has a matching
  # pve_guest_info series and pve_up == 1 stays above 98 for 5 minutes.
  # The join copies the `name` and `type` labels from pve_guest_info.
  - alert: PVEMemoryWarning
    for: 5m
    expr: ((pve_memory_usage_bytes / pve_memory_size_bytes * on(id, instance) group_left(name, type) pve_guest_info) and on(id, instance) pve_up == 1) * 100 >98
    annotations:
      description: "High memory usage on {{ $labels.name }}: {{ $value | printf \"%.2f\"}}%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      summary: "pve Memory Warning (instance {{ $labels.name }})"
    labels:
      department: 大学城
      severity: critical
  # Fires when CPU usage (ratio / limit * 100) stays above 90 for 5 minutes.
  # The expression is joined to pve_guest_info on (id, instance), so only entries
  # with a guest info series can match; the annotation text therefore speaks of
  # guests. The alert name is kept unchanged for routing compatibility.
  - alert: PVEHostCPUCritical
    expr: (((pve_cpu_usage_ratio / pve_cpu_usage_limit) * on(id, instance) group_left(name, type) pve_guest_info) and on(id, instance) pve_up == 1) * 100 > 90
    for: 5m
    labels:
      severity: critical
      department: 大学城
    annotations:
      summary: PVE Guest CPU Critical (guest {{ $labels.name }})
      description: "PVE guest CPU usage above 90% on {{ $labels.name }}: {{ $value | printf \"%.2f\"}}%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  # Fires when a storage's used share (used / size * 100) stays above 95 for 5 minutes.
  # group_left(storage) copies only the `storage` label from pve_storage_info, so
  # the result carries no `name` label; the annotations use `storage` instead
  # (the previous `$labels.name` rendered as an empty string).
  - alert: PVEDiskUsage
    expr: pve_disk_usage_bytes{id=~"storage/.+"} / pve_disk_size_bytes * on (id, instance) group_left(storage) pve_storage_info * 100 > 95
    for: 5m
    labels:
      severity: critical
      department: 大学城
    annotations:
      summary: PVE disk usage > 95%!!! (storage {{ $labels.storage }})
      description: "PVE storage usage above 95% on {{ $labels.storage }}: {{ $value | printf \"%.2f\"}}%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  # Fires when a PVE node has been reported down for 5 minutes.
  # pve_node_info is an info-style metric that is not expected to take the value
  # 0, so the previous `pve_node_info == 0` could not fire; pve_up carries the
  # online/offline state per id.
  # NOTE(review): confirm the metric semantics against your exporter version.
  - alert: PVENodeStatus
    expr: pve_up{id=~"node/.+"} == 0
    for: 5m
    labels:
      severity: critical
      department: 大学城
    annotations:
      summary: PVE node is down (node {{ $labels.id }})
      description: "PVE node {{ $labels.id }} is reported down via {{ $labels.instance }}\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

标签:exporter,name,ESXI,labels,value,instance,pve,PVE,告警
From: https://www.cnblogs.com/Dev0ps/p/16953291.html

相关文章