ESXI
配置文件
[root@192 exporter]# cat vmware_exporter/config.env
VSPHERE_USER=administrator@vsphere.local
VSPHERE_PASSWORD=P@sswor
VSPHERE_HOST=192.168.0.20
VSPHERE_IGNORE_SSL=TRUE
VSPHERE_SPECS_SIZE=2000
启动脚本
[root@192 exporter]# cat vmware_exporter/start.sh
#!/usr/bin/env bash
# Start the vmware_exporter container in the background and publish port 9272.
# config.env is resolved relative to this script, so the command also works
# when start.sh is invoked from a different working directory.
set -euo pipefail

script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)

# --rm: the container is removed as soon as it stops.
docker run -it -d --rm \
  -p 9272:9272 \
  --name vmware_exporter \
  --env-file "${script_dir}/config.env" \
  pryorda/vmware_exporter
prometheus配置
# Scrape job for vmware_exporter (an entry of `scrape_configs:` in prometheus.yml).
- job_name: 'vmware_vcenter'
  metrics_path: '/metrics'
  static_configs:
    - targets:
        - '192.168.0.20'
  relabel_configs:
    # Hand the listed address to the exporter as the `target` URL parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep that address as the `instance` label on the scraped series.
    - source_labels: [__param_target]
      target_label: instance
    # Point the actual HTTP scrape at the exporter container.
    - target_label: __address__
      replacement: 192.168.0.241:9272
grafana图表:11243
告警规则
[root@192 data]# cat /home/prometheus/rules/vmware-exporter.rules
# Alerting rules for metrics scraped from vmware_exporter.
groups:
  - name: vmwarestatus
    rules:
      # A VM has kept more than 5 snapshots for 30 minutes.
      - alert: HighNumberOfSnapshots
        expr: vmware_vm_snapshots > 5
        for: 30m
        labels:
          severity: critical
          department: 大学城
        annotations:
          summary: High Number of Snapshots (instance {{ $labels.instance }})
          description: "High snapshots number on {{ $labels.instance }}: {{ $value }}\n Num = {{ $value }}\n VMware_Name = {{ $labels.vm_name }}"

      # VM memory usage (metric divided by 100) at or above 90 for 30 minutes.
      - alert: VirtualMachineMemoryCritical
        expr: vmware_vm_mem_usage_average / 100 >= 90
        for: 30m
        labels:
          severity: critical
          department: 大学城
        annotations:
          summary: Virtual Machine Memory Critical (instance {{ $labels.instance }})
          description: "High memory usage on {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Host CPU usage as a percentage of vmware_host_cpu_max above 90 for 5 minutes.
      - alert: EsxiHostCPUCritical
        expr: ((vmware_host_cpu_usage / vmware_host_cpu_max) * 100) > 90
        for: 5m
        labels:
          severity: critical
          department: 大学城
        annotations:
          summary: Esxi Host CPU Warning (instance {{ $labels.instance }})
          description: "High ESXi host CPU usage on {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Datastore used space above 99% of capacity for 5 minutes.
      # NOTE(review): the alert name contains a typo ("Uages"); it is kept
      # unchanged because routes and silences may match on it.
      - alert: EsxiDiskUages
        expr: ((vmware_datastore_capacity_size - vmware_datastore_freespace_size) / vmware_datastore_capacity_size) * 100 >99
        for: 5m
        labels:
          severity: critical
          department: 大学城
        annotations:
          summary: Esxi Host Disk Warning (instance {{ $labels.instance }})
          description: "High datastore usage on {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # vmware_host_power_state has reported 0 for 5 minutes.
      - alert: EsxiHostStatus
        expr: vmware_host_power_state == 0
        for: 5m
        labels:
          severity: critical
          department: 大学城
        annotations:
          summary: Esxi Host Status Warning (instance {{ $labels.instance }})
          # The value is a 0/1 state, so it is no longer rendered as a percentage.
          description: "ESXi host power state is 0 on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
PVE
配置文件
说明,要先创建对应用户,并赋予监控权限。参考:https://github.com/nbuchwitz/check_pve
[root@192 exporter]# cat pve_exporter/pve.yml
# Module selected by the scrape parameter `module=default`.
default:
  user: prometheus@pve
  password: P@ssword7
  # NOTE(review): certificate verification is switched off here — confirm this is intended.
  verify_ssl: false
启动脚本
[root@192 exporter]# cat pve_exporter/start.sh
# Run prometheus-pve-exporter detached, publishing port 9221 and mounting the
# host-side pve.yml into the container at /etc/pve.yml.
# NOTE(review): the image is untagged (latest) — confirm which config path
# that image version actually reads.
docker run \
  --detach \
  --name prometheus-pve-exporter \
  --publish 9221:9221 \
  --volume /data/exporter/pve_exporter/pve.yml:/etc/pve.yml \
  prompve/prometheus-pve-exporter
prometheus配置
# Scrape job for prometheus-pve-exporter (an entry of `scrape_configs:` in prometheus.yml).
- job_name: 'pve'
  static_configs:
    - targets:
        - 192.168.0.150
  metrics_path: /pve
  params:
    # Selects the `default` section of pve.yml.
    module: [default]
  relabel_configs:
    # Hand the listed address to the exporter as the `target` URL parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep that address as the `instance` label on the scraped series.
    - source_labels: [__param_target]
      target_label: instance
    # Point the actual HTTP scrape at the exporter container.
    - target_label: __address__
      replacement: 192.168.0.241:9221
grafana图表:10347
告警规则
[root@192 exporter]# cat /home/prometheus/rules/pve-exporter.rules
# Alerting rules for metrics scraped from prometheus-pve-exporter.
groups:
  - name: pvestatus
    rules:
      # Guest memory usage above 98% for 5 minutes, only for guests with pve_up == 1.
      # The join with pve_guest_info adds the `name` and `type` labels.
      - alert: PVEMemoryWarning
        expr: ((pve_memory_usage_bytes / pve_memory_size_bytes * on(id, instance) group_left(name, type) pve_guest_info) and on(id, instance) pve_up == 1) * 100 >98
        for: 5m
        labels:
          severity: critical
          department: 大学城
        annotations:
          summary: pve Memory Warning (instance {{ $labels.name }})
          description: "High memory usage on {{ $labels.name }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Guest CPU usage relative to its limit above 90% for 5 minutes.
      - alert: PVEHostCPUCritical
        expr: (((pve_cpu_usage_ratio / pve_cpu_usage_limit) * on(id, instance) group_left(name, type) pve_guest_info) and on(id, instance) pve_up == 1) *100 >90
        for: 5m
        labels:
          severity: critical
          department: 大学城
        annotations:
          summary: PVE Host CPU Warning (instance {{ $labels.name }})
          description: "High CPU usage on {{ $labels.name }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Storage usage above 95% for 5 minutes.
      # The join only brings in the `storage` label, so the annotations use it
      # instead of `name`, which does not exist on this result.
      - alert: PVEDiskUsage
        expr: pve_disk_usage_bytes{id=~"storage/.+"} / pve_disk_size_bytes * on (id, instance) group_left(storage) pve_storage_info *100 >95
        for: 5m
        labels:
          severity: critical
          department: 大学城
        annotations:
          summary: "PVE disk usage > 95%!!! (instance {{ $labels.storage }})"
          description: "High disk usage on storage {{ $labels.storage }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Node reported as not up for 5 minutes.
      # pve_node_info is an info metric with constant value 1, so comparing it
      # to 0 never fires; pve_up carries the state, and the join with
      # pve_node_info only adds the `name` label.
      - alert: PVENodeStatus
        expr: pve_up{id=~"node/.+"} * on(id, instance) group_left(name) pve_node_info == 0
        for: 5m
        labels:
          severity: critical
          department: 大学城
        annotations:
          summary: PVE node is down (instance {{ $labels.name }})
          description: "PVE node {{ $labels.name }} is reported as down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
标签:exporter,name,ESXI,labels,value,instance,pve,PVE,告警
From: https://www.cnblogs.com/Dev0ps/p/16953291.html