配置邮件示例
# prometheus.yml (excerpt)

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            # Address (host:port) of the Alertmanager instance that receives alerts.
            - '192.168.19.55:9093'

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "rules/node_rules.yml"
  # - "second_rules.yml"
# node_rules.yml
groups:
  - name: node-rules
    rules:
      # Fires when a scrape target has reported up == 0 for the whole `for` window.
      - alert: node-up
        expr: up == 0
        for: 15s
        labels:
          # Quoted so the label value is unambiguously a string.
          severity: '1'
          team: node
        annotations:
          # Keep the duration in this text in sync with `for` above.
          summary: "{{ $labels.instance }} Instance has been down for more than 15 seconds"
          # email.tmpl prints .Annotations.description, so provide one.
          description: "Target {{ $labels.instance }} of job {{ $labels.job }} is not responding to scrapes (up == 0)."
# alertmanager.yml (email notifications only)
global:
  resolve_timeout: 5m
  # SMTP relay used for outgoing alert mail.
  smtp_smarthost: 'smtp.163.com:25'
  smtp_from: 'cfgitlab_admin@163.com'
  smtp_auth_username: 'cfgitlab_admin@163.com'
  # NOTE(review): placeholder credential; avoid committing a real password.
  smtp_auth_password: '1111111'
  smtp_require_tls: false

# Custom notification templates (defines "email.to.html").
templates:
  - '/root/prom/alertmanager-0.26.0.linux-amd64/email.tmpl'

route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h
  receiver: 'email'

receivers:
  - name: 'email'
    email_configs:
      - to: '12727@qq.com'
        html: '{{ template "email.to.html" . }}'
        send_resolved: true

inhibit_rules:
  # Mute "warning" alerts while a "critical" alert with the same alertname fires.
  # Uses the matcher syntax; source_match/target_match are deprecated.
  # NOTE(review): node_rules.yml in this guide sets severity to 1, so this rule
  # only takes effect for alerts labelled critical/warning.
  - source_matchers:
      - 'severity = "critical"'
    target_matchers:
      - 'severity = "warning"'
    equal: ['alertname']
{{/* email.tmpl: HTML body for Alertmanager email notifications. */}}
{{ define "email.to.html" }}
{{/* Firing section: iterate only over alerts that are currently firing. */}}
{{ if gt (len .Alerts.Firing) 0 }}{{ range .Alerts.Firing }}
@告警: <br>
告警程序: prometheus_alert <br>
告警级别: {{ .Labels.severity }} 级<br>
告警类型: {{ .Labels.alertname }} <br>
故障主机: {{ .Labels.instance }} <br>
告警主题: {{ .Annotations.summary }} <br>
告警详情: {{ .Annotations.description }} <br>
触发时间: {{ .StartsAt }} <br>
{{ end }}{{ end }}
{{/* Resolved section: iterate only over alerts that have been resolved. */}}
{{ if gt (len .Alerts.Resolved) 0 }}{{ range .Alerts.Resolved }}
@恢复: <br>
告警主机: {{ .Labels.instance }} <br>
告警主题: {{ .Annotations.summary }} <br>
恢复时间: {{ .EndsAt }} <br>
{{ end }}{{ end }}
{{ end }}
up指标用来监控主机是否宕机下线
告警规则(rule)所依据的数据,就是这里的查询语句(expr)查询出来的结果
配置钉钉消息告警
# alertmanager.yml (DingTalk webhook + email notifications)
global:
  resolve_timeout: 5m
  # SMTP relay used for outgoing alert mail.
  smtp_smarthost: 'smtp.163.com:25'
  smtp_from: 'cfgitlab_admin@163.com'
  smtp_auth_username: 'cfgitlab_admin@163.com'
  # NOTE(review): avoid committing a real password.
  smtp_auth_password: 'TCTJGF'
  smtp_require_tls: false

# Custom notification templates (defines "email.to.html").
templates:
  - '/root/prom/alertmanager-0.26.0.linux-amd64/email.tmpl'

route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h
  receiver: 'dingding.webhook1'
  routes:
    - receiver: 'dingding.webhook1'
      # After sending the DingTalk message, keep matching so the email is sent too.
      continue: true
    - receiver: 'email'

receivers:
  - name: 'email'
    email_configs:
      - to: '12727@qq.com,1037715202@qq.com,423096959@qq.com,2841184943@qq.com'
        html: '{{ template "email.to.html" . }}'
        send_resolved: true
  - name: 'dingding.webhook1'
    webhook_configs:
      # Endpoint exposed by the prometheus-webhook-dingtalk process for target "webhook1".
      - url: 'http://192.168.19.55:8060/dingtalk/webhook1/send'
        send_resolved: true

inhibit_rules:
  # Mute "warning" alerts while a "critical" alert with the same alertname fires.
  # Uses the matcher syntax; source_match/target_match are deprecated.
  - source_matchers:
      - 'severity = "critical"'
    target_matchers:
      - 'severity = "warning"'
    equal: ['alertname']
# prometheus-webhook-dingtalk.yml

## Request timeout
# timeout: 5s

## Uncomment following line in order to write template from scratch (be careful!)
#no_builtin_template: true

## Customizable templates path
templates:
  - contrib/templates/legacy/template.tmpl

## You can also override default template using `default_message`
## The following example to use the 'legacy' template from v0.3.0
#default_message:
#  title: '{{ template "legacy.title" . }}'
#  text: '{{ template "legacy.content" . }}'

## Targets, previously was known as "profiles"
targets:
  webhook1:
    url: 'https://oapi.dingtalk.com/robot/send?access_token=7f7fe25'
    # secret for signature
    secret: '4ee16602f4108eb793475803cfa26a1be5210ccf1'
  # webhook2:
  #   url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
  # webhook_legacy:
  #   url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
  #   # Customize template content
  #   message:
  #     # Use legacy template
  #     title: '{{ template "legacy.title" . }}'
  #     text: '{{ template "legacy.content" . }}'
  # webhook_mention_all:
  #   url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
  #   mention:
  #     all: true
  # webhook_mention_users:
  #   url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
  #   mention:
  #     mobiles: ['156xxxx8827', '189xxxx8325']
需单独安装 prometheus-webhook-dingtalk-2.1.0.linux-amd64组件
内网代理钉钉消息
内网中只有一台机器可以联网
监控的主机无法连接外部的机器
给 prometheus-webhook-dingtalk 进程单独设置代理地址环境变量;不能设置成系统级环境变量,因为那样会影响系统上所有的 HTTPS 请求
添加进程环境变量
//在程序启动的时候注入多个环境变量
# Variable assignments placed before the command apply to that process only;
# unlike `export`, they do not leak into the calling shell session.
https_proxy='http://ct:kXoNx@10.30.90.15:3128' \
http_proxy='http://cta:kgX3Nx@10.30.90.15:3128' \
./prometheus-webhook-dingtalk --config.file=config.yml
1.把代理地址做为环境变量注入到指定的进程中
//在程序启动的时候注入一个环境变量
# The assignment placed before the command applies to that process only;
# unlike `export`, it does not leak into the calling shell session.
https_proxy='http://aaaaa:22222@10.30.90.1:3128' \
./prometheus-webhook-dingtalk --config.file=config.yml
//在程序启动的时候注入多个环境变量
# Variable assignments placed before the command apply to that process only;
# unlike `export`, they do not leak into the calling shell session.
https_proxy='http://ct:kXoNx@10.30.90.15:3128' \
http_proxy='http://cta:k8TgX3Nx@10.30.90.15:3128' \
./prometheus-webhook-dingtalk --config.file=config.yml
2.查看进程的环境变量
# List processes whose command line mentions "webhook" to find the PID.
ps -ef | grep webhook
# Print the environment of PID 5663, one variable per line
# (entries in /proc/<pid>/environ are NUL-separated).
# 5663 is the example PID here; presumably substitute the one reported by ps.
tr '\0' '\n' < /proc/5663/environ
# Print the command name of PID 5663.
cat /proc/5663/comm
标签:发送,dingtalk,webhook,yml,告警信息,prometheus,template,com,email From: https://www.cnblogs.com/yxh168/p/17997030