方法1:
一、使用webhook-adapter同步信息到企业微信
1.编辑alertmanager.yml文件
# Alertmanager configuration: send every alert to the local webhook-adapter,
# which relays it to the WeCom (企业微信) group robot.
global:
  resolve_timeout: 5m
  # scrape_interval was removed here: it is a Prometheus (prometheus.yml)
  # setting and is not a valid key in Alertmanager's global section.

templates:
  - '/data/prometheus/alertmanager/template/*.tmpl'

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 30s
  receiver: 'web.hook'

receivers:
  - name: 'web.hook'
    webhook_configs:
      # Port 8080 is the host port published by the webhook-adapter container.
      - url: 'http://127.0.0.1:8080/adapter/wx'
        send_resolved: true

inhibit_rules:
  # While a critical ApplicationDown alert is firing, mute warning alerts that
  # share the labels listed in `equal`.
  - source_match:
      alertname: 'ApplicationDown'
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', "target", "job", "instance"]
2.docker安装企业微信报警插件(webhook-adapter),启用一个企微机器人。
# Start the webhook-adapter container that relays Alertmanager webhooks to a
# WeCom group robot. Host port 8080 maps to the adapter's port 80.
# The --adapter argument is quoted because the robot URL contains '?', which
# the shell would otherwise treat as a glob character.
docker run -d --name wechat \
  --restart always -p 8080:80 \
  guyongquan/webhook-adapter \
  '--adapter=/app/prometheusalert/wx.js=/wx=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=0eb7......68'
二、使用自定义webhook脚本(Flask)同步信息到企业微信
1.设置alertmanager配置文件
# Alertmanager configuration: send alerts to the custom Flask webhook
# listening on port 5000.
global:
  resolve_timeout: 5m
  # scrape_interval was removed here: it is a Prometheus (prometheus.yml)
  # setting and is not a valid key in Alertmanager's global section.

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 30s
  receiver: 'ops_notify'
  routes:
    - receiver: ops_notify
      group_wait: 10s
      match_re:
        alertname: 'NodeStatsAlert'

receivers:
  - name: 'ops_notify'
    webhook_configs:
      - url: 'http://10.0.xx.101:5000'
        send_resolved: true

inhibit_rules:
  # A firing critical alert mutes warning alerts that share the labels in `equal`.
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
设置rules/mysql.yml(要添加node_name标签,不然会报错)
# Prometheus alerting rules for MySQL availability and replication threads.
# Each rule sets a node_name label copied from the instance label.
groups:
  - name: Mysql-rules
    rules:
      # MySQL exporter reports the server as down.
      - alert: Mysql status
        expr: mysql_up == 0
        for: 5s
        labels:
          severity: error
          node_name: "{{ $labels.instance }}"
        annotations:
          summary: "您的 {{ $labels.instance }} 的Mysql已停止运行!"
          description: "Mysql数据库宕机,请检查"

      # Replication IO thread is not running.
      - alert: Mysql slave io thread status
        expr: mysql_slave_status_slave_io_running == 0
        for: 5s
        labels:
          severity: error
          node_name: "{{ $labels.instance }}"
        annotations:
          summary: "您的 {{ $labels.instance }} Mysql slave io thread已停止"
          description: "Mysql主从IO线程故障,请检测"

      # Replication SQL thread is not running.
      - alert: Mysql slave sql thread status
        expr: mysql_slave_status_slave_sql_running == 0
        for: 5s
        labels:
          severity: error
          node_name: "{{ $labels.instance }}"
        annotations:
          summary: "您的 {{ $labels.instance }} Mysql slave sql thread已停止"
          description: "Mysql主从sql线程故障,请检测"
2.在webhook-wechat目录下,有如下脚本文件:
app.py
# -*- coding: utf-8 -*-
"""Webhook bridge from Prometheus Alertmanager to a WeCom (企业微信) group robot.

Alertmanager POSTs its notification JSON to ``/``; every alert in the
notification is rendered as a markdown message and posted to the robot
webhook. The robot key is read from the ``ROBOT_TOKEN`` environment variable.
"""
import os
import json
import requests
import arrow
from flask import Flask
from flask import request

app = Flask(__name__)

# Placeholder rendered when an expected label or annotation is absent.
MISSING = 'null'
# arrow format used for the start/end timestamps in the message.
TIME_FORMAT = 'YYYY-MM-DD HH:mm:ss ZZ'
# Alert status -> (status word, title suffix) used in the message title.
STATUS_TEXT = {
    'firing': ('报警', '有新的报警'),
    'resolved': ('恢复', '有报警恢复'),
}
# Seconds to wait for the robot webhook before giving up on a request.
REQUEST_TIMEOUT = 10


def bytes2json(data_bytes):
    """Decode a UTF-8 request body and parse it as JSON.

    The body is parsed as-is first. Only if that fails is the legacy
    behaviour applied (replace single quotes with double quotes), so valid
    JSON that contains apostrophes is no longer corrupted.

    Raises:
        ValueError: the body is not valid UTF-8 or cannot be parsed as JSON.
    """
    text = data_bytes.decode('utf8')
    try:
        return json.loads(text)
    except ValueError:
        return json.loads(text.replace("'", '"'))


def _first_value(alert, section, *keys):
    """Return the first of ``keys`` found in ``alert[section]``, else 'null'."""
    values = alert.get(section) or {}
    for key in keys:
        if key in values:
            return values[key]
    return MISSING


def _local_time(timestamp):
    """Render a timestamp string in the Asia/Shanghai timezone."""
    return arrow.get(timestamp).to('Asia/Shanghai').format(TIME_FORMAT)


def _build_message(alert):
    """Build the robot markdown payload for one alert.

    Returns None when the alert status is neither 'firing' nor 'resolved'.
    """
    status = alert.get('status')
    if status not in STATUS_TEXT:
        return None
    status_zh, suffix = STATUS_TEXT[status]

    alertname = _first_value(alert, 'labels', 'alertname')
    title = '【%s】xxxx环境 %s %s' % (status_zh, alertname, suffix)
    # Prefer the 'message' annotation, then 'description'.
    message = _first_value(alert, 'annotations', 'message', 'description')
    # Fall back to the instance label when the rule did not set node_name.
    host = _first_value(alert, 'labels', 'node_name', 'instance')

    content = (
        "## %s \n\n" % title +
        ">**告警级别**: %s \n\n" % _first_value(alert, 'labels', 'severity') +
        ">**告警类型**: %s \n\n" % alertname +
        ">**告警主机**: %s \n\n" % host +
        ">**告警详情**: %s \n\n" % message +
        ">**告警状态**: %s \n\n" % status +
        ">**触发时间**: %s \n\n" % _local_time(alert['startsAt'])
    )
    if status == 'resolved':
        content += ">**触发结束时间**: %s \n" % _local_time(alert['endsAt'])

    return {"msgtype": "markdown", "markdown": {"content": content}}


def iter_alert_messages(data):
    """Yield one robot payload per firing/resolved alert in the notification."""
    for alert in data.get('alerts') or []:
        message = _build_message(alert)
        if message is not None:
            yield message


def makealertdata(data):
    """Return the payload for the last firing/resolved alert, or None.

    Kept for backward compatibility with the original single-message return
    value; ``send_alert`` uses ``iter_alert_messages`` so no alert is dropped.
    """
    send_data = None
    for send_data in iter_alert_messages(data):
        pass
    return send_data


def send_alert(data):
    """Post every alert in ``data`` to the WeCom group robot.

    Prints a message and returns without sending when ROBOT_TOKEN is unset.
    """
    # ROBOT_TOKEN is configured in docker-compose.yml, which injects it into
    # the container environment at start-up.
    token = os.getenv('ROBOT_TOKEN')
    if not token:
        print('you must set ROBOT_TOKEN env')
        return
    url = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=%s' % token

    for send_data in iter_alert_messages(data):
        # A timeout keeps a stalled webhook call from blocking the worker forever.
        req = requests.post(url, json=send_data, timeout=REQUEST_TIMEOUT)
        result = req.json()
        if result.get('errcode') != 0:
            print('notify wechat error: %s' % result.get('errcode'))


@app.route('/', methods=['POST', 'GET'])
def send():
    """HTTP entry point: POST forwards the notification, GET returns a banner."""
    if request.method == 'POST':
        post_data = request.get_data()
        try:
            payload = bytes2json(post_data)
        except ValueError:
            # Malformed body: answer 400 instead of failing with a 500.
            return 'invalid json payload', 400
        send_alert(payload)
        return 'success'
    else:
        return 'weclome to use prometheus alertmanager dingtalk webhook server!'


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
requirements.txt
certifi==2018.10.15
chardet==3.0.4
Click==7.0
Flask==1.0.2
idna==2.7
itsdangerous==1.1.0
Jinja2==2.10
MarkupSafe==1.1.0
requests==2.20.1
urllib3==1.24.1
Werkzeug==0.14.1
arrow==0.13.1
Dockerfile
# Image for the Alertmanager -> WeCom webhook bridge (app.py).
FROM python:3.6.4

# set working directory
WORKDIR /src

# install requirements first, so this layer is rebuilt only when
# requirements.txt changes rather than on every source edit
COPY requirements.txt /src/
RUN pip install --no-cache-dir -r requirements.txt

# add app (COPY rather than ADD: no archive extraction or URL fetch is needed)
COPY . /src

EXPOSE 5000

# run server (exec form: starts python directly, without a /bin/sh -c wrapper)
CMD ["python", "app.py"]
docker-compose.yml
# Compose file for the webhook bridge: builds the image from the local
# Dockerfile and publishes port 5000.
version: "2"

services:
  prometheus-webhook-alert:
    build: .
    restart: always
    networks:
      - monitor
    ports:
      - "5000:5000"
    volumes:
      - /etc/localtime:/etc/localtime
      - ./app.py:/src/app.py
    environment:
      # Environment variables set here are read by app.py at runtime.
      ROBOT_TOKEN: "0eb7......68"

networks:
  monitor:
    driver: bridge
3.各个程序都已经写好了,开始启动
[root@ webhook-wechat]# ls app.py docker-compose.yml Dockerfile requirements.txt
启动并查看日志
# Start the stack in the background.
docker-compose up -d

# Check that it is running; any of the following three commands works.
# (`docker-compose ps` replaces `docker-compose status`, which is not a
# docker-compose subcommand.)
docker-compose ps
docker ps -a
ss -luntp | grep 5000

# View the logs; either command works, and -f keeps following new output.
docker-compose logs
docker-compose logs -f
标签:status,name,微信,labels,Prometheus,output,告警,data,docker From: https://www.cnblogs.com/jiangxm157/p/18071280