一、docker-compose安装
前期准备
# --- Docker daemon configuration -------------------------------------------
# Point the Docker daemon at a registry mirror, then reload systemd and
# restart Docker so the new daemon.json takes effect.
# NOTE: this overwrites any existing /etc/docker/daemon.json.
sudo mkdir -p /etc/docker
# Quoted delimiter: the JSON below is written verbatim, with no shell expansion.
sudo tee /etc/docker/daemon.json <<'EOF'
{
  "registry-mirrors": ["http://hub-mirror.c.163.com"]
}
EOF
sudo systemctl daemon-reload
sudo systemctl restart docker

# --- Install docker-compose (standalone binary) ----------------------------
# Compose v2 release tags carry a leading "v" (v2.26.1) and the asset names
# use a lowercase OS name (docker-compose-linux-x86_64), so normalise the
# output of `uname -s` before building the URL.
compose_version="v2.26.1"
compose_os="$(uname -s | tr '[:upper:]' '[:lower:]')"
compose_arch="$(uname -m)"
# -f: fail on HTTP errors instead of saving the error page as the "binary".
# -o under sudo: a plain `>` redirect would be opened by the calling shell,
#                which may not be allowed to write to /usr/local/bin.
sudo curl -fSL \
  "https://github.com/docker/compose/releases/download/${compose_version}/docker-compose-${compose_os}-${compose_arch}" \
  -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose

# Verify the installation. In the original session (root@os, logged in
# Wed Apr 24 12:10:08 2024 from 192.168.10.1) this printed:
#   Docker Compose version v2.26.1
docker-compose -v
二、Docker-compose安装prometheus
1.创建prometheus监控的文件夹
# Create the project root together with one sub-directory per component
# (-p also creates the missing parent /data/docker-prometheus), then switch
# into the project root; the later steps write files relative to it.
mkdir -p /data/docker-prometheus/{grafana,prometheus,alertmanager}
cd /data/docker-prometheus/
2.创建alertmanager的配置文件
# Write the Alertmanager configuration (run from /data/docker-prometheus).
#   global        - SMTP settings used to send alert e-mails; replace the
#                   sample address and 'your-password' with real credentials.
#   route         - groups alerts by alertname; group_wait / group_interval /
#                   repeat_interval control how long to wait before sending
#                   the first, follow-up and repeated notifications.
#   receivers     - the 'email' receiver referenced by route.receiver;
#                   send_resolved also mails when an alert recovers.
#   inhibit_rules - a 'critical' alert suppresses 'warning' alerts that share
#                   the same alertname, dev and instance labels.
# The delimiter is quoted so the file content is written verbatim.
cat > alertmanager/config.yml <<"EOF"
global:
  #163服务器
  smtp_smarthost: 'smtp.163.com:465'
  #发邮件的邮箱
  smtp_from: 'xc@163.com'
  #发邮件的邮箱用户名,也就是你的邮箱
  smtp_auth_username: 'xc@163.com'
  #发邮件的邮箱密码
  smtp_auth_password: 'your-password'
  #进行tls验证
  smtp_require_tls: false

route:
  group_by: ['alertname']
  # 当收到告警的时候,等待group_wait配置的时间,看是否还有告警,如果有就一起发出去
  group_wait: 10s
  # 如果上次告警信息发送成功,此时又来了一个新的告警数据,则需要等待group_interval配置的时间才可以发送出去
  group_interval: 10s
  # 如果上次告警信息发送成功,且问题没有解决,则等待 repeat_interval配置的时间再次发送告警数据
  repeat_interval: 10m
  # 全局报警组,这个参数是必选的
  receiver: email

receivers:
- name: 'email'
  #收邮件的邮箱
  email_configs:
  - to: 'xc@163.com'
    #当告警恢复后,是否发送邮件
    send_resolved: true

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
EOF
3.创建grafana的配置文件
# Write the Grafana environment file (run from /data/docker-prometheus).
# GF_SECURITY_ADMIN_PASSWORD is the password of Grafana's super-administrator
# account "admin"; change it to suit your environment.
# GF_USERS_ALLOW_SIGN_UP=false is written alongside it.
# The delimiter is quoted so that a password containing `$` or backticks is
# written literally instead of being expanded by the shell.
cat > grafana/config.monitoring <<'EOF'
GF_SECURITY_ADMIN_PASSWORD=password
GF_USERS_ALLOW_SIGN_UP=false
EOF
4.创建prometheus的配置文件
# Write the Prometheus configuration (run from /data/docker-prometheus).
#   global         - scrape targets and evaluate rules every 15s.
#   alerting       - send alerts to the Alertmanager at alertmanager:9093.
#   rule_files     - load alerting rules from alert.yml (same directory).
#   scrape_configs - four jobs: prometheus itself, alertmanager, cadvisor and
#                    node-exporter; the last two carry a fixed `instance`
#                    label.
# The delimiter is quoted so the file content is written verbatim.
cat > prometheus/prometheus.yml << "EOF"
# 全局配置
global:
  scrape_interval: 15s # 将搜刮间隔设置为每15秒一次。默认是每1分钟一次。
  evaluation_interval: 15s # 每15秒评估一次规则。默认是每1分钟一次。

# Alertmanager 配置
alerting:
  alertmanagers:
  - static_configs:
    - targets: ['alertmanager:9093']

# 报警(触发器)配置
rule_files:
  - "alert.yml"

# 搜刮配置
scrape_configs:
  - job_name: 'prometheus'
    # 覆盖全局默认值,每15秒从该作业中刮取一次目标
    scrape_interval: 15s
    static_configs:
    - targets: ['localhost:9090']

  - job_name: 'alertmanager'
    scrape_interval: 15s
    static_configs:
    - targets: ['alertmanager:9093']

  - job_name: 'cadvisor'
    scrape_interval: 15s
    static_configs:
    - targets: ['cadvisor:8080']
      labels:
        instance: Prometheus服务器

  - job_name: 'node-exporter'
    scrape_interval: 15s
    static_configs:
    - targets: ['node_exporter:9100']
      labels:
        instance: Prometheus服务器
EOF
5.创建alert报警文件
# Write the Prometheus alerting rules (run from /data/docker-prometheus).
# A single rule fires with severity "critical" when a target's `up` metric
# has been 0 for 30 seconds.
# The delimiter must stay quoted: the annotations contain `$labels...`
# template variables that the shell must not expand.
cat > prometheus/alert.yml <<"EOF"
groups:
- name: Prometheus alert
  rules:
  # 对任何实例超过30秒无法联系的情况发出警报
  - alert: 服务告警
    expr: up == 0
    for: 30s
    labels:
      severity: critical
    annotations:
      summary: "服务异常,实例:{{ $labels.instance }}"
      description: "{{ $labels.job }} 服务已关闭"
EOF
6.创建docker-compose安装文件
docker镜像版本可以在dockerhub上查询,然后pull
# Enter the project directory.
cd /data/docker-prometheus

# Create docker-compose.yaml with cat. It defines five services on one bridge
# network ("monitoring"): prometheus, alertmanager, cadvisor, node_exporter
# and grafana, plus named volumes for the Prometheus and Grafana data.
# The delimiter must stay quoted: the node_exporter regex contains `$$`
# (Compose's escape for a literal `$`), which the shell must not expand.
# The node_exporter filesystem flag uses the current name
# `--collector.filesystem.mount-points-exclude`; the older
# `--collector.filesystem.ignored-mount-points` spelling is deprecated.
# NOTE(review): the google/cadvisor image on Docker Hub appears to be no
# longer updated (upstream now publishes gcr.io/cadvisor/cadvisor) - confirm
# and consider pinning a maintained tag.
cat > docker-compose.yaml <<"EOF"
version: '3.3'

volumes:
  prometheus_data: {}
  grafana_data: {}

networks:
  monitoring:
    driver: bridge

services:
  prometheus:
    image: prom/prometheus:v2.37.6
    container_name: prometheus
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - ./prometheus/:/etc/prometheus/
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      #热加载配置
      - '--web.enable-lifecycle'
      #api配置
      #- '--web.enable-admin-api'
      #历史数据最大保留时间,默认15天
      - '--storage.tsdb.retention.time=30d'
    networks:
      - monitoring
    links:
      - alertmanager
      - cadvisor
      - node_exporter
    expose:
      - '9090'
    ports:
      - 9090:9090
    depends_on:
      - cadvisor

  alertmanager:
    image: prom/alertmanager:v0.25.0
    container_name: alertmanager
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - ./alertmanager/:/etc/alertmanager/
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
    networks:
      - monitoring
    expose:
      - '9093'
    ports:
      - 9093:9093

  cadvisor:
    image: google/cadvisor:latest
    container_name: cadvisor
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    networks:
      - monitoring
    expose:
      - '8080'

  node_exporter:
    image: prom/node-exporter:v1.5.0
    container_name: node-exporter
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc|rootfs/var/lib/docker)($$|/)'
    networks:
      - monitoring
    ports:
      - '9100:9100'

  grafana:
    image: grafana/grafana:9.4.3
    container_name: grafana
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning/:/etc/grafana/provisioning/
    env_file:
      - ./grafana/config.monitoring
    networks:
      - monitoring
    links:
      - prometheus
    ports:
      - 3000:3000
    depends_on:
      - prometheus
EOF
三、启动容器
root@os:/data/docker-prometheus# docker-compose up -d
[+] Running 6/6
 ✔ Network docker-prometheus_monitoring  Created   0.1s
 ✔ Container node-exporter               Started   0.2s
 ✔ Container alertmanager                Started   0.2s
 ✔ Container cadvisor                    Started   0.2s
 ✔ Container prometheus                  Started   0.1s
 ✔ Container grafana                     Started
创建仪表盘,请参考源码安装
http://192.168.10.14:3000/
标签:compose,Started,grafana,prometheus,docker,cat From: https://www.cnblogs.com/yangmeichong/p/18155323