(文章目录)
目录结构
- alertmanager
  - alert_templates
    - *.tmpl
  - alertmanager.yml
- loki
  - alert_rules
  - loki.yaml
- promtail
  - promtail.yaml
- prometheus
  - alert_rules
  - prometheus.yml
- docker-compose.yml
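Docker 镜像加速地址配置(/etc/docker/daemon.json)。注意:registry-mirrors 应填写在阿里云容器镜像服务控制台中获取的专属加速器地址(形如 https://<加速器ID>.mirror.aliyuncs.com),修改后需重启 Docker 服务: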
{
  "registry-mirrors": [
    "https://cr.console.aliyun.com/"
  ]
}
prometheus
prometheus.yml
# Prometheus server configuration (prometheus.yml).
global:
  scrape_interval: 15s  # By default, scrape targets every 15 seconds.
  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'codelab-monitor'

# Scrape jobs: Prometheus itself plus the exporters running on the monitored servers.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    # Override the global default and scrape targets from this job every 5 seconds.
    scrape_interval: 5s
    static_configs:
      - targets: ['localhost:9090']

  # Each exporter below must be installed on the target server and be
  # listening on the port listed here.
  - job_name: 'node_exporter'
    static_configs:
      - targets: ['192.168.152.150:9100', '120.76.47.32:9100']

  - job_name: 'mysql_exporter'
    static_configs:
      # NOTE(review): when Prometheus runs in a container, 'localhost' is the
      # container itself, not the Docker host -- confirm this target is reachable.
      - targets: ['120.76.47.32:9104', 'localhost:9104']

  - job_name: 'nginx-vts-exporter'
    scrape_interval: 10s
    static_configs:
      - targets: ['120.76.47.32:9913', '192.168.152.150:9913']

# Alertmanager
alerting:
  alertmanagers:
    - static_configs:
        # Address Alertmanager by its Compose service name: inside the
        # Prometheus container 'localhost:9093' points at Prometheus' own
        # network namespace, so alerts would never be delivered.
        - targets: ['alertmanager:9093']

# Alerting rule files. This directory must exist inside the Prometheus
# container (bind-mount ./prometheus/alert_rules to it), otherwise the glob
# matches nothing and no rules are loaded.
rule_files:
  - "/etc/prometheus/alert_rules/*.yml"
alert_rules
警告规则配置:
prometheus/alert_rules/*.yml
loki
loki.yaml
# Loki configuration: single instance, filesystem storage, in-memory ring.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

limits_config:
  reject_old_samples: true          # Reject samples that are too old.
  reject_old_samples_max_age: 168h  # Samples older than 168 hours are rejected.
  retention_period: 360h            # Data retention period.
  max_query_series: 1000
  max_query_parallelism: 2          # Maximum number of queries that will be scheduled in parallel by the frontend.
  max_query_lookback: 24h
  # max_global_streams_per_user: 0

# frontend:
#   max_outstanding_per_tenant: 1024

query_scheduler:
  max_outstanding_requests_per_tenant: 50

common:
  instance_addr: 127.0.0.1
  path_prefix: /tmp/loki
  storage:
    filesystem:
      chunks_directory: /tmp/loki/chunks
      rules_directory: /tmp/loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 10000

schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 24h

# NOTE(review): retention is configured in three places
# (limits_config.retention_period = 360h, table_manager.retention_period = 24h,
# and compactor.retention_enabled) -- confirm which one is meant to apply.
table_manager:
  retention_deletes_enabled: true  # Enable retention deletes.
  retention_period: 24h            # Data older than this is deleted.

compactor:
  working_directory: /tmp/loki/retention
  shared_store: filesystem
  compaction_interval: 10m
  retention_enabled: true
  retention_delete_delay: 10s
  retention_delete_worker_count: 150

ruler:
  storage:
    type: local
    local:
      directory: /loki/rules
  rule_path: /loki/rules-temp
  alertmanager_url: http://192.168.152.150:9093
  ring:
    kvstore:
      store: inmemory
  enable_api: true
  enable_alertmanager_v2: true

# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration
# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/
#
# Statistics help us better understand how Loki is used, and they show us performance
# levels for most users. This helps us prioritize features and documentation.
# For more information on what's sent, look at
# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go
# Refer to the buildReport method to see what goes into a report.
#
# If you would like to disable reporting, uncomment the following lines:
# analytics:
#   reporting_enabled: false
alert_rules
警告规则配置:
loki/alert_rules/*.yml
promtail
# Promtail configuration: tails the log files below and pushes them to Loki.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

# File in which Promtail records how far it has read into each log file.
positions:
  filename: /tmp/positions.yaml

clients:
  - url: http://loki:3100/loki/api/v1/push

limits_config:
  readline_rate_enabled: true
  max_streams: 2000
  readline_rate_drop: false
  max_line_size: 2048
  readline_burst: 2000

scrape_configs:
  # Logs under /var/log/<service>/002*/, one static target per service.
  - job_name: deviceLog
    static_configs:
      - targets:
          - localhost
        labels:
          job: localserver
          __path__: /var/log/localserver/002*/*.log
      - targets:
          - localhost
        labels:
          job: bob
          __path__: /var/log/bob/002*/*.log
      - targets:
          - localhost
        labels:
          job: media
          __path__: /var/log/media/002*/*.log

  # Logs directly under /var/log/<service>/, one static target per service.
  - job_name: webLog
    static_configs:
      - targets:
          - localhost
        labels:
          job: image
          __path__: /var/log/image/*.log
      - targets:
          - localhost
        labels:
          job: bob-soultion
          __path__: /var/log/bob-soultion/*.log
      - targets:
          - localhost
        labels:
          job: customer
          __path__: /var/log/customer/*.log
alertmanager
alertmanager.yml
# Alertmanager configuration (alertmanager.yml).
#
# SECURITY NOTE(review): the SMTP password and Telegram bot token below are
# stored in plaintext in a published file. Treat them as leaked: rotate them
# and keep the real values out of version control.
global:
  # resolve_timeout: 1m
  # Candidate SMTP ports: 587 or 465.
  smtp_smarthost: smtp.qq.com:587
  # Placeholders: the original addresses were lost (the source showed the
  # e-mail-obfuscation artifact "[email protected]"). They must be quoted
  # strings -- an unquoted value starting with "[" is parsed as a YAML list
  # and Alertmanager rejects the config.
  smtp_from: 'your-account@qq.com'
  smtp_auth_username: 'your-account@qq.com'
  smtp_auth_password: osymjgsurmosdcia

# Notification templates; "default.to.message" used below is defined in one of these files.
templates:
  - "/etc/alertmanager/alert_templates/*"

route:
  group_by: ["alertname"]
  group_wait: 1m
  group_interval: 5m
  repeat_interval: 20m
  receiver: "telepush"

receivers:
  # Defined but not referenced by the route above.
  - name: "wbhook"
    webhook_configs:
      - url: "http://192.168.152.160:8089/adapter/wx"
        send_resolved: true

  # Sends every notification to two Telegram chats using the same bot.
  - name: "telepush"
    telegram_configs:
      - send_resolved: true
        api_url: "https://api.telegram.org"
        bot_token: "6100072571:AAFPoUqywbdxv-wFGxPoa6dF4f9VKuvtjhk"
        chat_id: -857040540
        message: '{{ template "default.to.message" .}}'
      - send_resolved: true
        api_url: "https://api.telegram.org"
        bot_token: "6100072571:AAFPoUqywbdxv-wFGxPoa6dF4f9VKuvtjhk"
        chat_id: -1001929772831
        message: '{{ template "default.to.message" .}}'

# Suppress "warning" alerts while a "critical" alert with the same
# alertname, dev and instance labels is firing.
inhibit_rules:
  - source_match:
      severity: "critical"
    target_match:
      severity: "warning"
    equal: ["alertname", "dev", "instance"]
alert_templates
警告模板:
alertmanager/alert_templates/*.tmpl
{{/*
  "default.to.message": notification body used by the Telegram receiver.
  Renders one start/end-delimited section per entry in .Alerts, showing the
  alert status, the severity / alertname / instance labels, and the summary /
  value / description annotations.
  The trigger time is .StartsAt plus 28800e9 ns (8 hours) -- presumably to
  display UTC+8 local time; confirm against the Alertmanager host timezone.
*/}}
{{ define "default.to.message" }}
{{ range .Alerts }}
=========start==========
告警状态:{{ .Status }}
告警级别: {{ .Labels.severity }} 级
告警类型: {{ .Labels.alertname }}
故障主机: {{ .Labels.instance }}
告警主题: {{ .Annotations.summary }}
触发阀值:{{ .Annotations.value }}
告警详情: {{ .Annotations.description }}
触发时间: {{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
=========end==========
{{ end }}
{{ end }}
docker-compose 配置
# Compose stack: Loki + Promtail (logs), Prometheus + Alertmanager (metrics
# and alerting), Grafana (dashboards) and a webhook adapter for WeChat.
version: "3.7"

services:
  loki:
    image: grafana/loki:2.8.0
    container_name: loki
    restart: always
    ports:
      - "3100:3100"
    volumes:
      # The config file is ./loki/loki.yaml in the documented directory layout;
      # mounting a non-existent host path would make Docker create a directory
      # there and Loki would fail to read its config.
      - ./loki/loki.yaml:/etc/loki/local-config.yaml
      # With auth_enabled: false Loki uses the tenant ID "fake", so ruler
      # files live under <rules directory>/fake.
      - ./loki/alert_rules/:/loki/rules/fake

  promtail:
    image: grafana/promtail:2.8.0
    container_name: promtail
    restart: always
    ports:
      - "9080:9080"
    volumes:
      - ./promtail/promtail.yaml:/etc/promtail/config.yml
      # Host log directory exposed to Promtail as /var/log.
      - /home/tjc/log:/var/log
    links:
      - loki

  grafana:
    image: grafana/grafana-enterprise:9.4.7
    container_name: grafana
    restart: always
    ports:
      - "3000:3000"

  prometheus:
    image: prom/prometheus:v2.43.0
    container_name: prometheus
    restart: always
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      # prometheus.yml loads rules from /etc/prometheus/alert_rules/*.yml, so
      # the rules directory has to be mounted as well.
      - ./prometheus/alert_rules:/etc/prometheus/alert_rules

  alertmanager:
    image: quay.io/prometheus/alertmanager:v0.25.0
    container_name: alertmanager
    restart: always
    ports:
      - "9093:9093"
    volumes:
      # Without these mounts the container runs with the image's built-in
      # default config and ./alertmanager/alertmanager.yml is never read.
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml
      - ./alertmanager/alert_templates:/etc/alertmanager/alert_templates

  webhook-adapter:
    image: guyongquan/webhook-adapter:latest
    container_name: webhook-adapter
    hostname: webhook-adapter
    ports:
      - "8089:80"
    # SECURITY NOTE(review): the WeChat webhook key is embedded in plaintext;
    # treat it as leaked and rotate it.
    command:
      - "--adapter=/app/prometheusalert/wx.js=/wx=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=c5bb4610-eba4-4476-8eae-29f230bbf143"
exporter 安装
在你需要监控的对应服务器上安装运行
node_exporter(主机指标,端口 9100)
nginx-vts-exporter(Nginx 指标,端口 9913)
mysqld_exporter(MySQL 指标,端口 9104)
开始运行
# Run from the directory that contains docker-compose.yml; -d starts the containers in the background.
docker-compose up -d
版本查看
grafana:
# Print the Grafana CLI and server versions from inside the container named "grafana".
docker exec grafana grafana-cli -v
docker exec grafana grafana-server -v
prometheus:
# docker-compose.yml pins container_name for both services, so the names
# (NAMES column of `docker ps`) can be used instead of container IDs, which
# differ on every machine and change whenever a container is recreated.
docker exec -it prometheus /bin/prometheus --version
docker exec -it alertmanager /bin/alertmanager --version
标签:name,rules,prometheus,Grafana,job,Loki,loki,log
From: https://blog.51cto.com/u_14661954/6347215