prometheus
prometheus.yaml
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets: ['192.168.39.101:9093']
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
- /etc/prometheus/rules/*.rules
# - "first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["192.168.39.101:9090"]
#-------new edit----------
# 采集node exporter监控数据
# - job_name: 'node'
# static_configs:
# - targets: ['192.168.39.101:9100']
# - targets: ['192.168.39.102:9100']
# 使用consul自动发现
- job_name: 'consul-prometheus'
consul_sd_configs:
- server: '192.168.39.102:8500'
services: []
relabel_configs:
- source_labels: [__meta_consul_tags]
regex: .*node-exporter.*
action: keep
- regex: __meta_consul_service_metadata_(.+)
action: labelmap
- job_name: 'consul-cadvisor-exporter'
consul_sd_configs:
- server: '192.168.39.102:8500'
services: []
relabel_configs:
- source_labels: [__meta_consul_tags]
regex: .*cadvisor-exporter.*
action: keep
- regex: __meta_consul_service_metadata_(.+)
action: labelmap
# 采集 boost监控
- job_name: 'boost'
static_configs:
- targets: ['10.20.0.169:1288']
# cAdvisor
- job_name: cadvisor
static_configs:
- targets:
- 192.168.39.101:8080
#mysql
- job_name: mysql
static_configs:
- targets:
- 10.20.2.117:9105
# blockbox
- job_name: baidu_http2xx_probe
params:
module:
- http_2xx
target:
- baidu.com
metrics_path: /probe
static_configs:
- targets:
- 192.168.39.102:9115
- job_name: prometheus_http2xx_probe
params:
module:
- http_2xx
target:
- prometheus.io
metrics_path: /probe
static_configs:
- targets:
- 192.168.39.102:9115
# Relabeling
- job_name: 'blackbox'
metrics_path: /probe
params:
module: [http_2xx]
static_configs:
- targets:
- http://prometheus.io # Target to probe with http.
- https://prometheus.io # Target to probe with https.
- http://mi.com # Target to probe with http on port 8080.
- http://bing.com # Target to probe with http on port 8080.
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 192.168.39.102:9115
# remote storage
#remote_write:
# - url: "http://10.20.2.117:8086/api/v1/prom/write?db=prometheus&u=admin&p=admin"
#remote_read:
# - url: "http://10.20.2.117:8086/api/v1/prom/read?db=prometheus&u=admin&p=admin"
remote_write:
- url: "http://localhost:9201/write"
remote_read:
- url: "http://localhost:9201/read"
alert rules
cat /etc/prometheus/rules/hoststats-alert.rules
groups:
- name: hostStatsAlert
rules:
- alert: hostCpuUsageAlert
expr: sum(avg without (cpu_seconds_total)(irate(node_cpu_seconds_total{mode!='idle'}[5m]))) by (instance) > 0.85
for: 1m
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} CPU usgae high"
description: "{{ $labels.instance }} CPU usage above 85% (current value: {{ $value }})"
- alert: hostMemUsageAlert
expr: (node_memory_MemTotal - node_memory_MemAvailable)/node_memory_MemTotal > 0.85
for: 1m
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} MEM usgae high"
description: "{{ $labels.instance }} MEM usage above 85% (current value: {{ $value }})"
alertmanager
cat alertmanager-0.26.0.linux-amd64/alertmanager.yml
global:
slack_api_url: https://hooks.slack.com/services/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
route:
group_by: ['alertname']
group_wait: 30s
group_interval: 5m
repeat_interval: 1h
receiver: 'slack'
receivers:
- name: 'web.hook'
webhook_configs:
- url: 'http://127.0.0.1:5001/'
- name: slack
slack_configs:
- channel: '#prometheus_alertmananger_hook'
send_resolved: true
text: 'https://internal.myorg.net/wiki/alerts/{{ .GroupLabels.app }}/{{ .GroupLabels.alertname }}'
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']****
blackbox
cat blackbox_exporter-0.24.0.linux-amd64/blackbox.yml
modules:
http_2xx:
prober: http
http:
preferred_ip_protocol: "ip4"
http_post_2xx:
prober: http
http:
method: POST
tcp_connect:
prober: tcp
pop3s_banner:
prober: tcp
tcp:
query_response:
- expect: "^+OK"
tls: true
tls_config:
insecure_skip_verify: false
grpc:
prober: grpc
grpc:
tls: true
preferred_ip_protocol: "ip4"
grpc_plain:
prober: grpc
grpc:
tls: false
service: "service1"
ssh_banner:
prober: tcp
tcp:
query_response:
- expect: "^SSH-2.0-"
- send: "SSH-2.0-blackbox-ssh-check"
irc_banner:
prober: tcp
tcp:
query_response:
- send: "NICK prober"
- send: "USER prober prober prober :prober"
- expect: "PING :([^ ]+)"
send: "PONG ${1}"
- expect: "^:[^ ]+ 001"
icmp:
prober: icmp
icmp_ttl5:
prober: icmp
timeout: 5s
icmp:
ttl: 5****
标签:http,name,配置文件,汇总,job,prometheus,configs,prober
From: https://www.cnblogs.com/jasmine456/p/17995830