exporter安装
分别在两个集群中各选任一节点安装 elasticsearch_exporter。
节点1安装
# Start elasticsearch_exporter in the background on node 1; metrics are served
# on :9114 at /metrics and all exporter output is discarded.
# NOTE(review): the password and host in the original --es.uri were mangled by
# e-mail obfuscation ("[email protected]"); replace <password> and <es-host>
# with the real values before running.
# --es.ssl-skip-verify disables TLS certificate verification for the ES endpoint.
nohup ./elasticsearch_exporter \
  --es.all \
  --es.indices \
  --es.cluster_settings \
  --es.indices_settings \
  --es.shards \
  --es.snapshots \
  --es.timeout=10s \
  --web.listen-address=":9114" \
  --web.telemetry-path="/metrics" \
  --es.ssl-skip-verify \
  --es.uri="https://elastic:<password>@<es-host>:9200" \
  > /dev/null 2>&1 &
节点2安装
# Start elasticsearch_exporter in the background on node 2; metrics are served
# on :9114 at /metrics and all exporter output is discarded.
# NOTE(review): the password and host in the original --es.uri were mangled by
# e-mail obfuscation ("[email protected]"); replace <password> and <es-host>
# with the real values before running.
# --es.ssl-skip-verify disables TLS certificate verification for the ES endpoint.
nohup ./elasticsearch_exporter \
  --es.all \
  --es.indices \
  --es.cluster_settings \
  --es.indices_settings \
  --es.shards \
  --es.snapshots \
  --es.timeout=10s \
  --web.listen-address=":9114" \
  --web.telemetry-path="/metrics" \
  --es.ssl-skip-verify \
  --es.uri="https://elastic:<password>@<es-host>:9200" \
  > /dev/null 2>&1 &
prometheus配置job
# File: prometheus.yml
# Scrape jobs for Prometheus itself, Alertmanager, the two Elasticsearch
# exporters (one job per cluster) and the node exporters.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    static_configs:
      - targets: ["10.30.92.71:9090"]

  - job_name: "alertmanager"
    static_configs:
      - targets: ["10.30.92.71:9095"]

  # elasticsearch_exporter instances listen on :9114; each cluster gets its own job.
  - job_name: "elasticsearch_asset"
    static_configs:
      - targets: ["10.32.3.2:9114"]

  - job_name: "elasticsearch_sa"
    static_configs:
      - targets: ["10.32.3.18:9114"]

  - job_name: "node_exporter"
    static_configs:
      - targets: ["10.32.3.2:9100", "10.32.3.3:9100", "10.32.3.5:9100"]
prometheus配置告警规则
# File: rules.yml
# Alerting rules evaluated against metrics exposed by elasticsearch_exporter.
# Every alert annotates the firing instance via {{ $labels.instance }}.
groups:
  - name: PrometheusCommunityElasticsearchExporter
    rules:
      # --- JVM heap usage (percentage of max heap) ---
      - alert: ElasticsearchHeapUsageTooHigh
        expr: '(elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"}) * 100 > 90'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: Elasticsearch Heap Usage Too High (instance {{ $labels.instance }})
          description: "The heap usage is over 90%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      - alert: ElasticsearchHeapUsageWarning
        expr: '(elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"}) * 100 > 80'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Elasticsearch Heap Usage warning (instance {{ $labels.instance }})
          description: "The heap usage is over 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # --- Data filesystem free space (percentage available) ---
      - alert: ElasticsearchDiskOutOfSpace
        expr: 'elasticsearch_filesystem_data_available_bytes / elasticsearch_filesystem_data_size_bytes * 100 < 10'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Elasticsearch disk out of space (instance {{ $labels.instance }})
          description: "The disk usage is over 90%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      - alert: ElasticsearchDiskSpaceLow
        expr: 'elasticsearch_filesystem_data_available_bytes / elasticsearch_filesystem_data_size_bytes * 100 < 20'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Elasticsearch disk space low (instance {{ $labels.instance }})
          description: "The disk usage is over 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # --- Cluster health colour ---
      - alert: ElasticsearchClusterRed
        expr: 'elasticsearch_cluster_health_status{color="red"} == 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Elasticsearch Cluster Red (instance {{ $labels.instance }})
          description: "Elastic Cluster Red status\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      - alert: ElasticsearchClusterYellow
        expr: 'elasticsearch_cluster_health_status{color="yellow"} == 1'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Elasticsearch Cluster Yellow (instance {{ $labels.instance }})
          description: "Elastic Cluster Yellow status\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # --- Node counts ---
      # NOTE(review): the `< 3` thresholds hard-code an expected node count of 3;
      # adjust them if a monitored cluster has a different size.
      - alert: ElasticsearchHealthyNodes
        expr: 'elasticsearch_cluster_health_number_of_nodes < 3'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Elasticsearch Healthy Nodes (instance {{ $labels.instance }})
          description: "Missing node in Elasticsearch cluster\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      - alert: ElasticsearchHealthyDataNodes
        expr: 'elasticsearch_cluster_health_number_of_data_nodes < 3'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Elasticsearch Healthy Data Nodes (instance {{ $labels.instance }})
          description: "Missing data node in Elasticsearch cluster\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # --- Shard state ---
      - alert: ElasticsearchRelocatingShards
        expr: 'elasticsearch_cluster_health_relocating_shards > 0'
        for: 0m
        labels:
          severity: info
        annotations:
          summary: Elasticsearch relocating shards (instance {{ $labels.instance }})
          description: "Elasticsearch is relocating shards\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      - alert: ElasticsearchRelocatingShardsTooLong
        expr: 'elasticsearch_cluster_health_relocating_shards > 0'
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: Elasticsearch relocating shards too long (instance {{ $labels.instance }})
          description: "Elasticsearch has been relocating shards for 15min\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      - alert: ElasticsearchInitializingShards
        expr: 'elasticsearch_cluster_health_initializing_shards > 0'
        for: 0m
        labels:
          severity: info
        annotations:
          summary: Elasticsearch initializing shards (instance {{ $labels.instance }})
          description: "Elasticsearch is initializing shards\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      - alert: ElasticsearchInitializingShardsTooLong
        expr: 'elasticsearch_cluster_health_initializing_shards > 0'
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: Elasticsearch initializing shards too long (instance {{ $labels.instance }})
          description: "Elasticsearch has been initializing shards for 15 min\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      - alert: ElasticsearchUnassignedShards
        expr: 'elasticsearch_cluster_health_unassigned_shards > 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Elasticsearch unassigned shards (instance {{ $labels.instance }})
          description: "Elasticsearch has unassigned shards\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # --- Cluster task queue and indexing activity ---
      - alert: ElasticsearchPendingTasks
        expr: 'elasticsearch_cluster_health_number_of_pending_tasks > 0'
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: Elasticsearch pending tasks (instance {{ $labels.instance }})
          description: "Elasticsearch has pending tasks. Cluster works slowly.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      - alert: ElasticsearchNoNewDocuments
        expr: 'increase(elasticsearch_indices_indexing_index_total{es_data_node="true"}[10m]) < 1'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Elasticsearch no new documents (instance {{ $labels.instance }})
          description: "No new documents for 10 min!\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
监控和告警的时候是根据 instance 来抓取的，所以和 ES 的集群名字没有太大的关系。
grafana监控多集群数据
标签:--,labels,instance,prometheus,elasticsearch,Elasticsearch,告警,es From: https://www.cnblogs.com/yxh168/p/18008356