目录
prometheus 部署到一个容器里配置脚本
背景
公司要求把 prometheus、grafana、alertmanager、consul 以及几个 exporter 放到同一个容器内,因此写了一个脚本来部署并启动它们。这里只记录脚本,安装包可以自行从官网下载,没有做过二次处理
安装脚本
Dockerfile
//因为用到上述组件,用dockerfile
vim Dockerfile
# Single image bundling Prometheus, Grafana, Alertmanager, Consul and the
# exporters. Every archive is expected in the build context: ADD unpacks local
# .tar.gz archives automatically, while the Consul .zip has to be unzipped.
# NOTE(review): chmod -R 777 makes the installed trees world-writable; it is
# kept because it is unclear which user the services are expected to run as.
FROM guchuanlong/jdk:8
# MAINTAINER is deprecated; the same metadata is carried by a label.
LABEL maintainer="liwenchao"
LABEL version="1.0"
ENV MYPATH=/usr/local
WORKDIR $MYPATH
ADD prometheus-2.37.1.linux-amd64.tar.gz $MYPATH
RUN cd $MYPATH && mv prometheus-2.37.1.linux-amd64 prometheus && chmod -R 777 ./prometheus
ADD grafana-enterprise-9.1.1.linux-amd64.tar.gz $MYPATH
RUN cd $MYPATH && mv grafana-9.1.1 grafana && chmod -R 777 ./grafana
ADD alertmanager-0.24.0.linux-amd64.tar.gz $MYPATH
RUN cd $MYPATH && mv alertmanager-0.24.0.linux-amd64 alertmanager && chmod -R 777 ./alertmanager
COPY consul_1.13.0_linux_amd64.zip $MYPATH
# Drop the zip and the yum cache in the same layer so they do not stay in the image.
RUN yum -y install unzip && \
    unzip ./consul_1.13.0_linux_amd64.zip && \
    rm -f ./consul_1.13.0_linux_amd64.zip && \
    yum clean all && \
    chmod -R 777 $MYPATH/consul
ADD node_exporter-1.4.0.linux-amd64.tar.gz $MYPATH
RUN cd $MYPATH && mv node_exporter-1.4.0.linux-amd64 node_exporter-1.4.0 && chmod -R 777 ./node_exporter-1.4.0
ADD mysqld_exporter-0.14.0.linux-amd64.tar.gz $MYPATH
RUN cd $MYPATH && \
    mv mysqld_exporter-0.14.0.linux-amd64 mysqld_exporter && \
    chmod -R 777 ./mysqld_exporter
COPY .my.cnf $MYPATH/mysqld_exporter/
ADD redis_exporter-v1.44.0.linux-amd64.tar.gz $MYPATH
RUN cd $MYPATH && mv redis_exporter-v1.44.0.linux-amd64 redis_exporter && chmod -R 777 ./redis_exporter
COPY cadvisor $MYPATH
RUN chmod -R 777 ./cadvisor
COPY rocketmq-exporter-0.0.2-SNAPSHOT-exec.jar $MYPATH
RUN chmod -R 777 ./rocketmq-exporter-0.0.2-SNAPSHOT-exec.jar
COPY start.sh /usr/local/start.sh
RUN chmod 755 /usr/local/start.sh
# Exec form: the start script runs directly instead of under an extra "sh -c".
CMD ["/usr/local/start.sh"]
start.sh 启动脚本
//start.sh.demo是启动脚本模板,经过restart.sh渲染生成start.sh
vim start.sh.demo
#!/bin/bash
# Container entrypoint template. The render script copies this file to start.sh,
# strips the leading '#IS...' marker from each optional exporter line that is
# enabled in application.conf and substitutes the upper-case placeholders.
# Keep placeholder names out of these comments: the substitution is global.

# Load the login environment in the current shell. Backgrounding the source
# would run it in a subshell and throw away everything it sets.
source /etc/profile
/usr/local/grafana/bin/grafana-server -homepath /usr/local/grafana &
/usr/local/consul agent -dev -ui -node=consul-dev-14 -client=0.0.0.0 &
/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml &
/usr/local/prometheus/prometheus --web.enable-lifecycle --config.file=/usr/local/prometheus/prometheus.yml &
#ISREDIS/usr/local/redis_exporter/redis_exporter --redis.addr REDISIP:REDISPORT --redis.password "REDISPASSWORD" &
#ISNODE/usr/local/node_exporter-1.4.0/node_exporter &
#ISDOCKER/usr/local/cadvisor &
#ISMYSQL/usr/local/mysqld_exporter/mysqld_exporter --config.my-cnf="/usr/local/mysqld_exporter/.my.cnf" &
#ISNGINX/usr/local/cadvisor &
#ISMQjava -jar /usr/local/rocketmq-exporter-0.0.2-SNAPSHOT-exec.jar --rocketmq.config.namesrvAddr=MQIP:MQPORT --rocketmq.config.rocketmqVersion=V4_9_4 &
# Every service above runs in the background; block forever so the container stays up.
tail -f /dev/null
//start.sh 是由启动脚本模板 start.sh.demo 经过 restart.sh 渲染生成的结果
vim start.sh
#!/bin/bash
# Rendered container entrypoint (sample output of the template for the
# configuration shown in this document).

# Load the login environment in the current shell. Backgrounding the source
# would run it in a subshell and throw away everything it sets.
source /etc/profile
/usr/local/grafana/bin/grafana-server -homepath /usr/local/grafana &
/usr/local/consul agent -dev -ui -node=consul-dev-14 -client=0.0.0.0 &
/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml &
/usr/local/prometheus/prometheus --web.enable-lifecycle --config.file=/usr/local/prometheus/prometheus.yml &
/usr/local/redis_exporter/redis_exporter --redis.addr 192.168.58.118:6379 --redis.password "Redis#2022" &
/usr/local/node_exporter-1.4.0/node_exporter &
/usr/local/cadvisor &
/usr/local/mysqld_exporter/mysqld_exporter --config.my-cnf="/usr/local/mysqld_exporter/.my.cnf" &
#ISNGINX/usr/local/cadvisor &
java -jar /usr/local/rocketmq-exporter-0.0.2-SNAPSHOT-exec.jar --rocketmq.config.namesrvAddr=192.168.58.126:9876 --rocketmq.config.rocketmqVersion=V4_9_4 &
# Every service above runs in the background; block forever so the container stays up.
tail -f /dev/null
restart.sh
#!/bin/bash
# restart.sh: renders start.sh and the Prometheus configuration from
# application.conf, recreates the container and registers the enabled
# exporters with Consul.

# Directory the script is launched from; host-side mount paths are built on it.
PWDPATH=$(pwd)
# KEY=VALUE file read by GETVALUE.
EXPORTERCONF='application.conf'
# Rendered entrypoint, copied from "${STARTSCRIPT}.demo".
STARTSCRIPT='start.sh'
# mysqld_exporter credentials file whose placeholders get substituted.
MYSQLCONF='.my.cnf'
# Image to run and the name given to the container.
IMAGENAME='20221018:v01'
CONTAINTER='20221018'
# Consul registration payloads, looked up under exporter_register/exporter_json/.
JSONREDISNAME='redis_exporter.json'
JSONMQNAME='rocketmq_exporter.json'
JSONDOCKERNMAE='cAdvisor_exporter.json'
JSONMYSQLNAME='mysql_exporter.json'
JSONNODENAME='node_exporter.json'
#######################################
# Print the value stored for a key in the KEY=VALUE configuration file.
# The key must match the whole name left of the first "=", so a key that is a
# substring of another key no longer picks up the wrong line, and everything
# after the first "=" is returned, so values may themselves contain "=".
# Globals:   EXPORTERCONF (read) - path of the configuration file
# Arguments: $1 - key name
# Outputs:   the value on stdout; nothing when the key is absent
#######################################
GETVALUE() {
  local key=$1
  awk -v want="${key}" '
    {
      eq = index($0, "=")
      if (eq == 0) next                      # section headers, comments, blanks
      name = substr($0, 1, eq - 1)
      gsub(/^[ \t]+|[ \t]+$/, "", name)
      if (name != want) next
      value = substr($0, eq + 1)
      sub(/\r$/, "", value)                  # tolerate CRLF line endings
      print value
    }
  ' "${EXPORTERCONF}"
}
#######################################
# Replace every occurrence of a placeholder in a file, in place.
# Arguments: $1 - file to edit
#            $2 - placeholder; used as a sed regular expression, so it must
#                 not contain an unescaped "/"
#            $3 - replacement text, inserted literally
# Returns:   exit status of sed
#######################################
RECOVERVALUE() {
  local filename=$1
  local oldvalue=$2
  local newvalue=$3
  local escaped
  # "\", "/" and "&" are special on the replacement side of s///; escape them
  # so values such as passwords or paths are inserted verbatim.
  escaped=$(printf '%s' "${newvalue}" | sed -e 's|[\\/&]|\\&|g')
  sed -i "s/${oldvalue}/${escaped}/g" "${filename}"
}
#######################################
# Recreate the start script from its ".demo" template and make it executable.
# Globals:   STARTSCRIPT (read) - target script; the template is "${STARTSCRIPT}.demo"
# Returns:   non-zero when the template cannot be copied or chmod fails
#######################################
COPYDAEMOFILE() {
  rm -rf -- "${STARTSCRIPT}"
  # Stop here when the template is missing instead of running chmod on nothing.
  cp -- "${STARTSCRIPT}.demo" "${STARTSCRIPT}" || return
  chmod 755 -- "${STARTSCRIPT}"
}
#######################################
# Enable the Docker (cAdvisor) line in the rendered start script unless
# ISDOCKER is "false" in the configuration.
# Globals:   STARTSCRIPT (read)
# Outputs:   progress messages through LOG
#######################################
DOCKERIMPL() {
  local TYPE="Docker"
  local SWITCH
  SWITCH=$(GETVALUE ISDOCKER)
  if [[ ${SWITCH} == false ]]; then
    LOG info "检测到${TYPE}配置为 ${SWITCH},关闭${TYPE} Exporter"
    return
  fi
  LOG info "检测到${TYPE}配置为 ${SWITCH},开始加载${TYPE} Exporter"
  # Strip the marker so the launch line is no longer a comment in start.sh.
  RECOVERVALUE "${STARTSCRIPT}" "#ISDOCKER" ""
  LOG info "${TYPE}配置加载完毕"
}
#######################################
# Register the Docker (cAdvisor) exporter with Consul unless ISDOCKER is "false".
# A copy of the JSON payload is filled in inside the current directory, PUT to
# the Consul agent at LOCIPADDR:CONSULPORT and then removed.
# Globals:   JSONDOCKERNMAE (read) - payload file name
# Outputs:   progress messages through LOG; curl output
#######################################
DOCKERREGISTRY() {
  local TYPE="Docker"
  local DOCKERIP EXPORTERDOCKERPORT SWITCH CONSULIP LOCIPADDR CONSULPORT
  DOCKERIP=$(GETVALUE DOCKERIP)
  EXPORTERDOCKERPORT=$(GETVALUE EXPORTERDOCKERPORT)
  SWITCH=$(GETVALUE ISDOCKER)
  CONSULIP=$(GETVALUE LOCIPADDR)
  LOCIPADDR=$(GETVALUE LOCIPADDR)
  CONSULPORT=$(GETVALUE CONSULPORT)
  if [[ ${SWITCH} == false ]]; then
    LOG info "检测到${TYPE}配置为 ${SWITCH},关闭${TYPE} Exporter"
    return
  fi
  # Announce the registration before it happens, not after.
  LOG info "检测到${TYPE}配置为 ${SWITCH},开始注册${TYPE} Exporter"
  cp "exporter_register/exporter_json/${JSONDOCKERNMAE}" ./
  RECOVERVALUE "${JSONDOCKERNMAE}" "LOCIPADDR" "${LOCIPADDR}"
  RECOVERVALUE "${JSONDOCKERNMAE}" "DOCKERIP" "${DOCKERIP}"
  RECOVERVALUE "${JSONDOCKERNMAE}" "EXPORTERDOCKERPORT" "${EXPORTERDOCKERPORT}"
  LOG info "${TYPE} Json文件处理完毕"
  # The URL is quoted because "?" would otherwise be open to pathname expansion.
  curl --request PUT --data "@${JSONDOCKERNMAE}" \
    "http://${CONSULIP}:${CONSULPORT}/v1/agent/service/register?replace-existing-checks=1"
  rm -rf "${JSONDOCKERNMAE}"
  LOG info "${TYPE} Json文件清理完毕"
}
#######################################
# Emit the "docker run" publish option for cAdvisor (container port 8080).
# Outputs:   "-p <EXPORTERDOCKERPORT>:8080" when ISDOCKER is "true", else nothing
#######################################
PORTDOCKER() {
  local enabled host_port
  enabled=$(GETVALUE ISDOCKER)
  host_port=$(GETVALUE EXPORTERDOCKERPORT)
  [[ ${enabled} == true ]] || return 0
  printf '%s\n' "-p ${host_port}:8080"
}
#######################################
# Enable the Nginx line in the rendered start script unless ISNGINX is "false".
# STARTALL, as shown in this file, does not call this function.
# Globals:   STARTSCRIPT (read)
# Outputs:   progress messages through LOG
#######################################
NGINXIMPL() {
  local kind="Nginx"
  local flag
  flag=$(GETVALUE ISNGINX)
  if [[ ${flag} == false ]]; then
    LOG info "检测到${kind}配置为 ${flag},关闭${kind} Exporter"
    return
  fi
  LOG info "检测到${kind}配置为 ${flag},开始加载${kind} Exporter"
  # Strip the marker so the launch line is no longer a comment in start.sh.
  RECOVERVALUE "${STARTSCRIPT}" "#ISNGINX" ""
  LOG info "${kind}配置加载完毕"
}
#######################################
# Unless ISMYSQL is "false": fill the connection placeholders in the
# mysqld_exporter credentials file and enable the MySQL line in start.sh.
# Globals:   MYSQLCONF (read), STARTSCRIPT (read)
# Outputs:   progress messages through LOG
# NOTE(review): MYSQLCONF is edited in place, so its placeholders exist only
# until the first run - confirm the file is restored from a template elsewhere.
#######################################
MYSQLIMPL() {
  local kind="Mysql"
  local flag MYSQLIP MYSQLPORT MYSQLUSER MYSQLPASSWORD
  flag=$(GETVALUE ISMYSQL)
  MYSQLIP=$(GETVALUE MYSQLIP)
  MYSQLPORT=$(GETVALUE MYSQLPORT)
  MYSQLUSER=$(GETVALUE MYSQLUSER)
  MYSQLPASSWORD=$(GETVALUE MYSQLPASSWORD)
  if [[ ${flag} == false ]]; then
    LOG info "检测到${kind}配置为 ${flag},关闭${kind} Exporter"
    return
  fi
  LOG info "检测到${kind}配置为 ${flag},开始加载${kind} Exporter"
  # Each placeholder is named after the local that holds its value.
  local placeholder
  for placeholder in MYSQLIP MYSQLPORT MYSQLUSER MYSQLPASSWORD; do
    RECOVERVALUE "${MYSQLCONF}" "${placeholder}" "${!placeholder}"
  done
  # Strip the marker so the launch line is no longer a comment in start.sh.
  RECOVERVALUE "${STARTSCRIPT}" "#ISMYSQL" ""
  LOG info "${kind}配置加载完毕"
}
#######################################
# Register the MySQL exporter with Consul unless ISMYSQL is "false".
# A copy of the JSON payload is filled in inside the current directory, PUT to
# the Consul agent at LOCIPADDR:CONSULPORT and then removed.
# Globals:   JSONMYSQLNAME (read) - payload file name
# Outputs:   progress messages through LOG; curl output
#######################################
MYSQLREGISTRY() {
  local TYPE="Mysql"
  local SWITCH MYSQLIP EXPORTERPORTMYSQL LOCIPADDR CONSULIP CONSULPORT
  SWITCH=$(GETVALUE ISMYSQL)
  MYSQLIP=$(GETVALUE MYSQLIP)
  EXPORTERPORTMYSQL=$(GETVALUE EXPORTERPORTMYSQL)
  LOCIPADDR=$(GETVALUE LOCIPADDR)
  CONSULIP=$(GETVALUE LOCIPADDR)
  CONSULPORT=$(GETVALUE CONSULPORT)
  if [[ ${SWITCH} == false ]]; then
    LOG info "检测到${TYPE}配置为 ${SWITCH},关闭${TYPE} Exporter"
    return
  fi
  # Announce the registration before it happens, not after.
  LOG info "检测到${TYPE}配置为 ${SWITCH},开始注册${TYPE} Exporter"
  cp "exporter_register/exporter_json/${JSONMYSQLNAME}" ./
  RECOVERVALUE "${JSONMYSQLNAME}" "LOCIPADDR" "${LOCIPADDR}"
  RECOVERVALUE "${JSONMYSQLNAME}" "MYSQLIP" "${MYSQLIP}"
  RECOVERVALUE "${JSONMYSQLNAME}" "EXPORTERPORTMYSQL" "${EXPORTERPORTMYSQL}"
  LOG info "${TYPE} Json文件处理完毕"
  # The URL is quoted because "?" would otherwise be open to pathname expansion.
  curl --request PUT --data "@${JSONMYSQLNAME}" \
    "http://${CONSULIP}:${CONSULPORT}/v1/agent/service/register?replace-existing-checks=1"
  rm -rf "${JSONMYSQLNAME}"
  LOG info "${TYPE} Json文件清理完毕"
}
#######################################
# Emit the "docker run" publish option for mysqld_exporter (container port 9104).
# Outputs:   "-p <EXPORTERPORTMYSQL>:9104" when ISMYSQL is "true", else nothing
#######################################
PORTMYSQL() {
  local enabled host_port
  enabled=$(GETVALUE ISMYSQL)
  host_port=$(GETVALUE EXPORTERPORTMYSQL)
  [[ ${enabled} == true ]] || return 0
  printf '%s\n' "-p ${host_port}:9104"
}
#######################################
# Unless ISMQ is "false": enable the RocketMQ exporter line in start.sh and
# fill in the name-server address placeholders on it.
# Globals:   STARTSCRIPT (read)
# Outputs:   progress messages through LOG
#######################################
MQIMPL() {
  local kind="Rocketmq"
  local flag namesrv_ip namesrv_port
  flag=$(GETVALUE ISMQ)
  namesrv_ip=$(GETVALUE MQIP)
  namesrv_port=$(GETVALUE MQPORT)
  if [[ ${flag} == false ]]; then
    LOG info "检测到${kind}配置为 ${flag},关闭${kind} Exporter"
    return
  fi
  LOG info "检测到${kind}配置为 ${flag},开始加载${kind} Exporter"
  # Strip the marker first, then substitute the address placeholders.
  RECOVERVALUE "${STARTSCRIPT}" "#ISMQ" ""
  RECOVERVALUE "${STARTSCRIPT}" "MQIP" "${namesrv_ip}"
  RECOVERVALUE "${STARTSCRIPT}" "MQPORT" "${namesrv_port}"
  LOG info "${kind}配置加载完毕"
}
#######################################
# Register the RocketMQ exporter with Consul unless ISMQ is "false".
# A copy of the JSON payload is filled in inside the current directory, PUT to
# the Consul agent at LOCIPADDR:CONSULPORT and then removed.
# Globals:   JSONMQNAME (read) - payload file name
# Outputs:   progress messages through LOG; curl output
#######################################
MQREGISTRY() {
  local TYPE="Rocketmq"
  local SWITCH MQIP EXPORTERPORTMQ LOCIPADDR CONSULIP CONSULPORT
  SWITCH=$(GETVALUE ISMQ)
  MQIP=$(GETVALUE MQIP)
  EXPORTERPORTMQ=$(GETVALUE EXPORTERPORTMQ)
  LOCIPADDR=$(GETVALUE LOCIPADDR)
  CONSULIP=$(GETVALUE LOCIPADDR)
  CONSULPORT=$(GETVALUE CONSULPORT)
  if [[ ${SWITCH} == false ]]; then
    LOG info "检测到${TYPE}配置为 ${SWITCH},关闭${TYPE} Exporter"
    return
  fi
  # Same start message the other *REGISTRY functions log.
  LOG info "检测到${TYPE}配置为 ${SWITCH},开始注册${TYPE} Exporter"
  cp "exporter_register/exporter_json/${JSONMQNAME}" ./
  RECOVERVALUE "${JSONMQNAME}" "LOCIPADDR" "${LOCIPADDR}"
  RECOVERVALUE "${JSONMQNAME}" "MQIP" "${MQIP}"
  RECOVERVALUE "${JSONMQNAME}" "EXPORTERPORTMQ" "${EXPORTERPORTMQ}"
  LOG info "${TYPE} Json文件处理完毕"
  # The URL is quoted because "?" would otherwise be open to pathname expansion.
  curl --request PUT --data "@${JSONMQNAME}" \
    "http://${CONSULIP}:${CONSULPORT}/v1/agent/service/register?replace-existing-checks=1"
  rm -rf "${JSONMQNAME}"
  LOG info "${TYPE} Json文件清理完毕"
}
#######################################
# Emit the "docker run" publish option for the RocketMQ exporter (container port 5557).
# Outputs:   "-p <EXPORTERPORTMQ>:5557" when ISMQ is "true", else nothing
#######################################
PORTMQ() {
  local enabled host_port
  enabled=$(GETVALUE ISMQ)
  host_port=$(GETVALUE EXPORTERPORTMQ)
  [[ ${enabled} == true ]] || return 0
  printf '%s\n' "-p ${host_port}:5557"
}
#######################################
# Enable the node_exporter line in the rendered start script unless ISNODE is
# "false" in the configuration.
# Globals:   STARTSCRIPT (read)
# Outputs:   progress messages through LOG
#######################################
NODEIMPL() {
  local TYPE="Node"
  local SWITCH
  SWITCH=$(GETVALUE ISNODE)
  if [[ ${SWITCH} == false ]]; then
    LOG info "检测到${TYPE}配置为 ${SWITCH},关闭${TYPE} Exporter"
    return
  fi
  LOG info "检测到${TYPE}配置为 ${SWITCH},开始加载${TYPE} Exporter"
  # Strip the marker so the launch line is no longer a comment in start.sh.
  RECOVERVALUE "${STARTSCRIPT}" "#ISNODE" ""
  LOG info "${TYPE}配置加载完毕"
}
#######################################
# Register node_exporter with Consul unless ISNODE is "false".
# A copy of the JSON payload is filled in inside the current directory, PUT to
# the Consul agent at LOCIPADDR:CONSULPORT and then removed.
# Globals:   JSONNODENAME (read) - payload file name
# Outputs:   progress messages through LOG; curl output
# NOTE(review): the node_exporter.json shown with this script sets
# Meta.team to "node-team", while the Prometheus job template keeps only
# "node-group" - confirm the two are meant to match.
#######################################
NODEREGISTRY() {
  local TYPE="Node"
  local SWITCH NODEIP EXPORTERNODEPORT LOCIPADDR CONSULIP CONSULPORT
  SWITCH=$(GETVALUE ISNODE)
  NODEIP=$(GETVALUE NODEIP)
  EXPORTERNODEPORT=$(GETVALUE EXPORTERNODEPORT)
  LOCIPADDR=$(GETVALUE LOCIPADDR)
  CONSULIP=$(GETVALUE LOCIPADDR)
  CONSULPORT=$(GETVALUE CONSULPORT)
  if [[ ${SWITCH} == false ]]; then
    LOG info "检测到${TYPE}配置为 ${SWITCH},关闭${TYPE} Exporter"
    return
  fi
  # Announce the registration before it happens, not after.
  LOG info "检测到${TYPE}配置为 ${SWITCH},开始注册${TYPE} Exporter"
  cp "exporter_register/exporter_json/${JSONNODENAME}" ./
  RECOVERVALUE "${JSONNODENAME}" "LOCIPADDR" "${LOCIPADDR}"
  RECOVERVALUE "${JSONNODENAME}" "NODEIP" "${NODEIP}"
  RECOVERVALUE "${JSONNODENAME}" "EXPORTERNODEPORT" "${EXPORTERNODEPORT}"
  LOG info "${TYPE} Json文件处理完毕"
  # The URL is quoted because "?" would otherwise be open to pathname expansion.
  curl --request PUT --data "@${JSONNODENAME}" \
    "http://${CONSULIP}:${CONSULPORT}/v1/agent/service/register?replace-existing-checks=1"
  rm -rf "${JSONNODENAME}"
  LOG info "${TYPE} Json文件清理完毕"
}
#######################################
# Emit the "docker run" publish option for node_exporter (container port 9100).
# Outputs:   "-p <EXPORTERNODEPORT>:9100" when ISNODE is "true", else nothing
#######################################
PORTNODE() {
  local enabled host_port
  enabled=$(GETVALUE ISNODE)
  host_port=$(GETVALUE EXPORTERNODEPORT)
  [[ ${enabled} == true ]] || return 0
  printf '%s\n' "-p ${host_port}:9100"
}
#######################################
# Print a coloured, timestamped log line.
# Arguments: $1 - level: info (green), warn (yellow) or error (red); any other
#                 value makes the function echo all of its arguments unchanged
#            $2 - message text
#            $3 - line number, shown only for the error level
# Outputs:   the formatted line on stdout
#######################################
LOG() {
  local level=$1
  local text=$2
  local lineno=$3
  local me
  me=$(basename $0)
  local colour tag
  case "${level}" in
    info)
      colour=32
      tag="[INFO]"
      ;;
    warn)
      colour=33
      tag="[WARN]"
      ;;
    error)
      # FUNCNAME[1] is the function that called LOG.
      colour=31
      tag="[ERROR ${me} ${FUNCNAME[1]}:${lineno}]"
      ;;
    *)
      echo -e "${@}"
      return
      ;;
  esac
  echo -e "\033[${colour}m$(date "+%Y-%m-%d %T.%N") ${tag}: ${text}\033[0m"
}
#######################################
# Unless ISREDIS is "false": enable the redis_exporter line in start.sh and
# fill in the Redis address and password placeholders on it.
# Globals:   STARTSCRIPT (read)
# Outputs:   progress messages through LOG
#######################################
REDISIMPL() {
  local TYPE="Redis"
  local SWITCH REDISIP REDISPORT REDISPASSWORD
  SWITCH=$(GETVALUE ISREDIS)
  REDISIP=$(GETVALUE REDISIP)
  REDISPORT=$(GETVALUE REDISPORT)
  REDISPASSWORD=$(GETVALUE REDISPASSWORD)
  if [[ ${SWITCH} == false ]]; then
    LOG info "检测到${TYPE}配置为 ${SWITCH},关闭${TYPE} Exporter"
    return
  fi
  LOG info "检测到${TYPE}配置为 ${SWITCH},开始加载${TYPE} Exporter"
  # Strip the marker first, then substitute the connection placeholders.
  RECOVERVALUE "${STARTSCRIPT}" "#ISREDIS" ""
  RECOVERVALUE "${STARTSCRIPT}" "REDISIP" "${REDISIP}"
  RECOVERVALUE "${STARTSCRIPT}" "REDISPORT" "${REDISPORT}"
  RECOVERVALUE "${STARTSCRIPT}" "REDISPASSWORD" "${REDISPASSWORD}"
  LOG info "${TYPE}配置加载完毕"
}
#######################################
# Register redis_exporter with Consul unless ISREDIS is "false".
# A copy of the JSON payload is filled in inside the current directory, PUT to
# the Consul agent at LOCIPADDR:CONSULPORT and then removed.
# Globals:   JSONREDISNAME (read) - payload file name
# Outputs:   progress messages through LOG; curl output
#######################################
REDISREGISTRY() {
  local TYPE="Redis"
  local SWITCH REDISIP EXPORTERPORTREDIS LOCIPADDR CONSULIP CONSULPORT
  SWITCH=$(GETVALUE ISREDIS)
  REDISIP=$(GETVALUE REDISIP)
  EXPORTERPORTREDIS=$(GETVALUE EXPORTERPORTREDIS)
  LOCIPADDR=$(GETVALUE LOCIPADDR)
  CONSULIP=$(GETVALUE LOCIPADDR)
  CONSULPORT=$(GETVALUE CONSULPORT)
  if [[ ${SWITCH} == false ]]; then
    LOG info "检测到${TYPE}配置为 ${SWITCH},关闭${TYPE} Exporter"
    return
  fi
  # Announce the registration before it happens, not after.
  LOG info "检测到${TYPE}配置为 ${SWITCH},开始注册${TYPE} Exporter"
  cp "exporter_register/exporter_json/${JSONREDISNAME}" ./
  RECOVERVALUE "${JSONREDISNAME}" "LOCIPADDR" "${LOCIPADDR}"
  RECOVERVALUE "${JSONREDISNAME}" "REDISIP" "${REDISIP}"
  RECOVERVALUE "${JSONREDISNAME}" "EXPORTERPORTREDIS" "${EXPORTERPORTREDIS}"
  LOG info "${TYPE} Json文件处理完毕"
  # The URL is quoted because "?" would otherwise be open to pathname expansion.
  curl --request PUT --data "@${JSONREDISNAME}" \
    "http://${CONSULIP}:${CONSULPORT}/v1/agent/service/register?replace-existing-checks=1"
  rm -rf "${JSONREDISNAME}"
  LOG info "${TYPE} Json文件清理完毕"
}
#######################################
# Emit the "docker run" publish option for redis_exporter (container port 9121).
# Outputs:   "-p <EXPORTERPORTREDIS>:9121" when ISREDIS is "true", else nothing
#######################################
PORTREDIS() {
  local enabled host_port
  enabled=$(GETVALUE ISREDIS)
  host_port=$(GETVALUE EXPORTERPORTREDIS)
  [[ ${enabled} == true ]] || return 0
  printf '%s\n' "-p ${host_port}:9121"
}
#######################################
# Emit the "docker run" publish options for the core services: host ports come
# from the configuration, container ports are 9093, 9090, 8500 and 3000.
# Outputs:   four "-p host:container" pairs on a single line
#######################################
PORTPROMETHEUS() {
  local grafana_port consul_port alert_port prometheus_port
  grafana_port=$(GETVALUE GRAFANAPORT)
  consul_port=$(GETVALUE CONSULPORT)
  alert_port=$(GETVALUE ALTERPORT)
  prometheus_port=$(GETVALUE PROMETHEUSPORT)
  local publish="-p ${alert_port}:9093"
  publish+=" -p ${prometheus_port}:9090"
  publish+=" -p ${consul_port}:8500"
  publish+=" -p ${grafana_port}:3000"
  printf '%s\n' "${publish}"
}
#######################################
# Render data/prometheus/conf/prometheus.yml from its ".bak" template by
# substituting the LOCIPADDR, CONSULPORT and ALTERPORT placeholders.
# LOCIPADDR is read from the configuration here; without that lookup the
# placeholder was replaced by an empty string, giving targets such as ":9094".
# Globals:   PWDPATH (read) - base directory that contains data/
# Outputs:   progress messages through LOG
#######################################
CONFIGPROMEFILE() {
  LOG info "替换Prometheues配置文件"
  local PROMEFILE="prometheus.yml"
  local CONFDIR="${PWDPATH}/data/prometheus/conf"
  local LOCIPADDR CONSULPORT ALTERPORT
  LOCIPADDR=$(GETVALUE LOCIPADDR)
  CONSULPORT=$(GETVALUE CONSULPORT)
  ALTERPORT=$(GETVALUE ALTERPORT)
  rm -rf "${CONFDIR}/prometheus.yml"
  # Work on a copy in the current directory, then move the result into place.
  cp "${CONFDIR}/prometheus.yml.bak" "./${PROMEFILE}"
  RECOVERVALUE "${PROMEFILE}" "LOCIPADDR" "${LOCIPADDR}"
  RECOVERVALUE "${PROMEFILE}" "CONSULPORT" "${CONSULPORT}"
  RECOVERVALUE "${PROMEFILE}" "ALTERPORT" "${ALTERPORT}"
  mv -f "./${PROMEFILE}" "${CONFDIR}/prometheus.yml"
  LOG info "修改Prometheus配置完毕"
}
#######################################
# Read the configuration file, render every component's settings, then remove
# any existing container and start a new one.
# Globals:   CONTAINTER, IMAGENAME, PWDPATH (read)
# Outputs:   progress messages through LOG; docker output
# NOTE(review): alertmanager.yml and grafana.ini are mounted under /etc/...,
# while the start script shown with this file launches Alertmanager with
# --config.file=/usr/local/alertmanager/alertmanager.yml and Grafana with
# -homepath /usr/local/grafana - confirm these two mounts are actually read.
#######################################
STARTALL() {
  LOG info "预加载中..."
  CONFIGPROMEFILE
  COPYDAEMOFILE
  LOG info "配置加载中..."
  REDISIMPL
  NODEIMPL
  DOCKERIMPL
  MQIMPL
  MYSQLIMPL
  LOG info "所有配置加载已完毕"
  # Image build is disabled; the image named by IMAGENAME must already exist.
  # LOG info "打包镜像中..."
  # docker build -f Dockerfile -t ${IMAGENAME} .
  # LOG info "打包镜像完毕"
  LOG info "启动中..."
  # Best effort: the container may not exist yet, so all output is discarded.
  # The former "2&>1" passed "2" as a second container name and wrote the
  # output to a file called "1".
  docker rm -f "${CONTAINTER}" >/dev/null 2>&1
  # The PORT* helpers print "-p host:container" options (or nothing) and are
  # left unquoted on purpose so each option becomes its own argument.
  # The rules directory is mounted at /opt/rules, the path the Prometheus
  # configuration template lists under rule_files.
  # shellcheck disable=SC2046
  docker run -itd --name "${CONTAINTER}" \
    $(PORTPROMETHEUS) $(PORTNODE) $(PORTREDIS) $(PORTDOCKER) $(PORTMQ) $(PORTMYSQL) \
    -v /proc:/host/proc:ro -v /sys:/host/sys:ro -v /:/rootfs:ro \
    -v /var/run:/var/run:rw -v /sys:/sys:ro -v /var/lib/docker/:/var/lib/docker:ro \
    -v /dev/disk/:/dev/disk:ro -v /consul/data:/consul/data \
    -v "${PWDPATH}/data/prometheus/conf/prometheus.yml:/usr/local/prometheus/prometheus.yml" \
    -v "${PWDPATH}/data/prometheus/data:/prometheus" \
    -v "${PWDPATH}/data/prometheus/rules:/opt/rules" \
    -v "${PWDPATH}/data/grafana/conf/grafana.ini:/etc/grafana/grafana.ini" \
    -v "${PWDPATH}/data/grafana/data:/var/lib/grafana" \
    -v "${PWDPATH}/data/alertmanager/conf/alertmanager.yml:/etc/alertmanager/alertmanager.yml" \
    -v "${PWDPATH}/data/alertmanager/alertmanager/data/:/alertmanager" \
    -v "${PWDPATH}/data/alertmanager/template/:/opt/template/" \
    "${IMAGENAME}"
  LOG info "启动脚本运行结束"
}
#-v ${PWDPATH}/data/consul/data/:/consul/data/ \
#######################################
# Wait 5 seconds for Consul to come up, then register each exporter's JSON
# definition with it, in the order Redis, RocketMQ, Node, MySQL, Docker.
# Outputs:   a progress message through LOG plus whatever the steps print
#######################################
REGISTRY() {
  LOG info "请等待5秒,Consul尚未启动"
  sleep 5
  local step
  for step in REDISREGISTRY MQREGISTRY NODEREGISTRY MYSQLREGISTRY DOCKERREGISTRY; do
    "${step}"
  done
}
# Entry point: render the configuration and start the container, then register
# the exporters with Consul.
STARTALL
REGISTRY
application.conf
//整体配置文件,restart.sh渲染配置文件和start.sh的时候会用到
vim application.conf
###初步只实现单例模式
[redis]
ISREDIS=true
REDISIP=192.168.58.118
REDISPORT=6379
REDISPASSWORD=Redis#2022
EXPORTERPORTREDIS=9121
[node]
ISNODE=true
NODEIP=192.168.61.187
EXPORTERNODEPORT=9100
[docker]
ISDOCKER=true
DOCKERIP=192.168.61.187
EXPORTERDOCKERPORT=8083
[nginx]
ISNGINX=true
EXPORTERNGINXRPORT=9113
[mq]
ISMQ=true
MQIP=192.168.58.126
MQPORT=9876
EXPORTERPORTMQ=5558
[mysql]
ISMYSQL=true
MYSQLIP=192.168.58.134
MYSQLPORT=3306
MYSQLUSER=ecs_cs
MYSQLPASSWORD=ecs_cs
EXPORTERPORTMYSQL=9104
###全局变量,不需要修改
[cover]
LOCIPADDR=192.168.61.187
GRAFANAPORT=3001
CONSULPORT=8501
ALTERPORT=9094
PROMETHEUSPORT=9091
启动
//启动时,只需要执行restart.sh就可以实现启动自动部署,自动注册
bash restart.sh
向consul中注册的json配置
组件配置就没那么重要了,毕竟每个版本配置都有可能不一样,上面脚本只提供参考,安装包和配置文件可以自己定义,下面是本次安装的部分配置
因为 prometheus 是从 consul 中读取 exporter 的信息,所以需要用 curl 向 consul 注册,对应上面 restart.sh 里面的 REGISTRY 方法
cAdvisor_exporter.json
curl 向consul中注册docker时会用到
cat exporter_register/exporter_json/cAdvisor_exporter.json
{
"ID": "cAdvisor-exporter-DOCKERIP",
"Name": "cAdvisor-exporter-DOCKERIP",
"Tags": [
"cAdvisor-exporter"
],
"Address": "DOCKERIP",
"Port": EXPORTERDOCKERPORT,
"Meta": {
"app": "cAdvisor-exporter-$",
"team": "cAdvisor-group"
},
"EnableTagOverride": false,
"Check": {
"HTTP": "http://LOCIPADDR:EXPORTERDOCKERPORT/metrics",
"Interval": "10s"
},
"Weights": {
"Passing": 10,
"Warning": 1
}
}
mysql_exporter.json
curl 向consul中注册mysql时会用到
cat exporter_register/exporter_json/mysql_exporter.json
{
"ID": "mysql-exporter-MYSQLIP",
"Name": "mysql-exporter-MYSQLIP",
"Tags": [
"mysql-exporter"
],
"Address": "MYSQLIP",
"Port": EXPORTERPORTMYSQL,
"Meta": {
"app": "mysql-exporter",
"team": "mysql-group",
"serviceName": "ecs"
},
"EnableTagOverride": false,
"Check": {
"HTTP": "http://LOCIPADDR:EXPORTERPORTMYSQL/metrics",
"Interval": "10s"
},
"Weights": {
"Passing": 10,
"Warning": 1
}
}
nginx_exporter.json
curl 向consul中注册nginx时会用到,貌似不需要exporter,我这里没加exporter和这个监控
cat exporter_register/exporter_json/nginx_exporter.json
{
"ID": "nginx-monitor-$ip",
"Name": "nginx-monitor-$ip",
"Tags": [
"nginx-monitor"
],
"Address": "192.168.61.187",
"Port": 80,
"Meta": {
"app": "nginx-monitor",
"team": "nginx-group"
},
"EnableTagOverride": false,
"Check": {
"HTTP": "http://192.168.61.187/status/format/prometheus",
"Interval": "10s"
},
"Weights": {
"Passing": 10,
"Warning": 1
}
}
node_exporter.json
curl 向consul中注册node时会用到
cat exporter_register/exporter_json/node_exporter.json
{
"ID": "node-exporter-NODEIP",
"Name": "node-exporter-NODEIP",
"Tags": [
"node-exporter"
],
"Address": "NODEIP",
"Port": EXPORTERNODEPORT,
"Meta": {
"app": "node-exporter-app",
"team": "node-team",
"serviceName": "ecs"
},
"EnableTagOverride": false,
"Check": {
"HTTP": "http://LOCIPADDR:EXPORTERNODEPORT/metrics",
"Interval": "10s"
},
"Weights": {
"Passing": 10,
"Warning": 1
}
}
redis_exporter.json
curl 向consul中注册redis时会用到
cat exporter_register/exporter_json/redis_exporter.json
{
"ID": "redis-exporter-REDISIP",
"Name": "redis-exporter-REDISIP",
"Tags": [
"redis-exporter"
],
"Address": "REDISIP",
"Port": EXPORTERPORTREDIS,
"Meta": {
"app": "redis-exporter-$ip",
"team": "redis-group",
"serviceName": "ecs"
},
"EnableTagOverride": false,
"Check": {
"HTTP": "http://LOCIPADDR:EXPORTERPORTREDIS/metrics",
"Interval": "10s"
},
"Weights": {
"Passing": 10,
"Warning": 1
}
}
rocketmq_exporter.json
curl 向consul中注册rocketmq时会用到
cat exporter_register/exporter_json/rocketmq_exporter.json
{
"ID": "rocketmq-exporter-MQIP",
"Name": "rocketmq-exporter-MQIP",
"Tags": [
"rocketmq-exporter"
],
"Address": "MQIP",
"Port": EXPORTERPORTMQ,
"Meta": {
"app": "rocketmq-exporter-MQIP",
"team": "rocketmq-group"
},
"EnableTagOverride": false,
"Check": {
"HTTP": "http://LOCIPADDR:EXPORTERPORTMQ/metrics",
"Interval": "10s"
},
"Weights": {
"Passing": 10,
"Warning": 1
}
}
各个组件配置文件
prometheus的配置文件
//prometheus的配置文件模板,经过restart.sh渲染生成 prometheus.yml
cat data/prometheus/conf/prometheus.yml.bak
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
#scrape_timeout is set to the global default (10s).
#scrape_timeout: 30s
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
- LOCIPADDR:ALTERPORT
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
- /opt/rules/*.yml
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'node-group'
consul_sd_configs:
- server: 'LOCIPADDR:CONSULPORT'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: node-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
- job_name: 'redis-group'
consul_sd_configs:
- server: 'LOCIPADDR:CONSULPORT'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: redis-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
- job_name: 'mysql'
consul_sd_configs:
- server: 'LOCIPADDR:CONSULPORT'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: mysql-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
- job_name: 'cAdvisor-group'
scrape_interval: 35s
scrape_timeout: 30s
consul_sd_configs:
- server: 'LOCIPADDR:CONSULPORT'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: cAdvisor-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
- job_name: 'nginx-group'
metrics_path: /status/format/prometheus
consul_sd_configs:
- server: 'LOCIPADDR:CONSULPORT'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: nginx-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
- job_name: 'rocketmq-group'
consul_sd_configs:
- server: 'LOCIPADDR:CONSULPORT'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: rocketmq-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
//prometheus.yml 经过restart.sh渲染后的配置
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
#scrape_timeout is set to the global default (10s).
#scrape_timeout: 30s
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
- :9094
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
- /opt/rules/*.yml
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'node-group'
consul_sd_configs:
- server: ':8501'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: node-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
- job_name: 'redis-group'
consul_sd_configs:
- server: ':8501'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: redis-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
- job_name: 'mysql'
consul_sd_configs:
- server: ':8501'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: mysql-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
- job_name: 'cAdvisor-group'
scrape_interval: 35s
scrape_timeout: 30s
consul_sd_configs:
- server: ':8501'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: cAdvisor-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
- job_name: 'nginx-group'
metrics_path: /status/format/prometheus
consul_sd_configs:
- server: ':8501'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: nginx-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
- job_name: 'rocketmq-group'
consul_sd_configs:
- server: ':8501'
services: []
relabel_configs:
- source_labels: [__meta_consul_service_metadata_team]
regex: rocketmq-group
action: keep
- regex: __meta_consul_service_metadata_(.*)
action: labelmap
- source_labels: [__meta_consul_service_id ]
target_label: instance
prometheus的 node_rules.yml告警规则
cat data/prometheus/rules/node_rules.yml
groups:
- name: node-status
rules:
- alert: 节点内存已满(剩余 < 40%)
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 40
for: 15s
labels:
severity: warning
team: node
status: 非常严重
annotations:
summary: Host out of memory (instance {{ $labels.instance }})
description: "主机内存剩余小于40%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: 实例宕机
expr: up == 0
for: 20s
labels:
severity: 1
team: node
#status: 非常严重
annotations:
summary: "{{$labels.instance}} 实例已经宕机超过15秒"
alert 配置
cat data/alertmanager/conf/alertmanager.yml
global:
#每5分钟检查一次是否恢复
resolve_timeout: 5m
smtp_smarthost: 'smtp.qq.com:465'
smtp_from: '[email protected]'
smtp_auth_username: '[email protected]'
smtp_auth_password: 'iwkyjsdasdawgjab'
smtp_require_tls: false
templates:
- '/opt/template/*.tmpl'
route:
# 将传入的报警中有这些标签的分为一个组
# 比如, cluster=A 和 alertname=LatencyHigh 会分成一个组
group_by: ['instance']
# 指分组创建多久后才可以发送压缩的警报,也就是初次发警报的延时
# 这样会确保第一次通知的时候, 有更多的报警被压缩在一起
group_wait: 10s
# 当第一个通知发送,等待多久发送压缩的警报
group_interval: 30s
# 如果报警发送成功, 等待多久重新发送一次
repeat_interval: 30s
receiver: 'mail'
receivers:
- name: 'mail'
email_configs:
#- to: '{{ template "email.to" . }}'
#html: '{{ template "email.to.html" . }}'
#send_resolved: true
- to: '[email protected]'
html: '{{ template "email.html" . }}'
send_resolved: true
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']
alert 邮件模板
cat data/alertmanager/template/email.tmpl
{{/* HTML mail body: firing alerts are listed under ERROR, resolved alerts under INFO. Each section ranges over its own subset so an alert is not repeated in both. Timestamps are shifted by 28800e9 ns (8 hours) before formatting. */}}
{{ define "email.html" }}
{{- if gt (len .Alerts.Firing) 0 -}}
{{ range $i, $alert := .Alerts.Firing }}
========= ERROR ==========<br>
告警名称:{{ .Labels.alertname }}<br>
告警级别:{{ .Labels.severity }}<br>
告警机器:{{ .Labels.instance }} {{ .Labels.device }}<br>
告警详情:{{ .Annotations.summary }}<br>
告警说明:{{ .Annotations.description }}<br>
告警时间:{{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}<br>
========= END ==========<br>
{{ end }}
{{ end }}
{{- if gt (len .Alerts.Resolved) 0 -}}
{{ range $i, $alert := .Alerts.Resolved }}
========= INFO ==========<br>
告警名称:{{ .Labels.alertname }}<br>
告警级别:{{ .Labels.severity }}<br>
告警机器:{{ .Labels.instance }}<br>
告警详情:{{ .Annotations.summary }}<br>
告警说明:{{ .Annotations.description }}<br>
告警时间:{{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}<br>
恢复时间:{{ (.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}<br>
========= END ==========<br>
{{ end }}
{{ end }}
{{ end }}
grafana 配置
大部分是默认配置,检查下192开头的配置修改即可
cat data/grafana/conf/grafana.ini
app_mode = production
instance_name = ${HOSTNAME}
[paths]
data = /data/grafana
temp_data_lifetime = 24h
logs = /data/grafana/log
plugins = /data/grafana/plugins
provisioning = /data/grafana/conf/provisioning
[server]
protocol = http
http_addr =
http_port = 3000
domain = localhost
enforce_domain = false
root_url =http://192.168.61.187:3000
serve_from_sub_path = false
router_logging = false
static_root_path = public
enable_gzip = false
cert_file =/etc/grafana/server.crt
cert_key =/etc/grafana/server.key
socket = /tmp/grafana.sock
[database]
type = sqlite3
host = 127.0.0.1:3306
name = grafana
user = root
password =
url =
max_idle_conn = 2
max_open_conn =
conn_max_lifetime = 14400
log_queries =
ssl_mode = disable
ca_cert_path =
client_key_path =
client_cert_path =
server_cert_name =
path = grafana.db
cache_mode = private
[remote_cache]
type = database
connstr =
[dataproxy]
logging = false
timeout = 30
keep_alive_seconds = 30
tls_handshake_timeout_seconds = 10
expect_continue_timeout_seconds = 1
max_idle_connections = 100
idle_conn_timeout_seconds = 90
send_user_header = false
[analytics]
reporting_enabled = true
check_for_updates = true
google_analytics_ua_id =
google_tag_manager_id =
[security]
disable_initial_admin_creation = false
admin_user = admin
admin_password = admin
secret_key = SW2YcwTIb9zpOOhoPsMm
disable_gravatar = false
data_source_proxy_whitelist =
disable_brute_force_login_protection = false
cookie_secure = false
cookie_samesite = lax
allow_embedding = true
strict_transport_security = false
strict_transport_security_max_age_seconds = 86400
strict_transport_security_preload = false
strict_transport_security_subdomains = false
x_content_type_options = true
x_xss_protection = true
[snapshots]
external_enabled = true
external_snapshot_url = https://snapshots-origin.raintank.io
external_snapshot_name = Publish to snapshot.raintank.io
public_mode = false
snapshot_remove_expired = true
[dashboards]
versions_to_keep = 20
min_refresh_interval = 5s
default_home_dashboard_path =
[users]
allow_sign_up = false
allow_org_create = false
auto_assign_org = true
auto_assign_org_id = 1
auto_assign_org_role = Viewer
verify_email_enabled = false
login_hint = email or username
password_hint = password
default_theme = light
external_manage_link_url =
external_manage_link_name =
external_manage_info =
viewers_can_edit = false
editors_can_admin = false
user_invite_max_lifetime_duration = 24h
[auth]
login_cookie_name = grafana_session
login_maximum_inactive_lifetime_duration =
login_maximum_lifetime_duration =
token_rotation_interval_minutes = 10
disable_login_form = false
disable_signout_menu = false
signout_redirect_url =
oauth_auto_login = false
oauth_state_cookie_max_age = 600
api_key_max_seconds_to_live = -1
sigv4_auth_enabled = false
[auth.anonymous]
enabled = false
org_name = Main Org.
org_role = Viewer
hide_version = false
[auth.github]
enabled = false
allow_sign_up = true
client_id = some_id
client_secret =
scopes = user:email,read:org
auth_url = https://github.com/login/oauth/authorize
token_url = https://github.com/login/oauth/access_token
api_url = https://api.github.com/user
allowed_domains =
team_ids =
allowed_organizations =
[auth.gitlab]
enabled = false
allow_sign_up = true
client_id = some_id
client_secret =
scopes = api
auth_url = https://gitlab.com/oauth/authorize
token_url = https://gitlab.com/oauth/token
api_url = https://gitlab.com/api/v4
allowed_domains =
allowed_groups =
[auth.google]
enabled = false
allow_sign_up = true
client_id = some_client_id
client_secret =
scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email
auth_url = https://accounts.google.com/o/oauth2/auth
token_url = https://accounts.google.com/o/oauth2/token
api_url = https://www.googleapis.com/oauth2/v1/userinfo
allowed_domains =
hosted_domain =
[auth.grafananet]
enabled = false
allow_sign_up = true
client_id = some_id
client_secret =
scopes = user:email
allowed_organizations =
[auth.grafana_com]
enabled = false
allow_sign_up = true
client_id = some_id
client_secret =
scopes = user:email
allowed_organizations =
[auth.azuread]
name = Azure AD
enabled = false
allow_sign_up = true
client_id = some_client_id
client_secret =
scopes = openid email profile
auth_url = https://login.microsoftonline.com/<tenant-id>/oauth2/v2.0/authorize
token_url = https://login.microsoftonline.com/<tenant-id>/oauth2/v2.0/token
allowed_domains =
allowed_groups =
[auth.okta]
name = Okta
enabled = false
allow_sign_up = true
client_id = some_id
client_secret =
scopes = openid profile email groups
auth_url = https://<tenant-id>.okta.com/oauth2/v1/authorize
token_url = https://<tenant-id>.okta.com/oauth2/v1/token
api_url = https://<tenant-id>.okta.com/oauth2/v1/userinfo
allowed_domains =
allowed_groups =
role_attribute_path =
[auth.generic_oauth]
name = OAuth
enabled = true
allow_sign_up = true
client_id = some_id
client_secret =some_secret
scopes = user:email,read:org
email_attribute_name = email:primary
email_attribute_path =
login_attribute_path =
role_attribute_path =
id_token_attribute_name =
auth_url =http://192.168.61.187:8099/grafana-oauth2-monitor/login/oauth/authorize
token_url =http://192.168.61.187:8099/grafana-oauth2-monitor/login/oauth/token
api_url =http://192.168.61.187:8099/grafana-oauth2-monitor/login/oauth/userinfo
allowed_domains =
team_ids =
allowed_organizations =
tls_skip_verify_insecure = false
tls_client_cert =
tls_client_key =
tls_client_ca =
[auth.basic]
enabled = true
[auth.proxy]
enabled = false
header_name = X-WEBAUTH-USER
header_property = username
auto_sign_up = true
ldap_sync_ttl = 60
sync_ttl = 60
whitelist =
headers =
enable_login_token = false
[auth.ldap]
enabled = false
config_file = /etc/grafana/ldap.toml
allow_sign_up = true
sync_cron = "0 0 1 * * *"
active_sync_enabled = true
[smtp]
enabled = false
host = localhost:25
user =
password =
cert_file =
key_file =
skip_verify = false
from_address = [email protected]
from_name = Grafana
ehlo_identity =
startTLS_policy =
[emails]
welcome_email_on_sign_up = false
templates_pattern = emails/*.html
[log]
mode = console file
level = info
filters =
[log.console]
level =
format = console
[log.file]
level =
format = text
log_rotate = true
max_lines = 1000000
max_size_shift = 28
daily_rotate = true
max_days = 7
[log.syslog]
level =
format = text
network =
address =
facility =
tag =
[quota]
enabled = false
org_user = 10
org_dashboard = 100
org_data_source = 10
org_api_key = 10
user_org = 10
global_user = -1
global_org = -1
global_dashboard = -1
global_api_key = -1
global_session = -1
[alerting]
enabled = true
execute_alerts = true
error_or_timeout = alerting
nodata_or_nullvalues = no_data
concurrent_render_limit = 5
evaluation_timeout_seconds = 30
notification_timeout_seconds = 30
max_attempts = 3
min_interval_seconds = 1
max_annotation_age =
max_annotations_to_keep =
[annotations.dashboard]
max_age =
max_annotations_to_keep =
[annotations.api]
max_age =
max_annotations_to_keep =
[explore]
enabled = true
[metrics]
enabled = true
interval_seconds = 10
disable_total_stats = false
basic_auth_username =
basic_auth_password =
[metrics.environment_info]
[metrics.graphite]
address =
prefix = prod.grafana.%(instance_name)s.
[grafana_net]
url = https://grafana.com
[grafana_com]
url = https://grafana.com
[tracing.jaeger]
address =
always_included_tag =
sampler_type = const
sampler_param = 1
zipkin_propagation = false
disable_shared_zipkin_spans = false
[external_image_storage]
provider =
[external_image_storage.s3]
endpoint =
path_style_access =
bucket_url =
bucket =
region =
path =
access_key =
secret_key =
[external_image_storage.webdav]
url =
username =
password =
public_url =
[external_image_storage.gcs]
key_file =
bucket =
path =
enable_signed_urls = false
signed_url_expiration =
[external_image_storage.azure_blob]
account_name =
account_key =
container_name =
[external_image_storage.local]
[rendering]
server_url =
callback_url =
concurrent_render_request_limit = 30
[panels]
enable_alpha = false
disable_sanitize_html = false
[plugins]
enable_alpha = false
app_tls_skip_verify_insecure = false
allow_loading_unsigned_plugins =
[plugin.grafana-image-renderer]
rendering_timezone =
rendering_language =
rendering_viewport_device_scale_factor =
rendering_ignore_https_errors =
rendering_verbose_logging =
rendering_dumpio =
rendering_args =
rendering_chrome_bin =
rendering_mode =
rendering_clustering_mode =
rendering_clustering_max_concurrency =
rendering_viewport_max_width =
rendering_viewport_max_height =
rendering_viewport_max_device_scale_factor =
grpc_host =
grpc_port =
[enterprise]
license_path =
[feature_toggles]
enable =
[date_formats]
full_date = YYYY-MM-DD HH:mm:ss
interval_second = HH:mm:ss
interval_minute = HH:mm
interval_hour = MM/DD HH:mm
interval_day = MM/DD
interval_month = YYYY-MM
interval_year = YYYY
use_browser_locale = false
default_timezone = browser
consul配置
没什么配置,数据注意挂载出来就可以
主要配置上面都有了,其他的没什么了,都是一些官网的镜像了