背景
很多项目都需要调用第三方接口。我们曾遇到过第三方接口响应缓慢的情况:大量超时的请求不断堆积,造成服务不可用,间接堵塞了我司服务的整条依赖链,最终导致整个业务系统雪崩。无论起因是突发大流量还是第三方故障,为了解决这类问题,我们开始研究熔断降级:在不改变原有代码结构的前提下,实现外部接口的熔断降级功能,既保护我方业务服务不雪崩,也保护上游业务服务。
熔断器原理
关闭状态(Closed):调用请求成功时,熔断器处于关闭状态,请求正常放行,同时持续检测失败情况。
打开状态(Open):调用请求失败,且故障达到一定限制(失败计数、超时等)时,熔断器打开,请求快速失败并返回兜底信息。
半开状态(Half-Open):熔断持续一段时间后,熔断器进入半开状态,只允许有限数量的请求通过熔断器调用;当成功请求达到一定数量,判定故障恢复,熔断器关闭;只要这些请求中仍有失败,就判定故障依旧,熔断器重新打开。
半开状态(重试机制)有助于防止恢复服务后,接口突然又被大量的请求淹没。最大限度减少了故障恢复对系统性能的影响。
openresty熔断降级
基于openresty的url_fuse(url熔断器)
https://github.com/sunsky/URL-fuse
熔断的条件
openresty的接口文档
参考:
https://openresty-reference.readthedocs.io/en/latest/Lua_Nginx_API/
一键部署openresty
点击查看代码
#!/bin/bash
# auth:chenjf
# func:install openresty standalone
# version:v1.1
# sys:CentOS Linux release 7.6.1810 (Core)
# nginx version:openresty-1.21.4.1
PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin
## Must be run as root.
[ "$(id -u)" -gt 0 ] && echo "please use root to execute the script!" && exit 1
#set -e
# Absolute directory containing this script. Quoted so that a path with
# spaces does not get word-split.
path=$(cd "$(dirname "$0")" && pwd)
install_path=/data
openresty_home=$install_path/openresty
pcre_home=/usr/local/pcre-8.44
zlib_home=/usr/local/zlib-1.2.11
openssl_home=/usr/local/openssl
openresty_pkg=openresty-1.21.4.1.tar.gz
# Name of the directory the tarball unpacks to: the package name without its
# ".tar.gz" suffix. Unlike counting dot-separated fields, this keeps working
# when the version string has a different number of components.
openresty_pkg_dir=${openresty_pkg%.tar.gz}
# Ask whether an existing $openresty_home should be deleted before reinstalling.
#   Y / y / yes -> remove the directory and return to the caller
#   N / n / no  -> exit the whole script with status 0
#   other input -> ask again
# Exits with status 1 if no answer can be read (e.g. stdin is closed), instead
# of re-prompting forever.
del_ng(){
    local answer
    while true; do
        if ! read -r -n3 -p "openresty_home already exists,Do you want to delete and reinstall it? please set yes or no [Y/N][y/n]?" answer; then
            echo
            echo "no answer could be read, aborting" >&2
            exit 1
        fi
        # read -n3 returns without echoing a newline once three characters
        # have been typed, so terminate the prompt line explicitly.
        echo
        case $answer in
            Y|y|yes)
                # ${var:?} aborts rather than running "rm -rf" on an empty path.
                sudo rm -rf -- "${openresty_home:?openresty_home is not set}"
                echo "openresty_home remove successful "
                return 0;;
            N|n|no)
                echo "ok,bye bye~~"
                exit 0;;
            *)
                echo "answer yes or no [Y/N][y/n] ,please..";;
        esac
    done
}
# Offer to remove a previous installation before starting.
if [ ! -d "$openresty_home" ];then
    echo "openresty_home does not exist,start to install........"
else
    del_ng
fi
## Install the build dependencies with yum
#sudo yum repolist
sudo yum -y install gcc gcc-c++ automake
yum install epel-release -y
yum install ccache -y
## Create the nginx group and user; skipped when they already exist so that
## re-running the script does not print spurious errors.
getent group nginx >/dev/null || groupadd nginx
id -u nginx >/dev/null 2>&1 || useradd -g nginx nginx
## Unpack the source tree next to the script. Without -C the archive is
## extracted into the caller's current directory, while the build step
## changes into "$path/<package dir>".
sudo tar -zxf "$path/$openresty_pkg" -C "$path"
# Run OpenResty's ./configure in the current directory.
# Installs under $openresty_home and points the compiler/linker at the
# openssl, pcre and zlib prefixes. The prefixes come from $openssl_home,
# $pcre_home and $zlib_home, falling back to the previously hard-coded paths
# when those variables are unset.
# Returns the exit status of ./configure.
domake1(){
    local ssl_dir=${openssl_home:-/usr/local/openssl}
    local pcre_dir=${pcre_home:-/usr/local/pcre-8.44}
    local zlib_dir=${zlib_home:-/usr/local/zlib-1.2.11}

    # --with-stream used to be listed twice; it is passed once here.
    ./configure --prefix="$openresty_home" \
        --with-cc-opt="-I$ssl_dir/include/ -I$pcre_dir/include/ -I$zlib_dir/include/" \
        --with-ld-opt="-L$ssl_dir/lib/ -L$pcre_dir/ -L$zlib_dir/" \
        --with-cc='ccache gcc -fdiagnostics-color=always' \
        --with-pcre-jit \
        --with-stream \
        --with-stream_ssl_module \
        --with-stream_ssl_preread_module \
        --with-http_v2_module \
        --without-mail_pop3_module \
        --without-mail_imap_module \
        --without-mail_smtp_module \
        --with-http_stub_status_module \
        --with-http_realip_module \
        --with-http_addition_module \
        --with-http_auth_request_module \
        --with-http_secure_link_module \
        --with-http_random_index_module \
        --with-http_gzip_static_module \
        --with-http_sub_module \
        --with-http_dav_module \
        --with-http_flv_module \
        --with-http_mp4_module \
        --with-http_gunzip_module \
        --with-threads \
        --with-compat \
        --with-http_ssl_module
}
## Build and install. Each step aborts the script on failure so that a broken
## build is never followed by a "successfully" message.
cd "$path/$openresty_pkg_dir" || { echo "source directory $path/$openresty_pkg_dir not found" >&2; exit 1; }
domake1 || { echo "configure failed" >&2; exit 1; }
sudo gmake || { echo "gmake failed" >&2; exit 1; }
sudo gmake install || { echo "gmake install failed" >&2; exit 1; }
## Keep the stock nginx.conf as a backup, then install ours
sudo cp -av "$openresty_home/nginx/conf/nginx.conf" "$openresty_home/nginx/conf/nginx.conf_bak"
sudo cp -av "$path/nginx.conf" "$openresty_home/nginx/conf/"
## Add url_fuse
sudo cp -av "$path/URL-fuse/url_fuse.lua" "$openresty_home/lualib/url_fuse.lua"
sudo cp -av "$path/URL-fuse/lib.lua" "$openresty_home/lualib/lib.lua"
## Rotate the logs every day
cp -a "$path/cut-nginx-log.sh_demo" "$path/cut-nginx-log.sh"
sed -i "s!openresty_home!$openresty_home!g" "$path/cut-nginx-log.sh"
chmod a+x "$path/cut-nginx-log.sh"
\cp -a "$path/cut-nginx-log.sh" "$openresty_home/nginx/logs/"
mkdir -p /var/spool/cron/
# Register the cron entry only once (grep -q replaces the obsolete egrep).
if ! grep -q 'cut-nginx-log.sh' /var/spool/cron/root 2>/dev/null;then
    echo "0 0 */1 * * $openresty_home/nginx/logs/cut-nginx-log.sh" >> /var/spool/cron/root
else
    echo "cut-nginx-log.sh was installed"
fi
## Ownership of the installed tree
sudo chown -R nginx:nginx "$openresty_home"
## Register the systemd service.
# Render the template straight into the unit directory instead of editing it
# in place, so the "openresty_home" placeholder survives for later runs with a
# different install path.
sed "s!openresty_home!$openresty_home!g" "$path/openresty.service" > /usr/lib/systemd/system/openresty.service
# Make systemd pick up the new or changed unit file.
systemctl daemon-reload
systemctl enable openresty.service
if systemctl start openresty.service;then
    systemctl status openresty.service
    echo "openresty install successfully!!!"
else
    systemctl status openresty.service
    echo "openresty was installed but failed to start" >&2
    exit 1
fi
实验开始
模拟上游服务nginx配置(启动个nginx:9999,并模拟2个接口)
# Mock upstream endpoint: always answers 200 with a fixed JSON body.
# "url_fuse":"no" marks the body as coming from the real upstream.
location /abc001 {
default_type application/json;
return 200 '{"Response-Desc":"success","API-Status":"00","Response-Body":"{\\\"data\\\":\\\"{\\\\\\\"xsdsdv\\\\\\\":{},\\\\\\\"healthyStaus\\\\\\\":\\\\\\\"88\\\\\\\"}\\\"}","Response-Code":"000000000000","url_fuse":"no"}';
}
# Mock upstream endpoint: always answers 200 with a fixed JSON body.
# "url_fuse":"no" marks the body as coming from the real upstream.
location /result/getInfo/v1.2.0 {
    default_type application/json;
    # The directive must end with ';' - without it nginx refuses to load the
    # configuration.
    return 200 '{"code":"200","data":"","message":"对不起没有查询到相关信息","url_fuse":"no"}';
}
ps:
1.为更直观的实验效果,这里的报文加了"url_fuse":"no"表示服务正常,没被熔断。
2.注意多层转义,实际生产中,应用程序收到openresty响应过来的报文,要多一层转义。这样应用程序才能正常接收响应报文。
Openresty配置(nginx.conf)
# Let nginx choose the number of worker processes automatically.
worker_processes auto;
# Error log file (relative path).
error_log logs/error.log;
events {
# Maximum number of simultaneous connections per worker process.
worker_connections 10240;
}
http {
# Access-log format "main": client address, time, request line, status, body
# size, then upstream cache status, upstream address, total request time and
# upstream response time.
log_format main '$remote_addr - - [$time_local] "$request" '
'$status $body_bytes_sent '
'$upstream_cache_status $upstream_addr $request_time $upstream_response_time ';
access_log logs/access.log main;
#access_log logs/access.log main buffer=64k flush=10s;
# Extra Lua module search path; the trailing ";;" keeps the default path.
lua_package_path '/data/openresty/lua/?.lua;;';
# 10 MB shared-memory dictionary; read back by the /fuse_status location.
# NOTE(review): presumably this is the dictionary url_fuse stores its state in - confirm against url_fuse.lua.
lua_shared_dict fuse_shard_dict 10m;
# proxy_ignore_client_abort on;
init_worker_by_lua_block {
    -- Configure the url_fuse circuit breaker when each worker starts.
    local fuse = require "url_fuse"

    -- Canned fallback bodies returned while a URL is degraded, keyed by the
    -- request URI. "url_fuse":"yes" marks the body as a fallback.
    local resps = {}
    resps['/abc001'] = '{"Response-Desc":"success","API-Status":"00","Response-Body":"{\\\"data\\\":\\\"{\\\\\\\"xsdsdv\\\\\\\":{},\\\\\\\"healthyStaus\\\\\\\":\\\\\\\"88\\\\\\\"}\\\"}","Response-Code":"000000000000","url_fuse":"yes"}'
    resps['/abc002/1'] = '{"code":"10","data":"{\\\"head\\\":{\\\"message\\\":\\\"接口调用成功\\\",\\\"status\\\":\\\"0\\\"},\\\"data\\\":{\\\"person\\\":{},\\\"sex\\\":{},\\\"healthyReport\\\":[{\\\"healthyId\\\":\\\"325f25f34efesdsa1a2392248cf28651\\\",\\\"etVersion\\\":9,\\\"healthyStaus\\\":\\\"99\\\",\\\"date\\\":\\\"2023-03-18\\\",\\\"dataSource\\\":\\\"福建省\\\"}],\\\"Reports\\\":[],\\\"sfxtex\\\":[],\\\"hasReport\\\":false}}","message":"请求成功","url_fuse":"yes"}'
    resps['/result/getInfo/v1.2.0'] = '{"code":"200","data":"","message":"对不起没有查询到相关信息","url_fuse":"yes"}'

    -- Generic body for URIs without a dedicated entry (the locations are
    -- prefix matches, so e.g. /abc001/x reaches the callback too). Without
    -- it ngx.say(nil) would send the literal text "nil" as the JSON body.
    local default_resp = '{"code":"503","data":"","message":"service degraded","url_fuse":"yes"}'

    fuse:setup(function(this)
        -- NOTE(review): the exact meaning and units of these four settings
        -- are defined by url_fuse; presumably the time-like ones are in
        -- seconds - confirm against the URL-fuse README.
        this.LIFETIME = 10
        this.FAILS_LIMIT = 10
        this.REQUEST_TIMEOUT = 5
        this.FUSED_DURATION = 60

        -- Reply with the canned body for this URI and finish the request
        -- with status 200.
        this.ON_DEGRADED_CALLBACK = function(self)
            ngx.header['Content-Type']='application/json;charset=UTF-8'
            ngx.say(resps[ngx.var.uri] or default_resp)
            return ngx.exit(200)
        end

        -- True only when the request took less than REQUEST_TIMEOUT seconds
        -- since it was created and the response status is 200.
        this.VALIDATE_REQUEST = function(self)
            local elapsed = ngx.now() - ngx.req.start_time()
            return elapsed < self.REQUEST_TIMEOUT and ngx.status == 200
        end
    end)
}
server {
listen 8888;
access_log logs/access.log main;
# Demo endpoint that answers with a fixed JSON document.
location /test01.json {
    default_type application/json;
    content_by_lua_block {
        -- Static response body.
        local body = '{"msg":"注册成功","result":null,"code":"0000"}'
        ngx.say(body)
    }
}
# Demo endpoint whose JSON message carries a random suffix, so every response
# differs.
location /test02.json {
    default_type application/json;
    content_by_lua_block {
        local random = require('resty.random')
        local str = require('resty.string')
        -- random.bytes() returns raw binary data. Spliced directly into a
        -- JSON string it can contain quotes, control characters or invalid
        -- UTF-8 and break the document, so hex-encode it first.
        local suffix = str.to_hex(random.bytes(10, false))
        ngx.say('{"success":true,"code":1,"message":"同步成功' .. suffix .. '","data":null}')
    }
}
# Debug endpoint: dumps the entries of the fuse_shard_dict shared dictionary,
# one "key=>value" line per entry.
location /fuse_status {
    content_by_lua_block {
        local fuse_dict = ngx.shared.fuse_shard_dict
        -- At most 1024 keys are listed.
        local keys = fuse_dict:get_keys(1024)
        for _, key in ipairs(keys) do
            -- tostring() also covers a key that expired after get_keys().
            ngx.say(key .. '=>' .. tostring(fuse_dict:get(key)))
        end
    }
}
# Named location used as the error_page target: answers upstream 5xx errors
# with a canned JSON body and status 200. "url_fuse":"504" marks the body as
# coming from this error handler.
location @error_page_504 {
    content_by_lua_block {
        -- Canned bodies keyed by the request URI.
        local resps = {}
        resps['/abc001'] = '{"Response-Desc":"success","API-Status":"00","Response-Body":"{\\\"data\\\":\\\"{\\\\\\\"xsdsdv\\\\\\\":{},\\\\\\\"healthyStaus\\\\\\\":\\\\\\\"88\\\\\\\"}\\\"}","Response-Code":"000000000000","url_fuse":"504"}'
        resps['/abc002/1'] = '{"code":"10","data":"{\\\"head\\\":{\\\"message\\\":\\\"接口调用成功\\\",\\\"status\\\":\\\"0\\\"},\\\"data\\\":{\\\"person\\\":{},\\\"sex\\\":{},\\\"healthyReport\\\":[{\\\"healthyId\\\":\\\"325f25f34efesdsa1a2392248cf28651\\\",\\\"etVersion\\\":9,\\\"healthyStaus\\\":\\\"99\\\",\\\"date\\\":\\\"2023-03-18\\\",\\\"dataSource\\\":\\\"福建省\\\"}],\\\"Reports\\\":[],\\\"sfxtex\\\":[],\\\"hasReport\\\":false}}","message":"请求成功","url_fuse":"504"}'
        resps['/result/getInfo/v1.2.0'] = '{"code":"200","data":"","message":"对不起没有查询到相关信息","url_fuse":"504"}'

        -- Generic body for URIs without a dedicated entry (the proxied
        -- locations are prefix matches). Without it ngx.say(nil) would send
        -- the literal text "nil" as the JSON body.
        local default_resp = '{"code":"503","data":"","message":"upstream error","url_fuse":"504"}'

        ngx.header['Content-Type']='application/json;charset=UTF-8'
        ngx.say(resps[ngx.var.uri] or default_resp)
        return ngx.exit(200)
    }
}
# Proxies /abc001 to the service on 127.0.0.1:9999 and runs the url_fuse
# hooks in the access phase and the log phase.
location /abc001 {
    proxy_pass http://127.0.0.1:9999;
    proxy_connect_timeout 3s;
    proxy_send_timeout 3s;
    proxy_read_timeout 5s;

    access_by_lua_block {
        require("url_fuse"):run_access()
    }
    log_by_lua_block {
        require("url_fuse"):run_log()
    }
    # error_page 500 502 503 504 = @error_page_504;
}
# Proxies /abc002/1 to the service on 127.0.0.1:9999, runs the url_fuse hooks
# in the access phase and the log phase, and hands 500/502/503/504 responses
# to @error_page_504.
# NOTE(review): log_by_lua_block is a per-location setting; after error_page
# jumps to @error_page_504 the log phase presumably runs with that location's
# configuration, so run_log() may never see these failures - confirm.
location /abc002/1 {
    proxy_pass http://127.0.0.1:9999;
    proxy_connect_timeout 3s;
    proxy_send_timeout 3s;
    proxy_read_timeout 5s;
    error_page 500 502 503 504 = @error_page_504;

    access_by_lua_block {
        require("url_fuse"):run_access()
    }
    log_by_lua_block {
        require("url_fuse"):run_log()
    }
}
# Proxies /result/getInfo/v1.2.0 to the service on 127.0.0.1:9999, runs the
# url_fuse hooks in the access phase and the log phase, and hands
# 500/502/503/504 responses to @error_page_504.
# NOTE(review): log_by_lua_block is a per-location setting; after error_page
# jumps to @error_page_504 the log phase presumably runs with that location's
# configuration, so run_log() may never see these failures - confirm.
location /result/getInfo/v1.2.0 {
    proxy_pass http://127.0.0.1:9999;
    proxy_connect_timeout 3s;
    proxy_send_timeout 3s;
    proxy_read_timeout 5s;
    error_page 500 502 503 504 = @error_page_504;

    access_by_lua_block {
        require("url_fuse"):run_access()
    }
    log_by_lua_block {
        require("url_fuse"):run_log()
    }
}
# Demo endpoint that answers with a fixed JSON document.
location /zc.json {
    default_type application/json;
    content_by_lua_block {
        -- Static response body.
        local body = '{"msg":"注册成功","result":null,"code":"0000"}'
        ngx.say(body)
    }
}
}
}
这边定义了熔断后返回的报文,为更直观的实验效果,这里的报文加了"url_fuse":"yes"表示服务异常,启动熔断,返回兜底的报文信息。
另外通过error_page定义了5xx报错时返回的兜底报文。最常见的是504(上游响应超时),但实际生产中也会发生502、503等报错,所以这边把500、502、503、504统一指向同一个兜底报文(@error_page_504)。
验证
发送请求给openresty
curl http://127.0.0.1:8888/abc001
Access日志可以看到请求转发给nginx的9999端口
关闭nginx服务,模拟异常,服务不可调用。
查看access日志
查看error日志
可以看到请求14次返回502后,openresty开启了熔断,开始返回兜底报文。
对比我们配置的规则
基本吻合,过了一段时间后,我再发送请求
可以看到请求又试图转发给目标服务器,但是失败,继续开启熔断。
断断续续我又尝试了好几次,可以通过http://127.0.0.1:8888/fuse_status查看熔断的次数
又过了段时间,恢复nginx,继续发送请求
可以看到第一次检测成功后,它又关闭了熔断,所有请求都转发给nginx服务器。