一、kubernetes高可用集群二进制部署
(一)部署k8s高可用集群
参考:
https://www.kubernetes.org.cn/kubernetes%E8%AE%BE%E8%AE%A1%E6%9E%B6%E6%9E%84
https://github.com/easzlab/kubeasz
架构
主机清单
类型 | IP | 主机名 | VIP |
---|---|---|---|
deploy | 10.0.0.8 | k8s-deploy | |
master1 | 10.0.0.11 | k8s-master1 | 10.0.0.10 |
master2 | 10.0.0.12 | k8s-master2 | |
master3 | 10.0.0.13 | k8s-master3 | |
Etcd1 | 10.0.0.21 | k8s-etcd1 | |
Etcd2 | 10.0.0.22 | k8s-etcd2 | |
Etcd3 | 10.0.0.23 | k8s-etcd3 | |
LoadBalance1 | 10.0.0.31 | k8s-ha1 | |
LoadBalance2 | 10.0.0.32 | k8s-ha2 | |
Node1 | 10.0.0.41 | k8s-node1 | |
Node2 | 10.0.0.42 | k8s-node2 | |
Node3 | 10.0.0.43 | k8s-node3 | |
Harbor1 | 10.0.0.101 | harbor1 | 10.0.0.100 |
Harbor2 | 10.0.0.102 | harbor2 | |
1. 准备基础环境
- 2c/4g内存(1c1g内存)/20g硬盘(该配置仅测试用)
- 最小化安装
Ubuntu 20.04 server
- 配置基础网络、更新源、SSH登录等
- 主机名、iptables、防火墙、内核参数和资源限制等系统配置
#! /usr/bin/env python3
# coding=utf-8
import os
import subprocess
def get_ubuntu_version():
    """
    Return the major release number reported by ``lsb_release``.

    The distributor is not checked; only the release number is read
    (``lsb_release -rs`` prints e.g. ``20.04`` and the part before the dot
    is returned).

    :return: int -- the major release (20 for a 20.04 system), or 0 when
        ``lsb_release`` is not installed or its output is not numeric
    """
    try:
        p = subprocess.run(["lsb_release", "-rs"], stdout=subprocess.PIPE)
    except OSError:
        # lsb_release is missing, so the release cannot be determined.
        return 0
    release = p.stdout.decode(errors="replace").strip()
    try:
        return int(release.split('.')[0])
    except ValueError:
        # Empty or non-numeric output (e.g. "rolling", "n/a").
        return 0
def set_apt_ubuntu():
    """
    Switch apt to the Tsinghua and Aliyun mirrors and refresh the index.

    Overwrites /etc/apt/sources.list with the focal entries below, then
    runs ``apt update``.

    :return: None
    """
    mirror_entries = """# 清华源
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
# 阿里源
deb https://mirrors.aliyun.com/ubuntu/ focal main restricted universe multiverse
deb-src https://mirrors.aliyun.com/ubuntu/ focal main restricted universe multiverse
deb https://mirrors.aliyun.com/ubuntu/ focal-security main restricted universe multiverse
deb-src https://mirrors.aliyun.com/ubuntu/ focal-security main restricted universe multiverse
deb https://mirrors.aliyun.com/ubuntu/ focal-updates main restricted universe multiverse
deb-src https://mirrors.aliyun.com/ubuntu/ focal-updates main restricted universe multiverse
# deb https://mirrors.aliyun.com/ubuntu/ focal-proposed main restricted universe multiverse
# deb-src https://mirrors.aliyun.com/ubuntu/ focal-proposed main restricted universe multiverse
deb https://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
deb-src https://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
"""
    # The file is closed (and therefore flushed) before apt reads it.
    with open(r"/etc/apt/sources.list", 'w') as sources_file:
        print(mirror_entries, end='', file=sources_file)
    refresh_cmd = 'apt update'
    subprocess.run(refresh_cmd, shell=True)
    print("更换国内apt镜像源完成")
def _append_missing_lines(path, lines):
    """Append each of *lines* to *path* unless that exact line is already present."""
    # 'a+' creates the file when absent and always writes at the end.
    with open(path, 'a+') as f:
        f.seek(0)
        content = f.read()
        present = set(content.splitlines())
        missing = [line for line in lines if line not in present]
        if missing and content and not content.endswith('\n'):
            # Avoid gluing the first new setting onto an unterminated last line.
            f.write('\n')
        f.writelines(line + '\n' for line in missing)


def optimization_sshd():
    """
    Tune the SSH server and client configuration, then restart sshd.

    1. Allow root to log in remotely.
    2. Add the key-exchange algorithm list needed for Xshell to connect.
    3. Turn off the host-key confirmation prompt for outgoing connections.

    Settings are appended only when the exact line is not already in the
    file, so running the script again does not accumulate duplicates.

    :return: None
    """
    word = r"KexAlgorithms curve25519-sha256@libssh.org,ecdh-sha2-nistp256,ecdh-sha2-nistp384,ecdh-sha2-nistp521," \
           r"diffie-hellman-group14-sha1"
    _append_missing_lines(r'/etc/ssh/sshd_config', [r'PermitRootLogin yes', word])
    _append_missing_lines(r'/etc/ssh/ssh_config', [r'StrictHostKeyChecking no'])
    subprocess.run(r"systemctl restart sshd", shell=True)
    print('完成ssh优化')
def get_apt():
    """
    Install the commonly used system packages with apt.

    :return: None
    """
    packages = (
        "iproute2", "ntpdate", "tcpdump", "telnet", "traceroute",
        "nfs-kernel-server", "nfs-common", "lrzsz", "tree",
        "openssl", "libssl-dev", "libpcre3-dev", "zlib1g-dev", "gcc",
        "openssh-server", "iotop", "unzip", "zip", "net-tools",
    )
    # Joined with single spaces this is the same command line as before.
    install_cmd = "apt install -y " + " ".join(packages)
    subprocess.run(install_cmd, shell=True)
    print('完成系统常用命令安装')
def set_ntp():
    """
    Set the time zone and configure time synchronisation through chrony.

    Runs, in order: set the zone to Asia/Shanghai, append LC_TIME to
    /etc/default/locale, install chrony, insert a server line after line 2
    of /etc/chrony/chrony.conf, and restart the chronyd unit.

    :return: None
    """
    steps = (
        r'timedatectl set-timezone "Asia/Shanghai"',
        r"echo 'LC_TIME=en_DK.UTF-8' >> /etc/default/locale",
        r"apt install chrony -y",
        r'sed -i "2aserver time1.cloud.tencent.com iburst" /etc/chrony/chrony.conf',
        'systemctl restart chronyd',
    )
    for command in steps:
        subprocess.run(command, shell=True)
    print("完成时区、对时设置!")
def set_ps1():
    """
    Append a coloured PS1 prompt definition to root's .bashrc.

    :return: None
    """
    prompt_line = r"PS1='\[\e[1;33m\][\u@\h \W]\$\[\e[0m\]'"
    with open('/root/.bashrc', 'a+') as bashrc:
        # print() supplies the trailing newline.
        print(prompt_line, file=bashrc)
    print('完成控制台窗口颜色设置')
def reboot():
    """
    Reboot the server by running the ``reboot`` command through the shell.

    :return: None
    """
    command = 'reboot'
    subprocess.run(command, shell=True)
def main():
    """Run every setup step in its fixed order; the reboot comes last."""
    steps = (
        optimization_sshd,
        set_apt_ubuntu,
        set_ps1,
        set_ntp,
        get_apt,
        reboot,
    )
    for step in steps:
        step()
if __name__ == '__main__':
    # Only release 20 is supported; anything else prints a notice and stops.
    if get_ubuntu_version() != 20:
        print("该系统不是ubuntu20,无法安装")
    else:
        main()
2. 部署高可用负载均衡
主机:10.0.0.31/32
2.1 优化内核参数
#限制响应级别:arp_ignore
#0:默认值,表示可使用本地任意接口上配置的任意地址进行响应
#1:仅在请求的目标IP配置在本地主机的接收到请求报文的接口上时,才给予响应
echo 1 > /proc/sys/net/ipv4/conf/all/arp_ignore
echo 1 > /proc/sys/net/ipv4/conf/lo/arp_ignore
#限制通告级别:arp_announce
#0:默认值,把本机所有接口的所有信息向每个接口的网络进行通告
#1:尽量避免将接口信息向非直接连接网络进行通告
#2:必须避免将接口信息向非本网络进行通告
echo 2 > /proc/sys/net/ipv4/conf/all/arp_announce
echo 2 > /proc/sys/net/ipv4/conf/lo/arp_announce
echo "net.ipv4.ip_nonlocal_bind = 1" >> /etc/sysctl.conf #开启后VIP不在本地,haproxy也可绑定该地址
echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf #开启ipv4路由转发功能
#执行sysctl -p命令,修改内核生效
sysctl -p
2.2 安装keepalived、haproxy
apt update
apt install -y keepalived haproxy
2.3 keepalived
/etc/keepalived/keepalived.conf
配置文件
! Configuration File for keepalived
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 192.168.200.1
smtp_connect_timeout 30
router_id 10.0.0.31 #可设置当前主机名或IP
vrrp_skip_check_adv_addr
vrrp_garp_interval 0
vrrp_gna_interval 0
vrrp_mcast_group4 224.0.0.18
}
# 定义调用脚本
vrrp_script chk_haproxy {
script "/etc/keepalived/chk_haproxy.sh"
interval 1
timeout 2
weight -30
fall 3
rise 5
}
vrrp_instance k8s-master {
state MASTER
interface eth0
virtual_router_id 10 #每个虚拟路由器惟一标识,范围:0-255,每个虚拟路由器此值必须唯一
priority 100 #主机配置100,备机配置80
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
10.0.0.10/24 dev eth0 label eth0:0
}
track_script {
chk_haproxy
}
}
vrrp_instance harbor {
state MASTER
interface eth0
virtual_router_id 100
priority 80 #主机配置80,备机配置100
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
10.0.0.100/24 dev eth0 label eth0:1
}
track_script {
chk_haproxy
}
}
编写调用脚本
cat >/etc/keepalived/chk_haproxy.sh <<EOF
#!/bin/bash
/usr/bin/killall -0 haproxy
EOF
#添加执行权限
chmod a+x /etc/keepalived/chk_haproxy.sh
2.4 haproxy
/etc/haproxy/haproxy.cfg
配置文件
#末尾添加如下配置
listen stats
mode http
bind 0.0.0.0:9999
stats enable
log global
stats uri /haproxy-status
stats auth haadmin:123456
listen harbor_80
bind 10.0.0.100:80
mode tcp
balance source #源地址hash
server harbor1 10.0.0.101:80 check inter 3s fall 3 rise 5
server harbor2 10.0.0.102:80 check inter 3s fall 3 rise 5
listen harbor_443
bind 10.0.0.100:443
mode tcp
balance source #源地址hash
server harbor1 10.0.0.101:443 check inter 3s fall 3 rise 5
server harbor2 10.0.0.102:443 check inter 3s fall 3 rise 5
listen k8s_api_6443
bind 10.0.0.10:6443
mode tcp
server k8s-master1 10.0.0.11:6443 check inter 3s fall 3 rise 5
server k8s-master2 10.0.0.12:6443 check inter 3s fall 3 rise 5
server k8s-master3 10.0.0.13:6443 check inter 3s fall 3 rise 5
重启服务
systemctl restart keepalived haproxy
2.5 查看状态
haproxy1状态
# 查看IP
[root@k8s-ha1 ~]#ifconfig -a
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 10.0.0.31 netmask 255.255.255.0 broadcast 10.0.0.255
inet6 fe80::20c:29ff:fe5d:8a58 prefixlen 64 scopeid 0x20<link>
ether 00:0c:29:5d:8a:58 txqueuelen 1000 (Ethernet)
RX packets 161493 bytes 205634365 (205.6 MB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 33266 bytes 2274864 (2.2 MB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
eth0:0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 10.0.0.10 netmask 255.255.255.0 broadcast 0.0.0.0
ether 00:0c:29:5d:8a:58 txqueuelen 1000 (Ethernet)
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host>
loop txqueuelen 1000 (Local Loopback)
RX packets 4 bytes 268 (268.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 4 bytes 268 (268.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
#查看端口
[root@k8s-ha1 ~]#netstat -ntlp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 10.0.0.10:6443 0.0.0.0:* LISTEN 68768/haproxy
tcp 0 0 0.0.0.0:9999 0.0.0.0:* LISTEN 68768/haproxy
tcp 0 0 0.0.0.0:34671 0.0.0.0:* LISTEN 812/rpc.mountd
tcp 0 0 0.0.0.0:111 0.0.0.0:* LISTEN 1/init
tcp 0 0 10.0.0.100:80 0.0.0.0:* LISTEN 68768/haproxy
tcp 0 0 127.0.0.53:53 0.0.0.0:* LISTEN 810/systemd-resolve
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 871/sshd: /usr/sbin
tcp 0 0 127.0.0.1:6010 0.0.0.0:* LISTEN 1173/sshd: root@pts
tcp 0 0 10.0.0.100:443 0.0.0.0:* LISTEN 68768/haproxy
tcp 0 0 0.0.0.0:35613 0.0.0.0:* LISTEN -
tcp 0 0 0.0.0.0:2049 0.0.0.0:* LISTEN -
tcp 0 0 0.0.0.0:49537 0.0.0.0:* LISTEN 812/rpc.mountd
tcp 0 0 0.0.0.0:35877 0.0.0.0:* LISTEN 812/rpc.mountd
tcp6 0 0 :::44943 :::* LISTEN 812/rpc.mountd
tcp6 0 0 :::111 :::* LISTEN 1/init
tcp6 0 0 :::22 :::* LISTEN 871/sshd: /usr/sbin
tcp6 0 0 ::1:6010 :::* LISTEN 1173/sshd: root@pts
tcp6 0 0 :::2049 :::* LISTEN -
tcp6 0 0 :::43491 :::* LISTEN -
tcp6 0 0 :::43235 :::* LISTEN 812/rpc.mountd
tcp6 0 0 :::55557 :::* LISTEN 812/rpc.mountd
haproxy2状态
# 查看IP
[root@k8s-ha2 ~]#ifconfig -a
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 10.0.0.32 netmask 255.255.255.0 broadcast 10.0.0.255
inet6 fe80::20c:29ff:fe34:714d prefixlen 64 scopeid 0x20<link>
ether 00:0c:29:34:71:4d txqueuelen 1000 (Ethernet)
RX packets 174617 bytes 210606655 (210.6 MB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 46822 bytes 3082846 (3.0 MB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
eth0:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 10.0.0.100 netmask 255.255.255.0 broadcast 0.0.0.0
ether 00:0c:29:34:71:4d txqueuelen 1000 (Ethernet)
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host>
loop txqueuelen 1000 (Local Loopback)
RX packets 4 bytes 268 (268.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 4 bytes 268 (268.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
#查看端口
[root@k8s-ha2 ~]#netstat -ntlp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 0.0.0.0:2049 0.0.0.0:* LISTEN -
tcp 0 0 10.0.0.10:6443 0.0.0.0:* LISTEN 69215/haproxy
tcp 0 0 0.0.0.0:44235 0.0.0.0:* LISTEN 811/rpc.mountd
tcp 0 0 0.0.0.0:9999 0.0.0.0:* LISTEN 69215/haproxy
tcp 0 0 0.0.0.0:111 0.0.0.0:* LISTEN 1/init
tcp 0 0 10.0.0.100:80 0.0.0.0:* LISTEN 69215/haproxy
tcp 0 0 0.0.0.0:32979 0.0.0.0:* LISTEN 811/rpc.mountd
tcp 0 0 127.0.0.53:53 0.0.0.0:* LISTEN 809/systemd-resolve
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 872/sshd: /usr/sbin
tcp 0 0 127.0.0.1:6010 0.0.0.0:* LISTEN 1175/sshd: root@pts
tcp 0 0 10.0.0.100:443 0.0.0.0:* LISTEN 69215/haproxy
tcp 0 0 0.0.0.0:47613 0.0.0.0:* LISTEN 811/rpc.mountd
tcp 0 0 0.0.0.0:35263 0.0.0.0:* LISTEN -
tcp6 0 0 :::2049 :::* LISTEN -
tcp6 0 0 :::41353 :::* LISTEN -
tcp6 0 0 :::111 :::* LISTEN 1/init
tcp6 0 0 :::37171 :::* LISTEN 811/rpc.mountd
tcp6 0 0 :::22 :::* LISTEN 872/sshd: /usr/sbin
tcp6 0 0 :::36089 :::* LISTEN 811/rpc.mountd
tcp6 0 0 :::34521 :::* LISTEN 811/rpc.mountd
tcp6 0 0 ::1:6010 :::* LISTEN 1175/sshd: root@pts
3. 部署harbor
主机:10.0.0.101/102
3.1 安装docker
- 系统优化
# 关闭防火墙
systemctl disable firewalld && systemctl stop firewalld
# 在/etc/hosts中添加IP、主机名
cat >> /etc/hosts <<EOF
`hostname -I|awk '{print $1}'` `hostname`
EOF
# 内核参数优化
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sudo sysctl --system
# 关闭swap
# 在/etc/fstab注释swap那一行
sed -ri 's/(^[^#]*swap)/#\1/' /etc/fstab
echo 'swapoff -a' >> /etc/profile
swapoff -a
# 修改grub
sed -i '/GRUB_CMDLINE_LINUX="net.ifnames=0 biosdevname=0"/c GRUB_CMDLINE_LINUX="net.ifnames=0 biosdevname=0 cgroup_enable=memory swapaccount=1"' /etc/default/grub
update-grub
reboot
- 安装docker
#! /bin/bash
# Install a pinned docker-ce / docker-ce-cli from the Aliyun mirror, then
# write /etc/docker/daemon.json and restart docker.
# Docker version to install (prefix of the apt package version string)
docker_version=5:20.10.10~3-0
apt update
# Install prerequisite packages
apt install -y \
apt-transport-https \
ca-certificates \
curl \
gnupg \
lsb-release \
software-properties-common
# Add the repository GPG key
curl -fsSL http://mirrors.aliyun.com/docker-ce/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=$(dpkg --print-architecture)] http://mirrors.aliyun.com/docker-ce/linux/ubuntu \
$(lsb_release -cs) stable"
apt update
# To list the available versions: apt-cache madison docker-ce docker-ce-cli
apt -y install docker-ce=${docker_version}~ubuntu-$(lsb_release -cs) \
docker-ce-cli=${docker_version}~ubuntu-$(lsb_release -cs)
# Set docker's cgroup driver
# docker's default cgroup driver is cgroupfs; it can be checked with `docker info`
# If cgroupDriver is not set in KubeletConfiguration, kubeadm defaults to systemd, so docker's cgroup driver has to be changed to systemd
# Configure Docker Hub registry mirrors
cat <<EOF >/etc/docker/daemon.json
{
"exec-opts": ["native.cgroupdriver=systemd"],
"registry-mirrors": ["https://ung2thfc.mirror.aliyuncs.com",
"https://registry.docker-cn.com",
"http://hub-mirror.c.163.com",
"https://docker.mirrors.ustc.edu.cn"]
}
EOF
systemctl daemon-reload
systemctl restart docker
3.2 安装docker-compose
# 下载二进制程序
wget https://github.com/docker/compose/releases/download/v2.12.0/docker-compose-linux-x86_64
#wget https://shichu.fun/download/packages/docker/compose/releases/download/v2.12.0/docker-compose-linux-x86_64
chmod a+x docker-compose-linux-x86_64
mv docker-compose-linux-x86_64 /usr/bin/docker-compose
3.3 安装harbor
- 主机(10.0.0.101)
签发证书
# 颁发证书,参考https://goharbor.io/docs/2.6.0/install-config/configure-https/
mkdir -p /apps/harbor/certs
cd /apps/harbor/certs
# 自签名CA机构
## 私有CA key
openssl genrsa -out ca.key 4096
## 自签发CA crt证书
openssl req -x509 -new -nodes -sha512 -days 3650 \
-subj "/C=CN/ST=Beijing/L=Beijing/O=example/OU=Personal/CN=chu.com" \
-key ca.key \
-out ca.crt
## 记录证书签发信息
touch /root/.rnd
# 服务器域名证书申请
## harbor服务器私有key
openssl genrsa -out chu.net.key 4096
## harbor服务器csr文件
openssl req -sha512 -new \
-subj "/C=CN/ST=Beijing/L=Beijing/O=example/OU=Personal/CN=chu.net" \
-key chu.net.key \
-out chu.net.csr
# 准备签发环境,证书签发SAN文件
cat > v3.ext <<-EOF
authorityKeyIdentifier=keyid,issuer
basicConstraints=CA:FALSE
keyUsage = digitalSignature, nonRepudiation, keyEncipherment, dataEncipherment
extendedKeyUsage = serverAuth
subjectAltName = @alt_names
[alt_names]
DNS.1=chu.net
DNS.2=harbor.chu.net
DNS.3=harbor.chu.local
EOF
## 使用自签名CA签发harbor证书
openssl x509 -req -sha512 -days 3650 \
-extfile v3.ext \
-CA ca.crt -CAkey ca.key -CAcreateserial \
-in chu.net.csr \
-out chu.net.crt
部署harbor
#安装harbor
cd /opt
# 下载二进制程序
wget https://github.com/goharbor/harbor/releases/download/v2.6.2/harbor-offline-installer-v2.6.2.tgz
tar xvf harbor-offline-installer-v2.6.2.tgz -C /apps
cd /apps/harbor
#egrep -v '^\s*#|^$' harbor.yml.tmpl > harbor.yml
cp harbor.yml.tmpl harbor.yml
#根据实际修改hostname、harbor_admin_password、database等,hostname为v3.ext文件中alt_names其中一个
sed -i -e "s/hostname: reg.mydomain.com/hostname: harbor.chu.net/g" \
-e "s#certificate: /your/certificate/path#certificate: /apps/harbor/certs/chu.net.crt#g" \
-e "s#private_key: /your/private/key/path#private_key: /apps/harbor/certs/chu.net.key#g" \
-e "/harbor_admin_password/c harbor_admin_password: 12345" \
harbor.yml
#开始安装
./install.sh --with-trivy --with-chartmuseum
- 备机(10.0.0.102)
复制主机签发证书
mkdir -p /apps/harbor/certs
scp 10.0.0.101:/apps/harbor/certs/* /apps/harbor/certs
部署harbor
# 安装harbor
cd /opt
# 下载二进制程序
wget https://github.com/goharbor/harbor/releases/download/v2.6.2/harbor-offline-installer-v2.6.2.tgz
tar xvf harbor-offline-installer-v2.6.2.tgz -C /apps
cd /apps/harbor
# egrep -v '^\s*#|^$' harbor.yml.tmpl > harbor.yml
cp harbor.yml.tmpl harbor.yml
# 根据实际修改hostname、harbor_admin_password、database等
sed -i -e "s/hostname: reg.mydomain.com/hostname: harbor.chu.net/g" \
-e "s#certificate: /your/certificate/path#certificate: /apps/harbor/certs/chu.net.crt#g" \
-e "s#private_key: /your/private/key/path#private_key: /apps/harbor/certs/chu.net.key#g" \
-e "/harbor_admin_password/c harbor_admin_password: 12345" \
harbor.yml
# 开始安装
./install.sh --with-trivy --with-chartmuseum
# 若更新配置
docker-compose down -v
./prepare --with-notary --with-trivy --with-chartmuseum
docker-compose up -d
3.4 配置双主镜像复制
- 修改
/etc/hosts
文件
# 主机
echo "10.0.0.102 harbor.chu.net" >> /etc/hosts
# 备机
echo "10.0.0.101 harbor.chu.net" >> /etc/hosts
- 设置同步
主机
系统管理--仓库管理
系统管理--复制管理
备机
系统管理--仓库管理
系统管理--复制管理
- haproxy监控状态
3.5 客户端连接
- 配置
#docker客户端创建域名目录
mkdir /etc/docker/certs.d/harbor.chu.net -p
#将harbor服务器公钥复制到客户端目录
scp 10.0.0.101:/apps/harbor/certs/chu.net.crt /etc/docker/certs.d/harbor.chu.net/
# 配置hosts
echo "10.0.0.100 harbor.chu.net" >> /etc/hosts
- 登录
[root@k8s-node1 ~]#docker login harbor.chu.net
Username: admin
Password:
WARNING! Your password will be stored unencrypted in /root/.docker/config.json.
Configure a credential helper to remove this warning. See
https://docs.docker.com/engine/reference/commandline/login/#credentials-store
Login Succeeded
- 上传镜像
[root@k8s-node1 ~]#docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
alpine latest c059bfaa849c 13 months ago 5.59MB
[root@k8s-node1 ~]#docker tag alpine:latest harbor.chu.net/test/alpine:v1
[root@k8s-node1 ~]#docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
alpine latest c059bfaa849c 13 months ago 5.59MB
harbor.chu.net/test/alpine v1 c059bfaa849c 13 months ago 5.59MB
# 上传
[root@k8s-node1 ~]#docker push harbor.chu.net/test/alpine:v1
The push refers to repository [harbor.chu.net/test/alpine]
8d3ac3489996: Pushed
v1: digest: sha256:e7d88de73db3d3fd9b2d63aa7f447a10fd0220b7cbf39803c803f2af9ba256b3 size: 528
查看harbor仓库(主/备)
- 下载镜像
[root@k8s-node2 ~]#docker pull harbor.chu.net/test/alpine:v1
v1: Pulling from test/alpine
59bf1c3509f3: Pull complete
Digest: sha256:e7d88de73db3d3fd9b2d63aa7f447a10fd0220b7cbf39803c803f2af9ba256b3
Status: Downloaded newer image for harbor.chu.net/test/alpine:v1
harbor.chu.net/test/alpine:v1
[root@k8s-node2 ~]#docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
harbor.chu.net/test/alpine v1 c059bfaa849c 13 months ago 5.59MB
4. ansible部署k8s集群
主机:10.0.0.8
4.1 准备基础环境
安装工具
# 安装sshpass同步公钥到各个服务器
apt install -y ansible sshpass git
ssh-keygen
配置ssh免密登录
配置从部署节点能够ssh免密登录所有节点,并且设置python软连接
#! /bin/bash
# Copy the local SSH public key to every host listed below (password supplied
# through sshpass) and create a /usr/bin/python symlink on each of them.
host="
10.0.0.8
10.0.0.11
10.0.0.12
10.0.0.13
10.0.0.21
10.0.0.22
10.0.0.23
10.0.0.31
10.0.0.32
10.0.0.41
10.0.0.42
10.0.0.43
10.0.0.101
10.0.0.102
"
# $host is left unquoted on purpose so the shell splits it into one IP per iteration
for IP in $host;do
sshpass -p 123456 ssh-copy-id $IP -o StrictHostKeyChecking=no
# Create the python -> python3 symlink on each node
ssh $IP ln -s /usr/bin/python3 /usr/bin/python
done
4.2 下载kubeasz项目源码、二进制及离线镜像
- 下载工具脚本ezdown
export release=3.3.1
wget https://github.com/easzlab/kubeasz/releases/download/${release}/ezdown
chmod +x ./ezdown
- 下载kubeasz代码、二进制、默认容器镜像(更多关于ezdown的参数,运行./ezdown 查看)
# vim ezdown可自定义下载组件版本
# 国内环境
./ezdown -D
# 海外环境
#./ezdown -D -m standard
【可选】下载额外容器镜像(cilium,flannel,prometheus等)
./ezdown -X
【可选】下载离线系统包 (适用于无法使用yum/apt仓库情形)
./ezdown -P
- 上述脚本运行成功后,所有文件(kubeasz代码、二进制、离线镜像)均会整理好放入目录
/etc/kubeasz
[root@k8s-deploy ~]#ls /etc/kubeasz/
README.md ansible.cfg bin docs down example ezctl ezdown manifests pics playbooks roles tools
- 配置harbor连接
#docker客户端创建域名目录
mkdir /etc/docker/certs.d/harbor.chu.net -p
#将harbor服务器公钥复制到客户端目录
scp 10.0.0.101:/apps/harbor/certs/chu.net.crt /etc/docker/certs.d/harbor.chu.net/
# 配置hosts
echo "10.0.0.100 harbor.chu.net" >> /etc/hosts
登录harbor仓库
[root@k8s-deploy kubeasz]#docker login harbor.chu.net
Username: admin
Password:
WARNING! Your password will be stored unencrypted in /root/.docker/config.json.
Configure a credential helper to remove this warning. See
https://docs.docker.com/engine/reference/commandline/login/#credentials-store
Login Succeeded
上传pause镜像至harbor仓库
[root@k8s-deploy kubeasz]#docker tag easzlab.io.local:5000/easzlab/pause:3.7 harbor.chu.net/baseimages/pause:3.7
[root@k8s-deploy kubeasz]#docker push harbor.chu.net/baseimages/pause:3.7
The push refers to repository [harbor.chu.net/baseimages/pause]
1cb555415fd3: Pushed
3.7: digest: sha256:445a99db22e9add9bfb15ddb1980861a329e5dff5c88d7eec9cbf08b6b2f4eb1 size: 526
4.3 生成并自定义hosts文件
[root@k8s-deploy kubeasz]#pwd
/etc/kubeasz
[root@k8s-deploy kubeasz]#./ezctl new k8s-cluster1
2023-01-17 20:30:28 DEBUG generate custom cluster files in /etc/kubeasz/clusters/k8s-cluster1
2023-01-17 20:30:28 DEBUG set versions
2023-01-17 20:30:28 DEBUG cluster k8s-cluster1: files successfully created.
2023-01-17 20:30:28 INFO next steps 1: to config '/etc/kubeasz/clusters/k8s-cluster1/hosts'
2023-01-17 20:30:28 INFO next steps 2: to config '/etc/kubeasz/clusters/k8s-cluster1/config.yml'
-
编辑hosts文件
指定etcd节点、master节点、node节点、VIP、运行时、网络组件类型、service IP与pod IP范围等配置信息。
[root@k8s-deploy ~]#cat /etc/kubeasz/clusters/k8s-cluster1/hosts
# 'etcd' cluster should have odd member(s) (1,3,5,...)
[etcd]
10.0.0.21
10.0.0.22
10.0.0.23
# master node(s)
[kube_master]
10.0.0.11
10.0.0.12
# work node(s)
[kube_node]
10.0.0.41
10.0.0.42
# [optional] harbor server, a private docker registry
# 'NEW_INSTALL': 'true' to install a harbor server; 'false' to integrate with existed one
[harbor]
#192.168.1.8 NEW_INSTALL=false
# [optional] loadbalance for accessing k8s from outside
[ex_lb]
#192.168.1.6 LB_ROLE=backup EX_APISERVER_VIP=192.168.1.250 EX_APISERVER_PORT=8443
#192.168.1.7 LB_ROLE=master EX_APISERVER_VIP=192.168.1.250 EX_APISERVER_PORT=8443
# [optional] ntp server for the cluster
[chrony]
#192.168.1.1
[all:vars]
# --------- Main Variables ---------------
# Secure port for apiservers
SECURE_PORT="6443"
# Cluster container-runtime supported: docker, containerd
# if k8s version >= 1.24, docker is not supported
CONTAINER_RUNTIME="containerd"
# Network plugins supported: calico, flannel, kube-router, cilium, kube-ovn
CLUSTER_NETWORK="calico"
# Service proxy mode of kube-proxy: 'iptables' or 'ipvs'
PROXY_MODE="ipvs"
# K8S Service CIDR, not overlap with node(host) networking
SERVICE_CIDR="10.100.0.0/16"
# Cluster CIDR (Pod CIDR), not overlap with node(host) networking
CLUSTER_CIDR="10.200.0.0/16"
# NodePort Range
NODE_PORT_RANGE="30000-62767"
# Cluster DNS Domain
CLUSTER_DNS_DOMAIN="cluster.local"
# -------- Additional Variables (don't change the default value right now) ---
# Binaries Directory
bin_dir="/usr/local/bin"
# Deploy Directory (kubeasz workspace)
base_dir="/etc/kubeasz"
# Directory for a specific cluster
cluster_dir="{{ base_dir }}/clusters/k8s-cluster1"
# CA and other components cert/key Directory
ca_dir="/etc/kubernetes/ssl"
-
编辑config.yml文件
[root@k8s-deploy ~]#cat /etc/kubeasz/clusters/k8s-cluster1/config.yml
############################ # prepare ############################ # 可选离线安装系统软件包 (offline|online) INSTALL_SOURCE: "online" # 可选进行系统安全加固 github.com/dev-sec/ansible-collection-hardening OS_HARDEN: false ############################ # role:deploy ############################ # default: ca will expire in 100 years # default: certs issued by the ca will expire in 50 years CA_EXPIRY: "876000h" CERT_EXPIRY: "438000h" # kubeconfig 配置参数 CLUSTER_NAME: "cluster1" CONTEXT_NAME: "context-{{ CLUSTER_NAME }}" # k8s version K8S_VER: "1.24.2" ############################ # role:etcd ############################ # 设置不同的wal目录,可以避免磁盘io竞争,提高性能 ETCD_DATA_DIR: "/var/lib/etcd" ETCD_WAL_DIR: "" ############################ # role:runtime [containerd,docker] ############################ # ------------------------------------------- containerd # [.]启用容器仓库镜像 ENABLE_MIRROR_REGISTRY: true # [containerd]基础容器镜像 SANDBOX_IMAGE: "harbor.chu.net/baseimages/pause:3.7" # [containerd]容器持久化存储目录 CONTAINERD_STORAGE_DIR: "/var/lib/containerd" # ------------------------------------------- docker # [docker]容器存储目录 DOCKER_STORAGE_DIR: "/var/lib/docker" # [docker]开启Restful API ENABLE_REMOTE_API: false # [docker]信任的HTTP仓库 INSECURE_REG: '["http://easzlab.io.local:5000","harbor.chu.net"]' ############################ # role:kube-master ############################ # k8s 集群 master 节点证书配置,可以添加多个ip和域名(比如增加公网ip和域名) MASTER_CERT_HOSTS: - "10.0.0.10" - "api.myserver.com" #- "www.test.com" # node 节点上 pod 网段掩码长度(决定每个节点最多能分配的pod ip地址) # 如果flannel 使用 --kube-subnet-mgr 参数,那么它将读取该设置为每个节点分配pod网段 # https://github.com/coreos/flannel/issues/847 NODE_CIDR_LEN: 23 ############################ # role:kube-node ############################ # Kubelet 根目录 KUBELET_ROOT_DIR: "/var/lib/kubelet" # node节点最大pod 数 MAX_PODS: 500 # 配置为kube组件(kubelet,kube-proxy,dockerd等)预留的资源量 # 数值设置详见templates/kubelet-config.yaml.j2 KUBE_RESERVED_ENABLED: "no" # k8s 官方不建议草率开启 system-reserved, 除非你基于长期监控,了解系统的资源占用状况; # 
并且随着系统运行时间,需要适当增加资源预留,数值设置详见templates/kubelet-config.yaml.j2 # 系统预留设置基于 4c/8g 虚机,最小化安装系统服务,如果使用高性能物理机可以适当增加预留 # 另外,集群安装时候apiserver等资源占用会短时较大,建议至少预留1g内存 SYS_RESERVED_ENABLED: "no" ############################ # role:network [flannel,calico,cilium,kube-ovn,kube-router] ############################ # ------------------------------------------- flannel # [flannel]设置flannel 后端"host-gw","vxlan"等 FLANNEL_BACKEND: "vxlan" DIRECT_ROUTING: false # [flannel] flanneld_image: "quay.io/coreos/flannel:v0.10.0-amd64" flannelVer: "v0.15.1" flanneld_image: "easzlab.io.local:5000/easzlab/flannel:{{ flannelVer }}" # ------------------------------------------- calico # [calico]设置 CALICO_IPV4POOL_IPIP=“off”,可以提高网络性能,条件限制详见 docs/setup/calico.md CALICO_IPV4POOL_IPIP: "Always" # [calico]设置 calico-node使用的host IP,bgp邻居通过该地址建立,可手工指定也可以自动发现 IP_AUTODETECTION_METHOD: "can-reach={{ groups['kube_master'][0] }}" # [calico]设置calico 网络 backend: brid, vxlan, none CALICO_NETWORKING_BACKEND: "brid" # [calico]设置calico 是否使用route reflectors # 如果集群规模超过50个节点,建议启用该特性 CALICO_RR_ENABLED: false # CALICO_RR_NODES 配置route reflectors的节点,如果未设置默认使用集群master节点 # CALICO_RR_NODES: ["192.168.1.1", "192.168.1.2"] CALICO_RR_NODES: [] # [calico]更新支持calico 版本: [v3.3.x] [v3.4.x] [v3.8.x] [v3.15.x] calico_ver: "v3.19.4" # [calico]calico 主版本 calico_ver_main: "{{ calico_ver.split('.')[0] }}.{{ calico_ver.split('.')[1] }}" # ------------------------------------------- cilium # [cilium]镜像版本 cilium_ver: "1.11.6" cilium_connectivity_check: true cilium_hubble_enabled: false cilium_hubble_ui_enabled: false # ------------------------------------------- kube-ovn # [kube-ovn]选择 OVN DB and OVN Control Plane 节点,默认为第一个master节点 OVN_DB_NODE: "{{ groups['kube_master'][0] }}" # [kube-ovn]离线镜像tar包 kube_ovn_ver: "v1.5.3" # ------------------------------------------- kube-router # [kube-router]公有云上存在限制,一般需要始终开启 ipinip;自有环境可以设置为 "subnet" OVERLAY_TYPE: "full" # [kube-router]NetworkPolicy 支持开关 FIREWALL_ENABLE: true # [kube-router]kube-router 镜像版本 
kube_router_ver: "v0.3.1" busybox_ver: "1.28.4" ############################ # role:cluster-addon ############################ # coredns 自动安装 dns_install: "no" corednsVer: "1.9.3" ENABLE_LOCAL_DNS_CACHE: false dnsNodeCacheVer: "1.21.1" # 设置 local dns cache 地址 LOCAL_DNS_CACHE: "169.254.20.10" # metric server 自动安装 metricsserver_install: "no" metricsVer: "v0.5.2" # dashboard 自动安装 dashboard_install: "yes" dashboardVer: "v2.5.1" dashboardMetricsScraperVer: "v1.0.8" # prometheus 自动安装 prom_install: "no" prom_namespace: "monitor" prom_chart_ver: "35.5.1" # nfs-provisioner 自动安装 nfs_provisioner_install: "no" nfs_provisioner_namespace: "kube-system" nfs_provisioner_ver: "v4.0.2" nfs_storage_class: "managed-nfs-storage" nfs_server: "192.168.1.10" nfs_path: "/data/nfs" # network-check 自动安装 network_check_enabled: false network_check_schedule: "*/5 * * * *" ############################ # role:harbor ############################ # harbor version,完整版本号 HARBOR_VER: "v2.1.3" HARBOR_DOMAIN: "harbor.easzlab.io.local" HARBOR_TLS_PORT: 8443 # if set 'false', you need to put certs named harbor.pem and harbor-key.pem in directory 'down' HARBOR_SELF_SIGNED_CERT: true # install extra component HARBOR_WITH_NOTARY: false HARBOR_WITH_TRIVY: false HARBOR_WITH_CLAIR: false HARBOR_WITH_CHARTMUSEUM: true
4.4 部署k8s集群
通过ansible脚本初始化环境及部署k8s高可用集群
# 查看帮助
[root@k8s-deploy kubeasz]#./ezctl --help
Usage: ezctl COMMAND [args]
-------------------------------------------------------------------------------------
Cluster setups:
list to list all of the managed clusters
checkout <cluster> to switch default kubeconfig of the cluster
new <cluster> to start a new k8s deploy with name 'cluster'
setup <cluster> <step> to setup a cluster, also supporting a step-by-step way
start <cluster> to start all of the k8s services stopped by 'ezctl stop'
stop <cluster> to stop all of the k8s services temporarily
upgrade <cluster> to upgrade the k8s cluster
destroy <cluster> to destroy the k8s cluster
backup <cluster> to backup the cluster state (etcd snapshot)
restore <cluster> to restore the cluster state from backups
start-aio to quickly setup an all-in-one cluster with 'default' settings
Cluster ops:
add-etcd <cluster> <ip> to add a etcd-node to the etcd cluster
add-master <cluster> <ip> to add a master node to the k8s cluster
add-node <cluster> <ip> to add a work node to the k8s cluster
del-etcd <cluster> <ip> to delete a etcd-node from the etcd cluster
del-master <cluster> <ip> to delete a master node from the k8s cluster
del-node <cluster> <ip> to delete a work node from the k8s cluster
Extra operation:
kcfg-adm <cluster> <args> to manage client kubeconfig of the k8s cluster
Use "ezctl help <command>" for more information about a given command.
可自定义配置
[root@k8s-deploy kubeasz]#ll /etc/kubeasz/playbooks/
total 92
drwxrwxr-x 2 root root 4096 Jul 3 2022 ./
drwxrwxr-x 13 root root 240 Jan 17 20:30 ../
-rw-rw-r-- 1 root root 443 Jul 3 2022 01.prepare.yml
-rw-rw-r-- 1 root root 58 Jul 3 2022 02.etcd.yml
-rw-rw-r-- 1 root root 209 Jul 3 2022 03.runtime.yml
-rw-rw-r-- 1 root root 482 Jul 3 2022 04.kube-master.yml
-rw-rw-r-- 1 root root 218 Jul 3 2022 05.kube-node.yml
-rw-rw-r-- 1 root root 408 Jul 3 2022 06.network.yml
-rw-rw-r-- 1 root root 77 Jul 3 2022 07.cluster-addon.yml
-rw-rw-r-- 1 root root 34 Jul 3 2022 10.ex-lb.yml
-rw-rw-r-- 1 root root 3893 Jul 3 2022 11.harbor.yml
-rw-rw-r-- 1 root root 1567 Jul 3 2022 21.addetcd.yml
-rw-rw-r-- 1 root root 667 Jul 3 2022 22.addnode.yml
-rw-rw-r-- 1 root root 1050 Jul 3 2022 23.addmaster.yml
-rw-rw-r-- 1 root root 3344 Jul 3 2022 31.deletcd.yml
-rw-rw-r-- 1 root root 2018 Jul 3 2022 32.delnode.yml
-rw-rw-r-- 1 root root 2071 Jul 3 2022 33.delmaster.yml
-rw-rw-r-- 1 root root 1891 Jul 3 2022 90.setup.yml
-rw-rw-r-- 1 root root 1054 Jul 3 2022 91.start.yml
-rw-rw-r-- 1 root root 934 Jul 3 2022 92.stop.yml
-rw-rw-r-- 1 root root 1042 Jul 3 2022 93.upgrade.yml
-rw-rw-r-- 1 root root 1786 Jul 3 2022 94.backup.yml
-rw-rw-r-- 1 root root 999 Jul 3 2022 95.restore.yml
-rw-rw-r-- 1 root root 337 Jul 3 2022 99.clean.yml
4.4.1 环境初始化
准备CA和基础系统设置
/etc/kubeasz/ezctl setup k8s-cluster1 01
完成输出信息:
PLAY RECAP *******************************************************************************************************************************************************************************************************
10.0.0.11 : ok=27 changed=23 unreachable=0 failed=0 skipped=113 rescued=0 ignored=0
10.0.0.12 : ok=27 changed=24 unreachable=0 failed=0 skipped=113 rescued=0 ignored=0
10.0.0.21 : ok=24 changed=21 unreachable=0 failed=0 skipped=116 rescued=0 ignored=0
10.0.0.22 : ok=24 changed=20 unreachable=0 failed=0 skipped=116 rescued=0 ignored=0
10.0.0.23 : ok=24 changed=21 unreachable=0 failed=0 skipped=116 rescued=0 ignored=0
10.0.0.41 : ok=26 changed=22 unreachable=0 failed=0 skipped=114 rescued=0 ignored=0
10.0.0.42 : ok=26 changed=23 unreachable=0 failed=0 skipped=114 rescued=0 ignored=0
localhost : ok=33 changed=31 unreachable=0 failed=0 skipped=11 rescued=0 ignored=0
4.4.2 部署etcd集群
/etc/kubeasz/ezctl setup k8s-cluster1 02
验证etcd服务
NODE_IPS="
10.0.0.21
10.0.0.22
10.0.0.23
"
for ip in ${NODE_IPS};do /usr/local/bin/etcdctl \
--endpoints=https://$ip:2379 \
--cacert=/etc/kubernetes/ssl/ca.pem \
--cert=/etc/kubernetes/ssl/etcd.pem \
--key=/etc/kubernetes/ssl/etcd-key.pem \
endpoint health;done
输出如下信息,表示etcd集群运行正常
https://10.0.0.21:2379 is healthy: successfully committed proposal: took = 12.461231ms
https://10.0.0.22:2379 is healthy: successfully committed proposal: took = 11.711479ms
https://10.0.0.23:2379 is healthy: successfully committed proposal: took = 9.954031ms
4.4.3 部署运行时
安装containerd
修改/etc/kubeasz/roles/containerd/templates/config.toml.j2
#添加至157行
sed -ri '156a\\t[plugins."io.containerd.grpc.v1.cri".registry.mirrors."harbor.chu.net"]\
\t\tendpoint = ["harbor.chu.net"]\
\t[plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.chu.net".tls]\
\t\tinsecure_skip_verify = true\
\t[plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.chu.net".auth]\
\t\tusername = "admin"\
\t\tpassword = "12345"' config.toml.j2
执行安装命令
/etc/kubeasz/ezctl setup k8s-cluster1 03
输出结果
PLAY RECAP ********************************************************************************************
10.0.0.11 : ok=11 changed=10 unreachable=0 failed=0 skipped=18 rescued=0 ignored=0
10.0.0.12 : ok=11 changed=10 unreachable=0 failed=0 skipped=15 rescued=0 ignored=0
10.0.0.41 : ok=11 changed=10 unreachable=0 failed=0 skipped=24 rescued=0 ignored=0
10.0.0.42 : ok=11 changed=10 unreachable=0 failed=0 skipped=15 rescued=0 ignored=0
4.4.4 部署master
配置harbor域名解析
ssh 10.0.0.11 "echo '10.0.0.100 harbor.chu.net'>>/etc/hosts"
ssh 10.0.0.12 "echo '10.0.0.100 harbor.chu.net'>>/etc/hosts"
执行命令
/etc/kubeasz/ezctl setup k8s-cluster1 04
输出结果
PLAY RECAP *************************************************************************************************************************************************
10.0.0.11 : ok=55 changed=49 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
10.0.0.12 : ok=53 changed=47 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
查看状态
[root@k8s-deploy kubeasz]#kubectl get node
NAME STATUS ROLES AGE VERSION
10.0.0.11 Ready,SchedulingDisabled master 50s v1.24.2
10.0.0.12 Ready,SchedulingDisabled master 50s v1.24.2
拉取镜像
[root@k8s-master1 ~]#crictl pull harbor.chu.net/baseimages/pause:3.7
Image is up to date for sha256:221177c6082a88ea4f6240ab2450d540955ac6f4d5454f0e15751b653ebda165
[root@k8s-master1 ~]#crictl images
IMAGE TAG IMAGE ID SIZE
harbor.chu.net/baseimages/pause 3.7 221177c6082a8 309kB
4.4.5 部署node
配置harbor域名解析
ssh 10.0.0.41 "echo '10.0.0.100 harbor.chu.net'>>/etc/hosts"
ssh 10.0.0.42 "echo '10.0.0.100 harbor.chu.net'>>/etc/hosts"
执行命令
/etc/kubeasz/ezctl setup k8s-cluster1 05
查看状态
[root@k8s-deploy ~]#kubectl get node
NAME STATUS ROLES AGE VERSION
10.0.0.11 Ready,SchedulingDisabled master 3m50s v1.24.2
10.0.0.12 Ready,SchedulingDisabled master 3m50s v1.24.2
10.0.0.41 Ready node 50s v1.24.2
10.0.0.42 Ready node 50s v1.24.2
4.4.6 部署网络服务calico
- 查看calico相关镜像
[root@k8s-deploy ~]#docker images|grep "^calico"
calico/node v3.19.4 172a034f7297 11 months ago 155MB
calico/pod2daemon-flexvol v3.19.4 054ddbbe5975 11 months ago 20MB
calico/cni v3.19.4 84358b137f83 11 months ago 146MB
calico/kube-controllers v3.19.4 0db60d880d2d 11 months ago 60.6MB
- 配置镜像并上传至harbor仓库
docker tag calico/node:v3.19.4 harbor.chu.net/baseimages/calico-node:v3.19.4
docker tag calico/pod2daemon-flexvol:v3.19.4 harbor.chu.net/baseimages/calico-pod2daemon-flexvol:v3.19.4
docker tag calico/cni:v3.19.4 harbor.chu.net/baseimages/calico-cni:v3.19.4
docker tag calico/kube-controllers:v3.19.4 harbor.chu.net/baseimages/calico-kube-controllers:v3.19.4
docker push harbor.chu.net/baseimages/calico-node:v3.19.4
docker push harbor.chu.net/baseimages/calico-pod2daemon-flexvol:v3.19.4
docker push harbor.chu.net/baseimages/calico-cni:v3.19.4
docker push harbor.chu.net/baseimages/calico-kube-controllers:v3.19.4
- 修改
/etc/kubeasz/roles/calico/templates/calico-v3.19.yaml.j2
配置文件
sed -i -e 's#easzlab.io.local:5000/calico/cni:{{ calico_ver }}#harbor.chu.net/baseimages/calico-cni:v3.19.4#g' \
-e 's#easzlab.io.local:5000/calico/pod2daemon-flexvol:{{ calico_ver }}#harbor.chu.net/baseimages/calico-pod2daemon-flexvol:v3.19.4#g' \
-e 's#easzlab.io.local:5000/calico/node:{{ calico_ver }}#harbor.chu.net/baseimages/calico-node:v3.19.4#g' \
-e 's#easzlab.io.local:5000/calico/kube-controllers:{{ calico_ver }}#harbor.chu.net/baseimages/calico-kube-controllers:v3.19.4#g' \
/etc/kubeasz/roles/calico/templates/calico-v3.19.yaml.j2
- 执行命令
/etc/kubeasz/ezctl setup k8s-cluster1 06
- 验证calico
[root@k8s-master1 ~]#calicoctl node status
Calico process is running.
IPv4 BGP status
+--------------+-------------------+-------+----------+-------------+
| PEER ADDRESS | PEER TYPE | STATE | SINCE | INFO |
+--------------+-------------------+-------+----------+-------------+
| 10.0.0.41 | node-to-node mesh | up | 17:56:45 | Established |
| 10.0.0.42 | node-to-node mesh | up | 17:56:45 | Established |
| 10.0.0.12 | node-to-node mesh | up | 17:56:45 | Established |
+--------------+-------------------+-------+----------+-------------+
IPv6 BGP status
No IPv6 peers found.
[root@k8s-master1 ~]#kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-dc77788f9-z9rx4 1/1 Running 0 4m40s
kube-system calico-node-9xgqg 1/1 Running 0 4m41s
kube-system calico-node-gpfxv 1/1 Running 0 4m41s
kube-system calico-node-zbzp2 1/1 Running 0 4m40s
kube-system calico-node-zqcll 1/1 Running 0 4m40s
4.4.7 验证网络
# 创建pod测试跨主机网络通信是否正常
kubectl run net-test1 --image=alpine sleep 1000
kubectl run net-test2 --image=alpine sleep 1000
查看网络信息
[root@k8s-deploy ~]#kubectl get pod -A -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
default net-test1 1/1 Running 0 32s 10.200.169.129 10.0.0.42 <none> <none>
default net-test2 1/1 Running 0 25s 10.200.169.130 10.0.0.42 <none> <none>
kube-system calico-kube-controllers-dc77788f9-z9rx4 1/1 Running 0 10m 10.0.0.42 10.0.0.42 <none> <none>
kube-system calico-node-9xgqg 1/1 Running 0 10m 10.0.0.11 10.0.0.11 <none> <none>
kube-system calico-node-gpfxv 1/1 Running 0 10m 10.0.0.41 10.0.0.41 <none> <none>
kube-system calico-node-zbzp2 1/1 Running 0 10m 10.0.0.42 10.0.0.42 <none> <none>
kube-system calico-node-zqcll 1/1 Running 0 10m 10.0.0.12 10.0.0.12 <none> <none>
进入pod进行网络测试
[root@k8s-deploy ~]#kubectl exec -it net-test1 sh
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
/ # ping 223.6.6.6 #ping外网正常
PING 223.6.6.6 (223.6.6.6): 56 data bytes
64 bytes from 223.6.6.6: seq=0 ttl=127 time=49.701 ms
64 bytes from 223.6.6.6: seq=1 ttl=127 time=277.220 ms
/ # ping 10.200.169.130 #ping其他pod网络正常
PING 10.200.169.130 (10.200.169.130): 56 data bytes
64 bytes from 10.200.169.130: seq=0 ttl=63 time=0.907 ms
64 bytes from 10.200.169.130: seq=1 ttl=63 time=0.117 ms
/ #
/ # ping 10.0.0.12 #ping宿主机网络正常
PING 10.0.0.12 (10.0.0.12): 56 data bytes
64 bytes from 10.0.0.12: seq=0 ttl=63 time=1.169 ms
64 bytes from 10.0.0.12: seq=1 ttl=63 time=0.347 ms
(二)kubernetes升级
1. kubernetes升级
选择k8s版本下载组件:https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.24.md#downloads-for-v1248
cd /usr/local/src
# 下载k8s更新组件
wget https://dl.k8s.io/v1.24.8/kubernetes.tar.gz
wget https://dl.k8s.io/v1.24.8/kubernetes-client-linux-amd64.tar.gz
wget https://dl.k8s.io/v1.24.8/kubernetes-server-linux-amd64.tar.gz
wget https://dl.k8s.io/v1.24.8/kubernetes-node-linux-amd64.tar.gz
# 解压
tar xvf kubernetes.tar.gz
tar xvf kubernetes-client-linux-amd64.tar.gz
tar xvf kubernetes-server-linux-amd64.tar.gz
tar xvf kubernetes-node-linux-amd64.tar.gz
# 查看解压后文件
[root@k8s-deploy src]#ls /usr/local/src/kubernetes
LICENSES README.md addons client cluster docs hack kubernetes-src.tar.gz node server version
更新升级版本组件至部署仓库中,后续新增加节点时会自动使用新版本
cd /usr/local/src/kubernetes/server/bin
\cp kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet kubectl /etc/kubeasz/bin/
1.1 升级master
- 依次升级master时,需修改所有node节点负载均衡配置,下线master节点
NODES="10.0.0.41 10.0.0.42"
for IP in ${NODES};do
ssh $IP "sed -i 's/server 10.0.0.11:6443/#server 10.0.0.11:6443/g' /etc/kube-lb/conf/kube-lb.conf"
ssh $IP "systemctl reload kube-lb.service"
done
查看node节点kube-lb.conf配置
[root@k8s-deploy bin]#ssh 10.0.0.41 "cat /etc/kube-lb/conf/kube-lb.conf"
user root;
worker_processes 1;
error_log /etc/kube-lb/logs/error.log warn;
events {
worker_connections 3000;
}
stream {
upstream backend {
#server 10.0.0.11:6443 max_fails=2 fail_timeout=3s; #注释掉升级的master节点
server 10.0.0.12:6443 max_fails=2 fail_timeout=3s;
}
server {
listen 127.0.0.1:6443;
proxy_connect_timeout 1s;
proxy_pass backend;
}
}
- 升级master组件
# 停止master节点相关服务
ssh 10.0.0.11 "systemctl stop kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet"
# 替换新版本组件
cd /usr/local/src/kubernetes/server/bin
scp kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet kubectl 10.0.0.11:/usr/local/bin/
# 启动master节点相关服务
ssh 10.0.0.11 "systemctl start kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet"
- 修改node节点负载均衡配置,上线master节点
NODES="10.0.0.41 10.0.0.42"
for IP in ${NODES};do
ssh $IP "sed -i 's/#server 10.0.0.11:6443/server 10.0.0.11:6443/g' /etc/kube-lb/conf/kube-lb.conf"
ssh $IP "systemctl reload kube-lb.service"
done
master2按相同方法升级
Master=10.0.0.12
# node节点负载均衡下线master
NODES="10.0.0.41 10.0.0.42"
for IP in ${NODES};do
ssh $IP "sed -i 's/server ${Master}:6443/#server ${Master}:6443/g' /etc/kube-lb/conf/kube-lb.conf"
ssh $IP "systemctl reload kube-lb.service"
done
# 停止master节点相关服务
ssh ${Master} "systemctl stop kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet"
# 替换新版本组件
cd /usr/local/src/kubernetes/server/bin
scp kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet kubectl ${Master}:/usr/local/bin/
# 启动master节点相关服务
ssh ${Master} "systemctl start kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet"
# node节点负载均衡上线master
for IP in ${NODES};do
ssh $IP "sed -i 's/#server ${Master}:6443/server ${Master}:6443/g' /etc/kube-lb/conf/kube-lb.conf"
ssh $IP "systemctl reload kube-lb.service"
done
- 查看master节点版本
[root@k8s-deploy bin]#kubectl get node
NAME STATUS ROLES AGE VERSION
10.0.0.11 Ready,SchedulingDisabled master 24h v1.24.8
10.0.0.12 Ready,SchedulingDisabled master 24h v1.24.8
10.0.0.41 Ready node 12h v1.24.2
10.0.0.42 Ready node 24h v1.24.2
1.2 升级node
# 以node1为例
Node=10.0.0.41
# 标记不可调度
kubectl cordon ${Node}
# 驱逐pod
kubectl drain ${Node} --ignore-daemonsets --delete-emptydir-data --force
# 停止服务
ssh ${Node} "systemctl stop kube-proxy kubelet"
# 替换新版本组件
cd /usr/local/src/kubernetes/server/bin
scp kube-proxy kubelet kubectl ${Node}:/usr/local/bin/
# 启动服务
ssh ${Node} "systemctl start kube-proxy kubelet"
# 取消不可调度
kubectl uncordon ${Node}
查看node节点版本
[root@k8s-deploy bin]#kubectl get node
NAME STATUS ROLES AGE VERSION
10.0.0.11 Ready,SchedulingDisabled master 24h v1.24.8
10.0.0.12 Ready,SchedulingDisabled master 24h v1.24.8
10.0.0.41 Ready node 12h v1.24.8
10.0.0.42 Ready node 24h v1.24.8
2. containerd升级
2.1 下载版本
选择containerd下载版本:https://github.com/containerd/containerd/releases
mkdir -p /usr/local/src/containerd
cd /usr/local/src/containerd
# 下载containerd
wget https://github.com/containerd/containerd/releases/download/v1.6.10/containerd-1.6.10-linux-amd64.tar.gz
# 解压
tar xvf containerd-1.6.10-linux-amd64.tar.gz
# 查看解压后文件
[root@k8s-deploy containerd]#ls /usr/local/src/containerd/bin
containerd containerd-shim containerd-shim-runc-v1 containerd-shim-runc-v2 containerd-stress ctr
# 查看containerd版本
[root@k8s-deploy bin]#./containerd --version
containerd github.com/containerd/containerd v1.6.10 770bd0108c32f3fb5c73ae1264f7e503fe7b2661
更新升级版本组件至部署仓库中,后续新增加节点时会自动使用新版本
\cp /usr/local/src/containerd/bin/* /etc/kubeasz/bin/containerd-bin/
2.2 执行升级
升级containerd正常情况下,node节点须先驱逐pod,然后将服务停止或重启服务器,然后替换二进制再启动服务
说明:驱逐pod
升级master节点containerd
#! /bin/bash
# 以master1为例
Node=10.0.0.11
# 停止服务
ssh ${Node} "systemctl stop kube-proxy kubelet containerd"
# 替换新版本组件
cd /usr/local/src/containerd/bin
scp ./* ${Node}:/usr/local/bin/
# 启动服务
ssh ${Node} "systemctl start kube-proxy kubelet containerd"
升级node节点containerd
#! /bin/bash
# 以node1为例
Node=10.0.0.41
# 标记不可调度
kubectl cordon ${Node}
# 驱逐pod
kubectl drain ${Node} --ignore-daemonsets --delete-emptydir-data --force
# 停止服务
ssh ${Node} "systemctl stop kube-proxy kubelet containerd"
# 替换新版本组件
cd /usr/local/src/containerd/bin
scp ./* ${Node}:/usr/local/bin/
# 启动服务
ssh ${Node} "systemctl start kube-proxy kubelet containerd"
# 取消不可调度
kubectl uncordon ${Node}
2.3 验证containerd版本
[root@k8s-deploy home]#kubectl get node -owide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
10.0.0.11 Ready,SchedulingDisabled master 36h v1.24.8 10.0.0.11 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
10.0.0.12 Ready,SchedulingDisabled master 36h v1.24.8 10.0.0.12 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
10.0.0.41 Ready node 24h v1.24.8 10.0.0.41 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
10.0.0.42 Ready node 36h v1.24.8 10.0.0.42 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
说明:runc升级与containerd方法相同
(三)kubernetes横向扩容
1. 添加master
环境配置
IP=10.0.0.13
# 配置免密登录
sshpass -p 123456 ssh-copy-id $IP -o StrictHostKeyChecking=no
# 添加harbor域名解析
ssh $IP "echo '10.0.0.100 harbor.chu.net'>>/etc/hosts"
执行添加master命令
/etc/kubeasz/ezctl add-master k8s-cluster1 10.0.0.13
查看集群状态
[root@k8s-deploy ~]#kubectl get node -owide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
10.0.0.11 Ready,SchedulingDisabled master 36h v1.24.8 10.0.0.11 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
10.0.0.12 Ready,SchedulingDisabled master 36h v1.24.8 10.0.0.12 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
10.0.0.13 Ready,SchedulingDisabled master 74s v1.24.8 10.0.0.13 <none> Ubuntu 20.04.4 LTS 5.4.0-137-generic containerd://1.6.10
10.0.0.41 Ready node 24h v1.24.8 10.0.0.41 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
10.0.0.42 Ready node 36h v1.24.8 10.0.0.42 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
2. 添加node
环境配置
IP=10.0.0.43
# 配置免密登录
sshpass -p 123456 ssh-copy-id $IP -o StrictHostKeyChecking=no
# 添加harbor域名解析
ssh $IP "echo '10.0.0.100 harbor.chu.net'>>/etc/hosts"
执行添加node命令
/etc/kubeasz/ezctl add-node k8s-cluster1 10.0.0.43
查看集群状态,k8s、containerd均为升级后版本
[root@k8s-deploy ~]#kubectl get node -owide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
10.0.0.11 Ready,SchedulingDisabled master 36h v1.24.8 10.0.0.11 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
10.0.0.12 Ready,SchedulingDisabled master 36h v1.24.8 10.0.0.12 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
10.0.0.13 Ready,SchedulingDisabled master 5m7s v1.24.8 10.0.0.13 <none> Ubuntu 20.04.4 LTS 5.4.0-137-generic containerd://1.6.10
10.0.0.41 Ready node 24h v1.24.8 10.0.0.41 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
10.0.0.42 Ready node 36h v1.24.8 10.0.0.42 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
10.0.0.43 Ready node 59s v1.24.8 10.0.0.43 <none> Ubuntu 20.04.4 LTS 5.4.0-124-generic containerd://1.6.10
二、etcd的备份和恢复(基于快照)
WAL是write ahead log(预写日志)的缩写,也就是在执行真正的写操作之前先写一个日志,预写日志。
WAL:存放预写式日志,最大的作用是记录了整个数据变化的全部历程,在etcd中,所有数据的修改在提交前,都要先写入到WAL中。
v3版本备份数据
ETCDCTL_API=3 etcdctl snapshot save snapshot.db
v3版本恢复数据
# 将数据恢复到一个新的不存在的目录中(data-dir目录为空或不存在),或将原data-dir目录清空
ETCDCTL_API=3 etcdctl snapshot restore snapshot.db --data-dir=/opt/etcd-testdir
自动备份数据
mkdir -p /data/etcd-backup-dir
#! /bin/bash
source /etc/profile
DATE=`date +%Y-%m-%d_%H-%M-%S`
ETCDCTL_API=3 etcdctl snapshot save /data/etcd-backup-dir/etcd-snapshot-${DATE}.db
etcd备份
创建测试pod
[root@k8s-deploy ~]#kubectl get pod -n myserver
NAME READY STATUS RESTARTS AGE
net-test1 1/1 Running 0 102s
net-test2 1/1 Running 0 98s
net-test3 1/1 Running 0 94s
net-test4 1/1 Running 0 90s
net-test5 1/1 Running 0 87s
net-test6 1/1 Running 0 83s
net-test7 1/1 Running 0 79s
net-test8 1/1 Running 0 75s
执行备份
/etc/kubeasz/ezctl backup k8s-cluster1
查看备份快照
[root@k8s-deploy ~]#ll /etc/kubeasz/clusters/k8s-cluster1/backup/
total 10716
drwxr-xr-x 2 root root 89 Jan 19 21:42 ./
drwxr-xr-x 5 root root 203 Jan 19 13:48 ../
-rw------- 1 root root 3653664 Jan 19 21:42 snapshot.db #最新快照数据(snapshot_202301192142.db)
-rw------- 1 root root 3653664 Jan 19 21:41 snapshot_202301192141.db
-rw------- 1 root root 3653664 Jan 19 21:42 snapshot_202301192142.db
etcd恢复
准备测试环境
# 删除部分pod
[root@k8s-deploy ~]#kubectl delete pod net-test1 net-test3 net-test5 -n myserver
pod "net-test1" deleted
pod "net-test3" deleted
pod "net-test5" deleted
[root@k8s-deploy ~]#kubectl get pod -n myserver
NAME READY STATUS RESTARTS AGE
net-test2 1/1 Running 0 10m
net-test4 1/1 Running 0 10m
net-test6 1/1 Running 0 10m
net-test7 1/1 Running 0 10m
net-test8 1/1 Running 0 10m
- 恢复流程
当etcd集群宕机节点数量超过集群总节点数一半的时候,就会导致整个集群宕机,后期需要重新恢复数据,恢复流程如下:
- 恢复服务器系统
- 重新部署etcd集群
- 停止kube-apiserver、kube-controller-manager、kube-scheduler、kube-proxy、kubelet服务
- 停止etcd集群
- 各etcd节点恢复同一份备份数据
- 启动各节点并验证etcd集群
- 启动kube-apiserver、kube-controller-manager、kube-scheduler、kube-proxy、kubelet
- 验证k8s master状态及pod数据
注意:在恢复数据期间API server不可用
- 执行恢复
/etc/kubeasz/ezctl restore k8s-cluster1
执行结果
PLAY RECAP ********************************************************************************************************************************************************************************************************************
10.0.0.11 : ok=5 changed=4 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
10.0.0.12 : ok=5 changed=4 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
10.0.0.13 : ok=5 changed=4 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
10.0.0.21 : ok=10 changed=7 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
10.0.0.22 : ok=10 changed=8 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
10.0.0.23 : ok=10 changed=8 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
10.0.0.41 : ok=3 changed=2 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
10.0.0.42 : ok=3 changed=2 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
10.0.0.43 : ok=3 changed=2 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
- 验证恢复状态
[root@k8s-deploy ~]#kubectl get pod -n myserver
NAME READY STATUS RESTARTS AGE
net-test1 1/1 Running 0 17m
net-test2 1/1 Running 0 17m
net-test3 1/1 Running 0 17m
net-test4 1/1 Running 0 17m
net-test5 1/1 Running 0 17m
net-test6 1/1 Running 0 17m
net-test7 1/1 Running 0 17m
net-test8 1/1 Running 0 17m
三、整理coredns的域名解析流程和Corefile配置
CoreDNS 是一个DNS服务器。它是用Go编写的。由于其灵活性,它可以在多种环境中使用。目前K8s主要使用CoreDNS为集群提供域名解析服务。
(一)coredns域名解析流程
以容器nginx访问百度为例:
- 容器nginx ping一下百度域名baidu.com。
- 该请求会先被kube-dns(Coredns服务)捕获。
- 域名解析转发到coredns集群,根据负载均衡会分配到某个coredns pod。
- coredns pod再通过api-server转到k8s集群服务。
- 最后k8s集群从etcd数据库中获取到域名解析结果。
- etcd把结果原路返回到k8s,依次类推,Nginx获取到baidu对应的IP地址。
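- 解析结果会保存到域名缓存,下次访问会更加快速。
- 说明:只有集群内部域名(如service名称)才由kubernetes插件通过api-server从etcd中获取解析结果;对于baidu.com这类集群外部域名,CoreDNS会通过forward插件转发给上游DNS服务器(如/etc/resolv.conf中的nameserver或223.6.6.6)解析后再返回。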
(二)安装coredns
https://github.com/coredns/deployment/tree/master/kubernetes
1. 下载yaml文件
wget https://raw.githubusercontent.com/coredns/deployment/master/kubernetes/coredns.yaml.sed
# 或者使用k8s官方下载组件中coredns文件
[root@k8s-deploy ~]#ll /usr/local/src/kubernetes/cluster/addons/dns/coredns/
total 36
drwxr-xr-x 2 root root 147 Nov 9 21:55 ./
drwxr-xr-x 5 root root 71 Nov 9 21:55 ../
-rw-r--r-- 1 root root 1075 Nov 9 21:55 Makefile
-rw-r--r-- 1 root root 5060 Nov 9 21:55 coredns.yaml.base
-rw-r--r-- 1 root root 5110 Nov 9 21:55 coredns.yaml.in
-rw-r--r-- 1 root root 5112 Nov 9 21:55 coredns.yaml.sed
-rw-r--r-- 1 root root 344 Nov 9 21:55 transforms2salt.sed
-rw-r--r-- 1 root root 287 Nov 9 21:55 transforms2sed.sed
## 使用coredns.yaml.base模板
cp /usr/local/src/kubernetes/cluster/addons/dns/coredns/coredns.yaml.base /root/coredns.yaml
2. 修改配置
vim /root/coredns.yaml
...
# 修改__DNS__DOMAIN__,可查看/etc/kubeasz/clusters/k8s-cluster1/hosts文件中的CLUSTER_DNS_DOMAIN
77 kubernetes cluster.local in-addr.arpa ip6.arpa {
# 指定互联网域名服务器,最大连接数可扩大
83 forward . 223.6.6.6 {
84 max_concurrent 2000
85 }
# pod副本数量,取消注释后,修改为2
103 replicas: 2
# 选择coredns镜像源,可在部署节点docker images查看
# 1. 须先下载镜像,docker pull coredns/coredns:1.9.3
# 2. 镜像打tag,docker tag coredns/coredns:1.9.3 harbor.chu.net/baseimages/coredns:1.9.3
# 3. 上传镜像至本地harbor镜像仓库,docker push harbor.chu.net/baseimages/coredns:1.9.3
142 image: harbor.chu.net/baseimages/coredns:1.9.3
# 资源限制放宽,生产环境4核4G以上
145 limits:
146 cpu: 200m
147 memory: 256Mi
# kube-dns地址,可在某个pod的/etc/resolv.conf中查看nameserver值
210 selector:
211 k8s-app: kube-dns
212 clusterIP: 10.100.0.2
3. 创建coredns
kubectl apply -f /root/coredns.yaml
查看状态
# 查看pod
[root@k8s-deploy ~]#kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-dc77788f9-h4zx9 1/1 Running 1 (3h54m ago) 12h
calico-node-7vrx7 1/1 Running 1 (11h ago) 12h
calico-node-fm6dv 1/1 Running 0 11h
calico-node-h6889 1/1 Running 0 12h
calico-node-q54lc 1/1 Running 1 (11h ago) 12h
calico-node-xmtn9 1/1 Running 0 12h
calico-node-xt5lm 1/1 Running 0 12h
coredns-bd5d4b5fb-dxvcr 1/1 Running 0 80s
coredns-bd5d4b5fb-p9s4l 1/1 Running 0 80s
# 查看svc
[root@k8s-deploy ~]#kubectl get svc -A
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
default kubernetes ClusterIP 10.100.0.1 <none> 443/TCP 1d
kube-system kube-dns ClusterIP 10.100.0.2 <none> 53/UDP,53/TCP,9153/TCP 59s
4. 验证测试
查看pod
[root@k8s-deploy ~]#kubectl get pod -n myserver
NAME READY STATUS RESTARTS AGE
net-test1 1/1 Running 1 (73m ago) 4h17m
net-test2 1/1 Running 1 (90m ago) 4h17m
net-test3 1/1 Running 1 (73m ago) 4h17m
net-test4 1/1 Running 1 (89m ago) 4h16m
net-test5 1/1 Running 1 (73m ago) 4h16m
net-test6 1/1 Running 1 (90m ago) 4h16m
net-test7 1/1 Running 1 (89m ago) 4h16m
net-test8 1/1 Running 1 (89m ago) 4h16m
选择进入容器,ping www.baidu.com测试
[root@k8s-deploy ~]#kubectl exec -it net-test4 -n myserver sh
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
/ # ping www.baidu.com
PING www.baidu.com (183.232.231.172): 56 data bytes
64 bytes from 183.232.231.172: seq=0 ttl=127 time=85.333 ms
64 bytes from 183.232.231.172: seq=1 ttl=127 time=65.458 ms
(三)corefile配置说明
[root@k8s-deploy ~]#cat coredns.yaml
...
# corefile选项配置说明
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system
labels:
addonmanager.kubernetes.io/mode: EnsureExists
data:
Corefile: |
.:53 {
errors #错误信息标准输出
health { #在CoreDNS的http://localhost:8080/health端口提供CoreDNS服务的健康报告
lameduck 5s
}
ready #监听8181端口,当CoreDNS的插件都已就绪时,访问该接口会返回200 OK
kubernetes cluster.local in-addr.arpa ip6.arpa { #CoreDNS将基于kubernetes service name进行DNS查询并返回查询记录给客户端
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
prometheus :9153 #CoreDNS的度量指标数据以Prometheus的key-value的格式在http://localhost:9153/metrics URI上提供
forward . /etc/resolv.conf { #不是kubernetes集群内的其他任何域名查询都将转发到预定义的目的server,如/etc/resolv.conf或IP(223.6.6.6)
max_concurrent 1000 #最大连接数
}
cache 30 #启用service解析缓存,单位为秒
loop #检测域名解析是否有死循环,如coredns转发给内网DNS服务器,而内网DNS服务又转发给CoreDNS,如果发现解析是死循环,则强制中止CoreDNS进程(kubernetes会重建)
reload #检测corefile是否更改,在重新编辑configmap配置后,默认2分钟后优雅地自动加载。
loadbalance #轮询DNS域名解析,如果一个域名存在多个记录则轮询解析。
}
...
四、dashboard的使用
https://github.com/kubernetes/dashboard
kubernetes dashboard是用于kubernetes集群的前端UI组件,允许用户管理集群中运行的应用程序并对其进行故障排查,以及管理集群本身。
部署dashboard
https://github.com/kubernetes/dashboard/releases/tag/v2.6.1
1. 安装dashboard
# 下载yaml文件
wget https://raw.githubusercontent.com/kubernetes/dashboard/v2.6.1/aio/deploy/recommended.yaml
# 重命名
mv recommended.yaml dashboard-v2.6.1.yaml
# 修改port类型为nodeport,端口30000
# 端口号范围可在/etc/kubeasz/clusters/k8s-cluster1/hosts中查看NODE_PORT_RANGE,如NODE_PORT_RANGE="30000-62767"
32 kind: Service
33 apiVersion: v1
34 metadata:
35 labels:
36 k8s-app: kubernetes-dashboard
37 name: kubernetes-dashboard
38 namespace: kubernetes-dashboard
39 spec:
type: NodePort #新增nodeport
ports:
- port: 443
targetPort: 8443
nodePort: 30000 #暴露端口号
selector:
k8s-app: kubernetes-dashboard
[root@k8s-deploy ~]#grep "image:" dashboard-v2.6.1.yaml
image: kubernetesui/dashboard:v2.6.1
image: kubernetesui/metrics-scraper:v1.0.8
# 下载镜像
docker pull kubernetesui/dashboard:v2.6.1
docker pull kubernetesui/metrics-scraper:v1.0.8
# 镜像打tag
docker tag kubernetesui/dashboard:v2.6.1 harbor.chu.net/baseimages/dashboard:v2.6.1
docker tag kubernetesui/metrics-scraper:v1.0.8 harbor.chu.net/baseimages/metrics-scraper:v1.0.8
# 上传镜像至本地harbor仓库
docker push harbor.chu.net/baseimages/dashboard:v2.6.1
docker push harbor.chu.net/baseimages/metrics-scraper:v1.0.8
# 修改dashboard-v2.6.1.yaml镜像
sed -i -e "s#kubernetesui/dashboard:v2.6.1#harbor.chu.net/baseimages/dashboard:v2.6.1#g" \
-e "s#kubernetesui/metrics-scraper:v1.0.8#harbor.chu.net/baseimages/metrics-scraper:v1.0.8#g" \
dashboard-v2.6.1.yaml
# 创建dashboard
kubectl apply -f dashboard-v2.6.1.yaml
查看service
[root@k8s-deploy ~]#kubectl get svc -A
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
default kubernetes ClusterIP 10.100.0.1 <none> 443/TCP 2d14h
kube-system kube-dns ClusterIP 10.100.0.2 <none> 53/UDP,53/TCP,9153/TCP 13h
kubernetes-dashboard dashboard-metrics-scraper ClusterIP 10.100.75.153 <none> 8000/TCP 19m
kubernetes-dashboard kubernetes-dashboard NodePort 10.100.9.93 <none> 443:30000/TCP 19m
2. 创建用户
- 创建用户
准备admin-user.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: admin-user
namespace: kubernetes-dashboard
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: admin-user
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- kind: ServiceAccount
name: admin-user
namespace: kubernetes-dashboard
执行创建
kubectl apply -f admin-user.yaml
- 创建secret
k8s 1.24及之后版本需要手动创建secret
准备admin-secret.yaml
apiVersion: v1
kind: Secret
type: kubernetes.io/service-account-token
metadata:
name: dashboard-admin-user
namespace: kubernetes-dashboard
annotations:
kubernetes.io/service-account.name: "admin-user"
执行创建
kubectl apply -f admin-secret.yaml
- 查看secret
[root@k8s-deploy ~]#kubectl get secrets -A
NAMESPACE NAME TYPE DATA AGE
kube-system calico-etcd-secrets Opaque 3 2d13h
kubernetes-dashboard dashboard-admin-user kubernetes.io/service-account-token 3 10s
kubernetes-dashboard kubernetes-dashboard-certs Opaque 0 50m
kubernetes-dashboard kubernetes-dashboard-csrf Opaque 1 50m
kubernetes-dashboard kubernetes-dashboard-key-holder Opaque 2 50m
3. 登录网页
- 查看token
[root@k8s-deploy ~]#kubectl describe secrets dashboard-admin-user -n kubernetes-dashboard
Name: dashboard-admin-user
Namespace: kubernetes-dashboard
Labels: <none>
Annotations: kubernetes.io/service-account.name: admin-user
kubernetes.io/service-account.uid: 5ccb1159-7576-46e3-823f-50853abf0e32
Type: kubernetes.io/service-account-token
Data
====
ca.crt: 1302 bytes
namespace: 20 bytes
token: eyJhbGciOiJSUzI1NiIsImtpZCI6IkxvRVYzREFvWVZxUE1CZkVlT3R6Xy1VRF9Ed3NSdXdjUkVTSXlBMWltRWsifQ.eyJpc3MiOiJrdWJlcm5ldGVzL3NlcnZpY2VhY2NvdW50Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9uYW1lc3BhY2UiOiJrdWJlcm5ldGVzLWRhc2hib2FyZCIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VjcmV0Lm5hbWUiOiJkYXNoYm9hcmQtYWRtaW4tdXNlciIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VydmljZS1hY2NvdW50Lm5hbWUiOiJhZG1pbi11c2VyIiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9zZXJ2aWNlLWFjY291bnQudWlkIjoiNWNjYjExNTktNzU3Ni00NmUzLTgyM2YtNTA4NTNhYmYwZTMyIiwic3ViIjoic3lzdGVtOnNlcnZpY2VhY2NvdW50Omt1YmVybmV0ZXMtZGFzaGJvYXJkOmFkbWluLXVzZXIifQ.FfIiGpg7RZ6pKnNPKpHIdaXF58kkMG_64nl9Cf2uON0Xx0xRYt6sfSwb9PsJX-OR5P3N7ZjCAmNZd8pJC0PrdHR7orpzzjop0Zd6npCrg18GMm9fBT_L1WY7e3IrWB-V6fAHl03_mUG0bdCzbP4IbpmMEAIJ4KrUse6kU-izZb13MlHqGPjFsdgKxWAkjKJF8BSjKAztF9thEyhdO2XxnEHd1pl9fdoU328UcxgbDcAuKOWJOoTeJy8NXm9lrgxDQOzdcze1plLBXOIhmPzng2NBhgy-hOOBT8KPqRYhJt7nnUe-DkUlHz6ONJEMbgX5vqVDlScnCpwKO3N59qvLbw
- 登录网页
浏览器输入https://ip:30000
输入上面获取到的token
进入管理首页
dashboard常用操作
查看集群整体概览资源
查看整体集群资源,包括应用负载,Pod的资源使用情况
集群资源管理
管理Nodes,Namespace,StorageClass等Cluster集群资源,提供在线编辑yaml方式
查看工作负载Workloads
查看应用工作负载workloads,包含各种不同的工作负载如Deployments,StatefulSets,Jobs等
部署Deployments工作负载
支持从YAML文件输入,YAML文件加载和图形界面部署应用
工作负载管理
主要包括工作负载副本数目扩展,滚动更新等
远程登录容器和查看容器日志
标签:10.0,kube,kubernetes,harbor,net,k8s,root From: https://www.cnblogs.com/areke/p/17063036.html