192.168.190.200 k8s-master1
192.168.190.202 k8s-node1
192.168.190.201 k8s-master2
192.168.190.110 vip
#查看 yum 中可升级的内核版本
yum list kernel --showduplicates
#如果list中有需要的版本可以直接执行 update 升级,多数是没有的,所以要按以下步骤操作
#导入ELRepo软件仓库的GPG公钥
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
#Centos7系统安装ELRepo
yum -y install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm
#Centos8系统安装ELRepo
yum -y install https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm
#查看ELRepo提供的内核版本
yum --disablerepo="*" --enablerepo="elrepo-kernel" list available
#内核稳定版本下载
yum --enablerepo=elrepo-kernel install kernel-ml.x86_64 -y
#查看可以使用的内核版本
awk -F\' '$1=="menuentry " {print i++ " : " $2}' /etc/grub2.cfg
0 : CentOS Linux (6.0.7-1.el7.elrepo.x86_64) 7 (Core)
1 : CentOS Linux (3.10.0-1160.76.1.el7.x86_64) 7 (Core)
2 : CentOS Linux (3.10.0-514.el7.x86_64) 7 (Core)
3 : CentOS Linux (0-rescue-d6f17150094f485f87bb326394fb8e00) 7 (Core)
#指定开机启动内核版本
# Boot the first menu entry (index 0 in the list printed above) by default.
grub2-set-default 0
# Alternative: select the entry by its full title instead of its index.
# grub2-set-default 'CentOS Linux (6.0.7-1.el7.elrepo.x86_64) 7 (Core)'
#生成 grub 配置文件
grub2-mkconfig -o /boot/grub2/grub.cfg
#重启系统,验证
reboot
uname -r
6.0.7-1.el7.elrepo.x86_64
1、关闭防火墙和selinux
sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config
setenforce 0
systemctl stop firewalld
systemctl disable firewalld
systemctl stop NetworkManager
systemctl disable NetworkManager
2、配置hosts解析
cat >> /etc/hosts << EOF
192.168.190.200 k8s-master1
192.168.190.202 k8s-node1
192.168.190.201 k8s-master2
EOF
3、关闭swap分区(避免有性能等其他问题)
# Turn swap off for the running system (temporary, lost on reboot).
swapoff -a
# Comment out every active swap entry in fstab so swap stays off after reboot.
# Lines that are already commented are left alone.
sed -ri 's/^[^#].*[[:space:]]swap[[:space:]].*/#&/' /etc/fstab
# Re-read fstab to make sure the edited file is still valid.
mount -a
4、将桥接的IPV4流量传递到iptables的链
cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
modprobe br_netfilter #载入模块
sysctl -p /etc/sysctl.d/k8s.conf #生效
5、配置ntp
# ntpd and /etc/ntp.conf are used below, so install the ntp package as well;
# ntpdate alone only provides the one-shot sync tool.
yum -y install ntp ntpdate
# One-shot clock sync before the daemon takes over.
ntpdate ntp1.aliyun.com
# Comment out the stock "server ... iburst" entries.
sed -i 's/^server .*iburst/#&/' /etc/ntp.conf
# Append the Aliyun NTP server after the "server 3" entry.
sed -i "/server 3/a\server ntp.aliyun.com" /etc/ntp.conf
systemctl restart ntpd
systemctl enable ntpd
# Show the peers ntpd is now using.
ntpq -p
配置免密
ssh-keygen
for i in k8s-master2 k8s-node1 ;do ssh-copy-id root@${i};done
docker部署
##修改docker源
yum -y install yum-utils
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo
yum makecache fast
yum list docker-ce --showduplicates | sort -r
yum install docker-ce-19.03.* -y
systemctl start docker && systemctl enable docker
修改docker数据目录(可选操作)
cat > /etc/docker/daemon.json << EOF
{
"data-root": "/home/docker"
}
EOF
重启
systemctl restart docker
下载cfssl
# Download the cfssl tool set and install it on PATH.
# -p keeps the step re-runnable when the directory already exists.
mkdir -p /opt/software && cd /opt/software
wget https://pkg.cfssl.org/R1.2/cfssl_linux-amd64
wget https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64
wget https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64
# Make the binaries executable for owner, group and others (not only "other").
chmod +x cfssl*
mv cfssl_linux-amd64 /usr/local/bin/cfssl
mv cfssljson_linux-amd64 /usr/local/bin/cfssljson
mv cfssl-certinfo_linux-amd64 /usr/bin/cfssl-certinfo
创建ca证书
mkdir -p ~/TLS/{etcd,k8s} && cd ~/TLS/etcd
cat > ca-config.json << EOF
{
"signing": {
"default": {
"expiry": "87600h"
},
"profiles": {
"etcd": {
"expiry": "87600h",
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
]
}
}
}
}
EOF
cat > ca-csr.json << EOF
{
"CN": "etcd CA",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "ShangHai",
"ST": "ShangHai"
}
]
}
EOF
[root@k8s-master1 etcd]# cfssl gencert -initca ca-csr.json | cfssljson -bare ca
当前目录下会生成 ca.pem和ca-key.pem文件
[root@k8s-master1 etcd]# ls
ca-config.json ca.csr ca-csr.json ca-key.pem ca.pem
创建etcd证书
cat > server-csr.json << EOF
{
"CN": "etcd",
"hosts": [
"192.168.190.200",
"192.168.190.201",
"192.168.190.202"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "ShangHai",
"ST": "ShangHai"
}
]
}
EOF
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=etcd server-csr.json | cfssljson -bare server
查看
[root@k8s-master1 etcd]# ls
ca-config.json ca.csr ca-csr.json ca-key.pem ca.pem server.csr server-csr.json server-key.pem server.pem
下载etcd二进制包
cd /opt/software
wget https://github.com/etcd-io/etcd/releases/download/v3.4.13/etcd-v3.4.13-linux-amd64.tar.gz
创建工作目录
mkdir -p /opt/etcd/{bin,cfg,ssl}
tar -zxvf etcd-v3.4.13-linux-amd64.tar.gz
cp etcd-v3.4.13-linux-amd64/{etcd,etcdctl} /opt/etcd/bin/
拷贝证书至工作目录
cp ~/TLS/etcd/*.pem /opt/etcd/ssl/
添加etcd配置
cat > /opt/etcd/cfg/etcd.conf << EOF
#[Member]
ETCD_NAME="etcd-1"
ETCD_DATA_DIR="/home/data/"
ETCD_LISTEN_PEER_URLS="https://192.168.190.200:2380"
ETCD_LISTEN_CLIENT_URLS="https://192.168.190.200:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.168.190.200:2380"
ETCD_ADVERTISE_CLIENT_URLS="https://192.168.190.200:2379"
ETCD_INITIAL_CLUSTER="etcd-1=https://192.168.190.200:2380,etcd-2=https://192.168.190.201:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
创建启动文件
# Write the systemd unit for etcd.
# The heredoc delimiter is quoted so the unit text (including the trailing
# backslash continuations) is written verbatim.
# --force-new-cluster is intentionally NOT set here: it rebuilds a one-member
# cluster on every start and is only meant for disaster recovery.
cat > /usr/lib/systemd/system/etcd.service << 'EOF'
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
EnvironmentFile=/opt/etcd/cfg/etcd.conf
ExecStart=/opt/etcd/bin/etcd \
  --cert-file=/opt/etcd/ssl/server.pem \
  --key-file=/opt/etcd/ssl/server-key.pem \
  --peer-cert-file=/opt/etcd/ssl/server.pem \
  --peer-key-file=/opt/etcd/ssl/server-key.pem \
  --trusted-ca-file=/opt/etcd/ssl/ca.pem \
  --peer-trusted-ca-file=/opt/etcd/ssl/ca.pem \
  --logger=zap
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
拷贝配置到另外两个节点,配置文件 IP与etcd-name 记得修改
scp -r /opt/etcd/ 192.168.190.201:/opt/
scp -r /opt/etcd/ 192.168.190.202:/opt/
scp /usr/lib/systemd/system/etcd.service 192.168.190.201:/usr/lib/systemd/system/
scp /usr/lib/systemd/system/etcd.service 192.168.190.202:/usr/lib/systemd/system/
# Reload unit files, then start etcd and enable it at boot.
# NOTE(review): with Type=notify the first member may appear to hang until a
# peer comes up — start etcd on the other member(s) at about the same time.
systemctl daemon-reload
systemctl start etcd
systemctl enable etcd
systemctl status etcd
查看集群节点状态如下即正常(记得修改命令中endpoint的IP为自己的IP)
[root@k8s-master1 software]# ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/server.pem --key=/opt/etcd/ssl/server-key.pem --endpoints="https://192.168.190.200:2379,https://192.168.190.201:2379" endpoint health --write-out=table
+------------------------------+--------+--------------+-------+
| ENDPOINT | HEALTH | TOOK | ERROR |
+------------------------------+--------+--------------+-------+
| https://192.168.190.201:2379 | true | 10.236696ms | |
| https://192.168.190.200:2379 | true | 200.860823ms | |
+------------------------------+--------+--------------+-------+
3、部署master节点
生成kube-apiserver证书
自签CA证书(这个和上边那个etcd的CA区分开,单独给k8s使用的CA)
cd ~/TLS/k8s
添加CA配置
cat > ca-config.json << EOF
{
"signing": {
"default": {
"expiry": "87600h"
},
"profiles": {
"kubernetes": {
"expiry": "87600h",
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
]
}
}
}
}
EOF
cat > ca-csr.json << EOF
{
"CN": "kubernetes",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "ShangHai",
"ST": "ShangHai",
"O": "k8s",
"OU": "System"
}
]
}
EOF
生成ca证书
cfssl gencert -initca ca-csr.json | cfssljson -bare ca
自签CA签发kube-apiserver的证书
# Certificate signing request for kube-apiserver.
# "hosts" must list every IP/DNS name clients use to reach the apiserver.
# 10.0.0.1 is the ClusterIP of the "kubernetes" Service (first address of the
# 10.0.0.0/16 service range); in-cluster clients connect to it, so it has to
# be in the SAN list or TLS verification fails.
cat > apiserver-csr.json << EOF
{
  "CN": "kubernetes",
  "hosts": [
    "10.0.0.1",
    "10.0.0.240",
    "127.0.0.1",
    "192.168.190.200",
    "192.168.190.202",
    "192.168.190.203",
    "192.168.190.201",
    "192.168.190.110",
    "kubernetes",
    "kubernetes.default",
    "kubernetes.default.svc",
    "kubernetes.default.svc.cluster",
    "kubernetes.default.svc.cluster.local"
  ],
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "L": "ShangHai",
      "ST": "ShangHai",
      "O": "k8s",
      "OU": "System"
    }
  ]
}
EOF
生成证书
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes apiserver-csr.json | cfssljson -bare apiserver
下载k8s1.23.13
cd /opt/software
wget https://dl.k8s.io/v1.23.13/kubernetes-server-linux-amd64.tar.gz
tar zxvf kubernetes-server-linux-amd64.tar.gz
mkdir -p /opt/kubernetes/{bin,cfg,ssl,logs}
cd kubernetes/server/bin
cp kube-apiserver kube-scheduler kube-controller-manager kubectl kubelet kube-proxy /opt/kubernetes/bin
cp kubectl /usr/bin
创建apiconfig.conf
# Write the kube-apiserver option file read by its systemd unit.
# The heredoc is unquoted, so each "\\" is written to the file as a single
# "\" line continuation.
# --etcd-servers lists the etcd members configured in ETCD_INITIAL_CLUSTER
# (192.168.190.200 and 192.168.190.201); adjust if the etcd cluster changes.
cat > /opt/kubernetes/cfg/kube-apiserver.conf << EOF
KUBE_APISERVER_OPTS="--logtostderr=false \\
--v=2 \\
--log-dir=/opt/kubernetes/logs \\
--etcd-servers=https://192.168.190.200:2379,https://192.168.190.201:2379 \\
--bind-address=192.168.190.200 \\
--secure-port=6443 \\
--advertise-address=192.168.190.200 \\
--allow-privileged=true \\
--service-cluster-ip-range=10.0.0.0/16 \\
--enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,ResourceQuota,NodeRestriction \\
--authorization-mode=RBAC,Node \\
--enable-bootstrap-token-auth=true \\
--token-auth-file=/opt/kubernetes/cfg/token.csv \\
--service-node-port-range=30000-32767 \\
--kubelet-client-certificate=/opt/kubernetes/ssl/apiserver.pem \\
--kubelet-client-key=/opt/kubernetes/ssl/apiserver-key.pem \\
--tls-cert-file=/opt/kubernetes/ssl/apiserver.pem \\
--tls-private-key-file=/opt/kubernetes/ssl/apiserver-key.pem \\
--client-ca-file=/opt/kubernetes/ssl/ca.pem \\
--service-account-key-file=/opt/kubernetes/ssl/ca-key.pem \\
--service-account-issuer=https://kubernetes.default.svc.cluster.local \\
--service-account-signing-key-file=/opt/kubernetes/ssl/ca-key.pem \\
--etcd-cafile=/opt/etcd/ssl/ca.pem \\
--etcd-certfile=/opt/etcd/ssl/server.pem \\
--etcd-keyfile=/opt/etcd/ssl/server-key.pem \\
--requestheader-client-ca-file=/opt/kubernetes/ssl/ca.pem \\
--proxy-client-cert-file=/opt/kubernetes/ssl/apiserver.pem \\
--proxy-client-key-file=/opt/kubernetes/ssl/apiserver-key.pem \\
--requestheader-allowed-names=kubernetes \\
--requestheader-extra-headers-prefix=X-Remote-Extra- \\
--requestheader-group-headers=X-Remote-Group \\
--requestheader-username-headers=X-Remote-User \\
--enable-aggregator-routing=true \\
--audit-log-maxage=30 \\
--audit-log-maxbackup=3 \\
--audit-log-maxsize=100 \\
--audit-log-path=/opt/kubernetes/logs/k8s-audit.log"
EOF
创建apiserver启动文件
cat > /usr/lib/systemd/system/kube-apiserver.service << EOF
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
[Service]
EnvironmentFile=/opt/kubernetes/cfg/kube-apiserver.conf
ExecStart=/opt/kubernetes/bin/kube-apiserver \$KUBE_APISERVER_OPTS
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF
拷贝生成证书到工作目录
cp ~/TLS/k8s/*.pem /opt/kubernetes/ssl/
启用TLS bootstrapping机制
将生成的数创建token文件(将上边生成的数替换第一个值)
cat >/opt/kubernetes/cfg/token.csv << EOF
$(head -c 16 /dev/urandom | od -An -t x | tr -d ' '),kubelet-bootstrap,10001,"system:kubelet-bootstrap"
EOF
启动
systemctl daemon-reload
systemctl start kube-apiserver
systemctl enable kube-apiserver
systemctl status kube-apiserver
部署kube-controller-manager
cd ~/TLS/k8s
创建证书请求文件
cat > kube-controller-manager-csr.json << EOF
{
"CN": "system:kube-controller-manager",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "ShangHai",
"ST": "ShangHai",
"O": "system:masters",
"OU": "System"
}
]
}
EOF
生成证书
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-controller-manager-csr.json | cfssljson -bare kube-controller-manager
生成kubeconfig文件 (重点)
该文件存放一些集群组件之间交互的认证信息,用于集群组件访问apiserver,操作分为四步
前三步都会往配置文件里写入一些内容,可以每步执行前后对照着内容看看
设置集群参数
配置个临时变量
# Step 1 of building kube-controller-manager.kubeconfig: cluster entry.
# KUBE_CONFIG     - kubeconfig file being generated
# KUBE_APISERVER  - apiserver URL written into that file
KUBE_CONFIG="/opt/kubernetes/cfg/kube-controller-manager.kubeconfig"
KUBE_APISERVER="https://192.168.190.200:6443"
# --server takes the apiserver URL; --kubeconfig names the output file.
kubectl config set-cluster kubernetes \
  --certificate-authority=/opt/kubernetes/ssl/ca.pem \
  --embed-certs=true \
  --server="${KUBE_APISERVER}" \
  --kubeconfig="${KUBE_CONFIG}"
该命令执行完会在指定目录下生成一个我们命名的那个叫kube-controller-manager.kubeconfig的文件,文件里只有集群的信息和CA证书内容
设置客户端认证参数
kubectl config set-credentials kube-controller-manager \
--client-certificate=./kube-controller-manager.pem \
--client-key=./kube-controller-manager-key.pem \
--embed-certs=true \
--kubeconfig=${KUBE_CONFIG}
配置介绍:
set-credentials:设置客户端名字,这里用连接apiserver的组件名称
--client-certificate:客户端的证书文件,apiserver用来做验证
--client-key:客户端证书对应的私钥(key)文件
设置上下文参数
kubectl config set-context default \
--cluster=kubernetes \
--user=kube-controller-manager \
--kubeconfig=${KUBE_CONFIG}
配置介绍:
set-context:设置上下文,设置配置文件中的contexts项,后边跟上下文名称,这里设置为default(多用于操作多个k8s集群时区分当前是在哪个上下文,即哪个集群里操作的)
--cluster:集群名称,要和上边第一步的名称完全一致
--user:用户名称,要和第二步的客户端名称完全一致
设置当前默认上下文
使用kubeconfig中的一个环境项作为当前配置
kubectl config use-context default --kubeconfig=${KUBE_CONFIG}
等集群拉起后,可以通过这个命令查看当前所在的是哪个集群的上下文
kubectl config current-context
创建controller-manager配置文件
# Write the kube-controller-manager option file read by its systemd unit.
# --service-cluster-ip-range must be the same range the apiserver is started
# with (10.0.0.0/16); --cluster-cidr is the pod network range.
cat > /opt/kubernetes/cfg/kube-controller-manager.conf << EOF
KUBE_CONTROLLER_MANAGER_OPTS="--logtostderr=false \\
--v=2 \\
--log-dir=/opt/kubernetes/logs \\
--leader-elect=true \\
--kubeconfig=/opt/kubernetes/cfg/kube-controller-manager.kubeconfig \\
--bind-address=127.0.0.1 \\
--allocate-node-cidrs=true \\
--cluster-cidr=10.244.0.0/16 \\
--service-cluster-ip-range=10.0.0.0/16 \\
--cluster-signing-cert-file=/opt/kubernetes/ssl/ca.pem \\
--cluster-signing-key-file=/opt/kubernetes/ssl/ca-key.pem \\
--root-ca-file=/opt/kubernetes/ssl/ca.pem \\
--service-account-private-key-file=/opt/kubernetes/ssl/ca-key.pem \\
--cluster-signing-duration=87600h0m0s"
EOF
配置介绍:
--kubeconfig:连接apiserver配置文件。
--leader-elect:当该组件启动多个时,自动选举(HA)
--cluster-signing-cert-file:自动为kubelet颁发证书的CA证书
--cluster-signing-key-file:自动为kubelet颁发证书的CA私钥
创建controller-manager启动文件
配置systemd管理
cat > /usr/lib/systemd/system/kube-controller-manager.service << EOF
[Unit]
Description=Kubernetes Controller Manager
Documentation=https://github.com/kubernetes/kubernetes
[Service]
EnvironmentFile=/opt/kubernetes/cfg/kube-controller-manager.conf
ExecStart=/opt/kubernetes/bin/kube-controller-manager \$KUBE_CONTROLLER_MANAGER_OPTS
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF
启动服务
systemctl daemon-reload
systemctl start kube-controller-manager
systemctl enable kube-controller-manager
systemctl status kube-controller-manager
部署kube-scheduler
创建证书请求文件
cat > kube-scheduler-csr.json << EOF
{
"CN": "system:kube-scheduler",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "ShangHai",
"ST": "ShangHai",
"O": "system:masters",
"OU": "System"
}
]
}
EOF
生成证书
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-scheduler-csr.json | cfssljson -bare kube-scheduler
生成kubeconfig文件
# Build kube-scheduler.kubeconfig: cluster, credentials, context, then select
# the context. Run from the directory holding kube-scheduler.pem and its key.
KUBE_CONFIG="/opt/kubernetes/cfg/kube-scheduler.kubeconfig"
KUBE_APISERVER="https://192.168.190.200:6443"
# Cluster entry: --server is the apiserver URL, --kubeconfig the output file.
kubectl config set-cluster kubernetes \
  --certificate-authority=/opt/kubernetes/ssl/ca.pem \
  --embed-certs=true \
  --server="${KUBE_APISERVER}" \
  --kubeconfig="${KUBE_CONFIG}"
# Client credentials, embedded into the kubeconfig.
kubectl config set-credentials kube-scheduler \
  --client-certificate=./kube-scheduler.pem \
  --client-key=./kube-scheduler-key.pem \
  --embed-certs=true \
  --kubeconfig="${KUBE_CONFIG}"
# Context tying the cluster and user entries together.
kubectl config set-context default \
  --cluster=kubernetes \
  --user=kube-scheduler \
  --kubeconfig="${KUBE_CONFIG}"
kubectl config use-context default --kubeconfig="${KUBE_CONFIG}"
创建scheduler.conf文件
cat > /opt/kubernetes/cfg/kube-scheduler.conf << EOF
KUBE_SCHEDULER_OPTS="--logtostderr=false \\
--v=2 \\
--log-dir=/opt/kubernetes/logs \\
--leader-elect \\
--kubeconfig=/opt/kubernetes/cfg/kube-scheduler.kubeconfig \\
--bind-address=127.0.0.1"
EOF
创建scheduler.conf启动文件
cat > /usr/lib/systemd/system/kube-scheduler.service << EOF
[Unit]
Description=Kubernetes Scheduler
Documentation=https://github.com/kubernetes/kubernetes
[Service]
EnvironmentFile=/opt/kubernetes/cfg/kube-scheduler.conf
ExecStart=/opt/kubernetes/bin/kube-scheduler \$KUBE_SCHEDULER_OPTS
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF
启动
systemctl daemon-reload
systemctl start kube-scheduler
systemctl enable kube-scheduler
systemctl status kube-scheduler
3.4、配置kubectl管理集群
3.4.1、配置kubectl证书
cat > admin-csr.json <<EOF
{
"CN": "admin",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "ShangHai",
"ST": "ShangHai",
"O": "system:masters",
"OU": "System"
}
]
}
EOF
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes admin-csr.json | cfssljson -bare admin
3.4.2、配置kubectl使用的kubeconfig
# Build root's kubectl configuration from the admin client certificate.
# Run from the directory holding admin.pem and admin-key.pem.
# -p keeps the step re-runnable when ~/.kube already exists.
mkdir -p /root/.kube
KUBE_CONFIG="/root/.kube/config"
KUBE_APISERVER="https://192.168.190.200:6443"
# Cluster entry: --server is the apiserver URL, --kubeconfig the output file.
kubectl config set-cluster kubernetes \
  --certificate-authority=/opt/kubernetes/ssl/ca.pem \
  --embed-certs=true \
  --server="${KUBE_APISERVER}" \
  --kubeconfig="${KUBE_CONFIG}"
# Client credentials, embedded into the kubeconfig.
kubectl config set-credentials cluster-admin \
  --client-certificate=./admin.pem \
  --client-key=./admin-key.pem \
  --embed-certs=true \
  --kubeconfig="${KUBE_CONFIG}"
# Context tying the cluster and user entries together.
kubectl config set-context default \
  --cluster=kubernetes \
  --user=cluster-admin \
  --kubeconfig="${KUBE_CONFIG}"
kubectl config use-context default --kubeconfig="${KUBE_CONFIG}"
3.4.3、验证
各组件状态正常即可
[root@k8s-master1 k8s]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
scheduler Healthy ok
controller-manager Healthy ok
etcd-2 Healthy {"health":"true","reason":""}
etcd-0 Healthy {"health":"true","reason":""}
etcd-1 Healthy {"health":"true","reason":""}
3.5、部署kubelet
3.5.1、kubelet.yml
# Write the KubeletConfiguration file passed to kubelet via --config.
# Nesting matters: authentication/authorization/evictionHard are maps, so
# their children must be indented. The delimiter is quoted so the YAML is
# written verbatim.
cat > /opt/kubernetes/cfg/kubelet-config.yml << 'EOF'
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
address: 0.0.0.0
port: 10250
readOnlyPort: 10255
cgroupDriver: cgroupfs
clusterDNS:
- 10.0.0.240
clusterDomain: cluster.local
failSwapOn: false
authentication:
  anonymous:
    enabled: false
  webhook:
    cacheTTL: 2m0s
    enabled: true
  x509:
    clientCAFile: /opt/kubernetes/ssl/ca.pem
authorization:
  mode: Webhook
  webhook:
    cacheAuthorizedTTL: 5m0s
    cacheUnauthorizedTTL: 30s
evictionHard:
  imagefs.available: 15%
  memory.available: 100Mi
  nodefs.available: 10%
  nodefs.inodesFree: 5%
maxOpenFiles: 1000000
maxPods: 110
EOF
配置说明:
evictionHard:驱逐资源硬限制(当达到下面配置项的阈值后会触发驱逐)
imagefs.available:容器运行时镜像存储空间剩余量
memory.available:宿主机可用内存
nodefs.available:宿主机可用磁盘空间(一般是指根目录)
nodefs.inodesFree:宿主机可用inode(df -i可查看总量)
创建配置kubelet.conf
cat > /opt/kubernetes/cfg/kubelet.conf << EOF
KUBELET_OPTS="--logtostderr=false \\
--v=2 \\
--log-dir=/opt/kubernetes/logs \\
--hostname-override=k8s-master1 \\
--network-plugin=cni \\
--kubeconfig=/opt/kubernetes/cfg/kubelet.kubeconfig \\
--bootstrap-kubeconfig=/opt/kubernetes/cfg/bootstrap.kubeconfig \\
--config=/opt/kubernetes/cfg/kubelet-config.yml \\
--cert-dir=/opt/kubernetes/ssl \\
--pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google-containers/pause-amd64:3.0"
EOF
3.5.3、生成bootstrap.kubeconfig文件
临时变量
# Build bootstrap.kubeconfig, which kubelet uses to request its certificate.
KUBE_CONFIG="/opt/kubernetes/cfg/bootstrap.kubeconfig"
KUBE_APISERVER="https://192.168.190.200:6443"
# The token must equal the first column of token.csv; read it from the file
# instead of hard-coding it so the two can never drift apart.
TOKEN=$(awk -F, 'NR==1 {print $1}' /opt/kubernetes/cfg/token.csv)
# Cluster entry: --server is the apiserver URL, --kubeconfig the output file.
kubectl config set-cluster kubernetes \
  --certificate-authority=/opt/kubernetes/ssl/ca.pem \
  --embed-certs=true \
  --server="${KUBE_APISERVER}" \
  --kubeconfig="${KUBE_CONFIG}"
# Bootstrap user authenticates with the shared token.
kubectl config set-credentials "kubelet-bootstrap" \
  --token="${TOKEN}" \
  --kubeconfig="${KUBE_CONFIG}"
# Context tying the cluster and user entries together.
kubectl config set-context default \
  --cluster=kubernetes \
  --user="kubelet-bootstrap" \
  --kubeconfig="${KUBE_CONFIG}"
kubectl config use-context default --kubeconfig="${KUBE_CONFIG}"
3.5.4、授权kubelet-bootstrap用户允许请求证书
在启动kubelet后,kubelet会自动用上一步的kubeconfig配置去向apiserver申请证书,而配置里的client用户是kubelet-bootstrap,所以要先给该用户一个权限才可以
kubectl create clusterrolebinding kubelet-bootstrap \
--clusterrole=system:node-bootstrapper \
--user=kubelet-bootstrap
3.5.5、配置kubelet启动服务
cat > /usr/lib/systemd/system/kubelet.service << EOF
[Unit]
Description=Kubernetes Kubelet
After=docker.service
[Service]
EnvironmentFile=/opt/kubernetes/cfg/kubelet.conf
ExecStart=/opt/kubernetes/bin/kubelet \$KUBELET_OPTS
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
启动
# Reload unit files, start kubelet and enable it at boot so the node comes
# back after a restart.
systemctl daemon-reload
systemctl start kubelet
systemctl enable kubelet
systemctl status kubelet
3.4.13、批准kubelet的证书申请
查看证书申请csr(certificatesigningrequest),状态为pending等待中
[root@localhost k8s]# kubectl get csr
NAME AGE SIGNERNAME REQUESTOR CONDITION
node-csr-Llfysfg-RRWFEE7aMoyEy6VFAV755dK2ekY0gYC8fMI 40s kubernetes.io/kube-apiserver-client-kubelet kubelet-bootstrap Pending
批准kubelet证书申请
[root@localhost k8s]# kubectl certificate approve node-csr-Llfysfg-RRWFEE7aMoyEy6VFAV755dK2ekY0gYC8fMI
certificatesigningrequest.certificates.k8s.io/node-csr-Llfysfg-RRWFEE7aMoyEy6VFAV755dK2ekY0gYC8fMI approved
查看csr状态,状态为Approved,Issued(已批准)
[root@localhost k8s]# kubectl get csr
NAME AGE SIGNERNAME REQUESTOR CONDITION
node-csr-Llfysfg-RRWFEE7aMoyEy6VFAV755dK2ekY0gYC8fMI 4m26s kubernetes.io/kube-apiserver-client-kubelet kubelet-bootstrap Approved,Issued
3.6、部署kube-proxy
切换到工作目录
cd ~/TLS/k8s
创建证书请求文件
cat > kube-proxy-csr.json << EOF
{
"CN": "system:kube-proxy",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "ShangHai",
"ST": "ShangHai",
"O": "k8s",
"OU": "System"
}
]
}
EOF
生成证书
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-proxy-csr.json | cfssljson -bare kube-proxy
3.6.2、生成proxy.config配置文件
# Build kube-proxy.kubeconfig. Run from the directory holding kube-proxy.pem
# and kube-proxy-key.pem.
KUBE_CONFIG="/opt/kubernetes/cfg/kube-proxy.kubeconfig"
KUBE_APISERVER="https://192.168.190.200:6443"
# Cluster entry: --server is the apiserver URL, --kubeconfig the output file.
kubectl config set-cluster kubernetes \
  --certificate-authority=/opt/kubernetes/ssl/ca.pem \
  --embed-certs=true \
  --server="${KUBE_APISERVER}" \
  --kubeconfig="${KUBE_CONFIG}"
# Client credentials, embedded into the kubeconfig.
kubectl config set-credentials kube-proxy \
  --client-certificate=./kube-proxy.pem \
  --client-key=./kube-proxy-key.pem \
  --embed-certs=true \
  --kubeconfig="${KUBE_CONFIG}"
# Context tying the cluster and user entries together.
kubectl config set-context default \
  --cluster=kubernetes \
  --user=kube-proxy \
  --kubeconfig="${KUBE_CONFIG}"
kubectl config use-context default --kubeconfig="${KUBE_CONFIG}"
3.6.3、定义配置参数,指定proxy.yml
# Write the KubeProxyConfiguration file passed to kube-proxy via --config.
# "kubeconfig" is a child of "clientConnection" and must be indented.
# The delimiter is quoted so the YAML is written verbatim.
cat > /opt/kubernetes/cfg/kube-proxy-config.yml << 'EOF'
kind: KubeProxyConfiguration
apiVersion: kubeproxy.config.k8s.io/v1alpha1
bindAddress: 0.0.0.0
metricsBindAddress: 0.0.0.0:10249
clientConnection:
  kubeconfig: /opt/kubernetes/cfg/kube-proxy.kubeconfig
hostnameOverride: k8s-master1
#mode: "ipvs"
clusterCIDR: 10.244.0.0/16
EOF
单独介绍下mode参数
这个是配置kube-proxy的工作模式,目前用的基本就是这两种,都是基于内核的netfilter实现的:
iptables: 默认使用的模式,通过创建一条条iptables规则链来访问集群内service。这种模式pod内ping不通service的IP
ipvs: 专门用来做负载均衡的技术,lvs就用的这个。pod可以ping通service的IP
这里就先不展开详细说了,要单独开单章说明。
这里部署就先用默认的iptables模式就可以,在服务量级不大的时候,iptables和ipvs性能差不多
3.6.4、创建proxy.conf
cat > /opt/kubernetes/cfg/kube-proxy.conf << EOF
KUBE_PROXY_OPTS="--logtostderr=false \\
--v=2 \\
--log-dir=/opt/kubernetes/logs \\
--config=/opt/kubernetes/cfg/kube-proxy-config.yml"
EOF
3.6.5、配置systemd管理、启动服务
cat > /usr/lib/systemd/system/kube-proxy.service << EOF
[Unit]
Description=Kubernetes Proxy
After=network.target
[Service]
EnvironmentFile=/opt/kubernetes/cfg/kube-proxy.conf
ExecStart=/opt/kubernetes/bin/kube-proxy \$KUBE_PROXY_OPTS
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
启动服务
systemctl daemon-reload
systemctl start kube-proxy
systemctl enable kube-proxy
systemctl status kube-proxy
3.7、安装cni网络插件这里选择用calico来作为网络插件使用
calico官网:https://www.tigera.io/project-calico/
查看版本对应关系
从官网文档说明里得知,calico-v3.23版本支持k8s的v1.23版本,所以这里就选择安装v3.23版本好了
前面记得关闭NetworkManager
mkdir /opt/kubernetes/calico && cd /opt/kubernetes/calico
下载官方yaml文件 官网 https://docs.tigera.io/archive
Kubernetes requirements
Supported versions
We test Calico v3.21 against the following Kubernetes versions.
v1.20
v1.21
v1.22
kubectl create -f https://docs.projectcalico.org/archive/v3.21/manifests/tigera-operator.yaml
curl https://projectcalico.docs.tigera.io/archive/v3.21/manifests/calico.yaml -O
3.7.2、根据环境修改文件配置项
修改calico.yaml
改CALICO_IPV4POOL_CIDR项为我们上边定义的clusterCIDR(指定pod的IP池)
- name: CALICO_IPV4POOL_CIDR
value: "10.244.0.0/16"
3.7.3、启动服务
拉起
kubectl apply -f calico.yaml
查看服务
[root@localhost calico]# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-5bb48c55fd-8p2jx 0/1 Pending 0 78s
calico-node-k6w89 0/1 Init:0/3 0 79s
等calico的pod都Running后,查看node状态也变为ready
[root@localhost calico]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master1 Ready <none> 68m v1.20.13
如果有启动失败,可以describe查看event或者docker logs查看容器日志排错
3.8、配置apiserver访问kubelet权限
允许使用kubectl来查看pod日志
不然会有如下报错
Error from server (Forbidden): Forbidden (user=kubernetes, verb=get, resource=nodes, subresource=proxy) ( pods/log calico-node-8kmr5)
mkdir /opt/kubernetes/yaml && cd /opt/kubernetes/yaml
配置
# Grant the apiserver's client identity (user "kubernetes", the CN of the
# apiserver certificate) access to the kubelet API, so that kubectl
# logs/exec work. The file holds two documents separated by "---":
# the ClusterRole and the ClusterRoleBinding.
cat > apiserver-to-kubelet-rbac.yaml << 'EOF'
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  annotations:
    rbac.authorization.kubernetes.io/autoupdate: "true"
  labels:
    kubernetes.io/bootstrapping: rbac-defaults
  name: system:kube-apiserver-to-kubelet
rules:
  - apiGroups:
      - ""
    resources:
      - nodes/proxy
      - nodes/stats
      - nodes/log
      - nodes/spec
      - nodes/metrics
      - pods/log
    verbs:
      - "*"
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system:kube-apiserver
  namespace: ""
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:kube-apiserver-to-kubelet
subjects:
  - apiGroup: rbac.authorization.k8s.io
    kind: User
    name: kubernetes
EOF
kubectl apply -f apiserver-to-kubelet-rbac.yaml
master1节点完成
4、node节点
# Create the working directories on the node. The log directory is "logs",
# matching --log-dir=/opt/kubernetes/logs in kubelet.conf and kube-proxy.conf.
mkdir -p /opt/kubernetes/{cfg,bin,logs,ssl}
4.2、把master上的配置信息拷贝到node节点中(master节点操作)
注:这里注意,要把cfg/kubelet.kubeconfig这个文件删除,因为是apiserver那边颁发证书后自动生成的,每个节点不一样
scp /opt/kubernetes/cfg/{kubelet*,kube-proxy*,bootstrap*} 192.168.190.202:/opt/kubernetes/cfg/
scp /opt/kubernetes/bin/{kubelet*,kube-proxy*} 192.168.190.202:/opt/kubernetes/bin/
scp /opt/kubernetes/ssl/ca.pem 192.168.190.202:/opt/kubernetes/ssl/
scp /usr/lib/systemd/system/{kubelet,kube-proxy}.service 192.168.190.202:/usr/lib/systemd/system/
4.3、修改配置(node节点操作)
a. 修改kubelet.conf文件中hostname-override值为所在node节点主机名
cd /opt/kubernetes/cfg/
vim kubelet.conf
.....
--hostname-override=k8s-node1 \
.....
b.修改kube-proxy-config.yml文件中hostnameOverride值为所在node节点主机名
vim kube-proxy-config.yml
.....
hostnameOverride: k8s-node1
......
c.删除kubelet.kubeconfig
rm /opt/kubernetes/cfg/kubelet.kubeconfig
4.4、启动服务(node节点操作)
systemctl daemon-reload
systemctl start kubelet kube-proxy
systemctl enable kubelet kube-proxy
4.5、master中查看证书申请并同意
[root@localhost cfg]# kubectl get csr
NAME AGE SIGNERNAME REQUESTOR CONDITION
node-csr-m8BE6FA5zXXLreTePYmT1lC3nyaazJwrq88Cpiamj1U 29s kubernetes.io/kube-apiserver-client-kubelet kubelet-bootstrap Pending
[root@localhost cfg]# kubectl certificate approve node-csr-m8BE6FA5zXXLreTePYmT1lC3nyaazJwrq88Cpiamj1U
certificatesigningrequest.certificates.k8s.io/node-csr-m8BE6FA5zXXLreTePYmT1lC3nyaazJwrq88Cpiamj1U approved
4.6、查看集群node状态
会在新加节点上启动一些初始服务,如calico-node,所以需要稍等一会状态就可变为ready
[root@localhost cfg]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master1 Ready <none> 125m v1.20.13
k8s-node1 NotReady <none> 33s v1.20.13
5、部署Coredns
一般情况下,pod之间通信都是用service的clusterIP,但是ip有难以记忆等问题,所以需要加一个DNS来解析,可以使用service_name来进行服务之间相互调用。大概是从k8s的1.11版本以来,k8s就直接从kube-dns转为coredns了,所以本次DNS选择coredns
5.1、拉取配置
这里就用容器形式部署DNS了,方便快捷
mkdir /opt/kubernetes/coredns && cd /opt/kubernetes/coredns
查看
https://github.com/coredns/deployment/blob/master/kubernetes/CoreDNS-k8s_version.md
拷贝一下,coredns.yaml.sed deploy.sh 文档###提示拷贝到vim里面总是乱序,我是拷贝到txt再传上去的
cp coredns.yaml.sed coredns.yaml
5.2、修改配置
vim coredns.yaml
匹配10.0.0.2 改成10.0.0.240
5.3、运行
# Apply the manually edited manifest. kubectl reads the named file, so piping
# deploy.sh output into it had no effect.
# Alternative (generated manifest): ./deploy.sh -i 10.0.0.240 | kubectl apply -f -
kubectl apply -f coredns.yaml
[root@k8s-master1 coredns]# kubectl get pod,svc -n kube-system
NAME READY STATUS RESTARTS AGE
pod/calico-kube-controllers-5bb48c55fd-8p2jx 1/1 Running 1 2d16h
pod/calico-node-k6w89 1/1 Running 1 2d16h
pod/calico-node-r86wf 1/1 Running 1 2d15h
pod/coredns-79495b5589-zk72g 1/1 Running 0 3m49s
强制删除一直处于Terminating的pod
[root@k8s-master1 ~]# kubectl get po -n kube-system -l k8s-app=kube-dns
NAME READY STATUS RESTARTS AGE
coredns-fb4874468-fgs2h 1/1 Terminating 0 6d20h
[root@k8s-master1 ~]# kubectl delete pods coredns-fb4874468-fgs2h --grace-period=0 --force -n kube-system
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "coredns-fb4874468-fgs2h" force deleted
根据创建删除
kubectl delete -f coredns.yaml
5.4、测试
查看当前的svc
[root@k8s-master1 coredns]# kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes ClusterIP 10.0.0.1 <none> 443/TCP 6h59m
启动一个临时pod(busybox)测试解析
[root@k8s-master1 coredns]# kubectl run -ti --rm busybox-test --image=busybox:1.35 sh
/ # nslookup kubernetes
Server: 10.0.0.240
Address: 10.0.0.240:53
** server can't find kubernetes.cluster.local: NXDOMAIN
Name: kubernetes.default.svc.cluster.local
Address: 10.0.0.1
测试端口
/ # nc -vz kubernetes 443
kubernetes (10.0.0.1:443) open
/ #
/ # nc -vz 10.0.0.1 443
10.0.0.1 (10.0.0.1:443) open
6、部署一个官方的dashboard
mkdir /opt/kubernetes/dashboard && cd /opt/kubernetes/dashboard
地址:https://github.com/kubernetes/dashboard/releases?page=1
版本选择kubernetesui/dashboard:v2.4.0 kubernetesui/metrics-scraper:v1.0.7
# Save the upstream manifest as dashboard.yaml, the file name that is edited
# and applied in the following steps.
wget -O dashboard.yaml https://raw.githubusercontent.com/kubernetes/dashboard/v2.4.0/aio/deploy/recommended.yaml
6.2、修改配置
集群角色简介:
ClusterRole:是集群的权限
ServiceAccount:是集群的用户
ClusterRoleBinding:起到把权限和用户绑在一起的作用
官方的配置里创建的serviceaccount用户(kubernetes-dashboard)没有权限打开面板页面
所以我们把默认用户绑到集群原有的cluster-admin规则上即可,修改配置
修改service
默认是ClusterIP类型,要改为NodePort方便访问(加一行即可)
kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  type: NodePort        # added: expose the dashboard on a node port
  ports:
    - port: 443
      targetPort: 8443
      nodePort: 30000   # added: fixed port inside the 30000-32767 node-port range
  selector:
    k8s-app: kubernetes-dashboard
6.3、拉起服务
[root@k8s-master1 dashboard]# kubectl apply -f dashboard.yaml
[root@k8s-master1 dashboard]# kubectl get pod,svc -n kubernetes-dashboard
NAME READY STATUS RESTARTS AGE
pod/dashboard-metrics-scraper-6f669b9c9b-6hkkf 1/1 Running 0 56m
pod/kubernetes-dashboard-758765f476-nh988 1/1 Running 0 56m
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/dashboard-metrics-scraper ClusterIP 10.0.203.46 <none> 8000/TCP 56m
service/kubernetes-dashboard NodePort 10.0.51.31 <none> 443:30143/TCP 56m
创建用户
# Create a ServiceAccount for dashboard login and bind it to the built-in
# cluster-admin ClusterRole. The file holds two documents separated by "---".
cat > dashboard-adminuser.yaml << 'EOF'
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kubernetes-dashboard
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
  - kind: ServiceAccount
    name: admin-user
    namespace: kubernetes-dashboard
EOF
拉起文件
kubectl apply -f dashboard-adminuser.yaml
[root@k8s-master1 dashboard]# kubectl get serviceaccounts -n kubernetes-dashboard
NAME SECRETS AGE
admin-user 1 20s
default 1 96m
kubernetes-dashboard 1 96m
查看token
kubectl -n kubernetes-dashboard describe secret $(kubectl -n kubernetes-dashboard get secret | grep admin-user | awk '{print $1}')
eyJhbGciOiJSUzI1NiIsImtpZCI6InlEVDZRamdPMU4yTWw4R0pMQ3V4QVJFV2Q3Q0t1Umw2ODZEWkRLV3dWNGcifQ.eyJpc3MiOiJrdWJlcm5ldGVzL3NlcnZpY2VhY2NvdW50Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9uYW1lc3BhY2UiOiJrdWJlcm5ldGVzLWRhc2hib2FyZCIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VjcmV0Lm5hbWUiOiJhZG1pbi11c2VyLXRva2VuLTlscHgyIiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9zZXJ2aWNlLWFjY291bnQubmFtZSI6ImFkbWluLXVzZXIiLCJrdWJlcm5ldGVzLmlvL3NlcnZpY2VhY2NvdW50L3NlcnZpY2UtYWNjb3VudC51aWQiOiJkNDMwMTY5Ni1kZTIzLTQ4YjEtYjA5Ni04ZGViY2U1ZDE3NDIiLCJzdWIiOiJzeXN0ZW06c2VydmljZWFjY291bnQ6a3ViZXJuZXRlcy1kYXNoYm9hcmQ6YWRtaW4tdXNlciJ9.KsX6d84xlN-48wieZGKvQH6tzcYimjXBOL7GlPgTHxCE8buKit1KJtXSy9vc033gfMxiACrZHos7xS0s5JGBQ3__OGSnyAdGI3lw0Gev-pmUx8L978T1uV8VbW9iyW0DlsdYvWms4DDZCBPoGh1FkXrQy1YEusYfbkTRIdpiR19R_s7e986niThXNkfqavp7OX3uGRa-qUmGbNXm8viR--V0avQGQCeDP8LOSd_m7HeyK5Vtg08KOzlO99qZhRxj3rZHoMV12UUGw5OWJhuuC5msdJYDcPH6lzrnja0A5rqHGLapNayhzSP2qPl07m4Dp32RIxTygAX2RuTp2Eq8Rw
7、再装一个metrics
实现目的:可以通过kubectl top xxx看状态等
mkdir /opt/kubernetes/metrics && cd /opt/kubernetes/metrics
wget https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
7.2、修改配置
因为yaml里用的镜像是国外的,所以要改一下
[root@k8s-master1 metrics]# docker search metrics-server
NAME DESCRIPTION STARS OFFICIAL AUTOMATED
mirrorgooglecontainers/metrics-server-amd64 17
bitnami/metrics-server Bitnami Docker Image for Metrics Server 13 [OK]
rancher/metrics-server 5
rancher/metrics-server-amd64
修改yaml中镜像
原内容:
image: k8s.gcr.io/metrics-server/metrics-server:v0.6.2
imagePullPolicy: IfNotPresent
改为:
image: bitnami/metrics-server:0.6.2
imagePullPolicy: IfNotPresent
添加不验证证书配置
不然启动后describe时events里会报Readiness probe failed: HTTP probe failed with statuscode: 500
新增:
containers:
- args:
- --cert-dir=/tmp
- --secure-port=4443
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --kubelet-use-node-status-port
- --metric-resolution=15s
- --kubelet-insecure-tls # 新增的这个,不验证证书
image: bitnami/metrics-server:0.6.2
7.3、拉起服务及验证
kubectl apply -f components.yaml
[root@k8s-master1 metrics]# kubectl get pod -n kube-system |grep metr
metrics-server-7c65894ccb-8dxnr 1/1 Running 0 5m32s
验证
[root@k8s-master1 metrics]# kubectl top nodes
NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%
k8s-master1 176m 8% 1329Mi 70%
k8s-node1 76m 3% 985Mi 52%
k8s-node2 83m 4% 1065Mi 56%
1、部署新增master2节点
按照规划,这里要新增一台192.168.190.201机器,划为master2
1.1、系统初始化+安装docker
这个前边步骤里都有,就不赘述了,按照前边的步骤
【二.1、系统初始化】和【三.2、安装docker】操作即可
1.2、开始部署
因master2的部署操作和master1基本一致,所以就把配置文件拷贝过来,修改下启动服务即可
创建etcd的ssl目录(master2中操作)
mkdir /opt/etcd
拷贝master1文件(master1中操作)
# Copy the Kubernetes install tree, the etcd certificates, the kube* systemd
# unit files, the kubectl binary and root's kubectl config from master1 to
# master2 (192.168.190.201).
scp -r /opt/kubernetes/ 192.168.190.201:/opt/
scp -r /opt/etcd/ssl/ 192.168.190.201:/opt/etcd/
scp /usr/lib/systemd/system/kube* 192.168.190.201:/usr/lib/systemd/system/
scp /usr/bin/kubectl 192.168.190.201:/usr/bin/
# /root/.kube is a directory, so -r is required; without it scp skips the
# directory ("not a regular file") and nothing is copied.
scp -r /root/.kube 192.168.190.201:/root/
删除kubelet自动生成的配置(master2中操作)
rm -f /opt/kubernetes/cfg/kubelet.kubeconfig
rm -f /opt/kubernetes/ssl/kubelet*
1.3、修改配置(master2中操作)
vim /opt/kubernetes/cfg/kube-apiserver.conf
--bind-address=192.168.190.201 \
--advertise-address=192.168.190.201 \
vim /opt/kubernetes/cfg/kube-controller-manager.kubeconfig
server: https://192.168.190.201:6443
vim /opt/kubernetes/cfg/kube-scheduler.kubeconfig
server: https://192.168.190.201:6443
vim /opt/kubernetes/cfg/kubelet.conf
--hostname-override=k8s-master2
vim /opt/kubernetes/cfg/kube-proxy-config.yml
hostnameOverride: k8s-master2
vim ~/.kube/config
...
server: https://192.168.190.201:6443
1.4、启动服务(master2中操作)
systemctl daemon-reload
systemctl start kube-apiserver kube-controller-manager kube-scheduler kubelet kube-proxy
systemctl enable kube-apiserver kube-controller-manager kube-scheduler kubelet kube-proxy
systemctl status kube-apiserver kube-controller-manager kube-scheduler kubelet kube-proxy
1.5、审批kubelet的申请(master1中操作)
[root@k8s-master1 .kube]# kubectl get csr
NAME AGE SIGNERNAME REQUESTOR CONDITION
node-csr--ygjamp1lr6HiMH6q8Z6CGaI22XG0QSMSebxy5YbLi8 10m kubernetes.io/kube-apiserver-client-kubelet kubelet-bootstrap Pending
node-csr-GjW73Eu62MCPq7ZVFR-ps71IUHADifExcdFVkeWNwEY 34s kubernetes.io/kube-apiserver-client-kubelet kubelet-bootstrap Pending
node-csr-Llfysfg-RRWFEE7aMoyEy6VFAV755dK2ekY0gYC8fMI 7h16m kubernetes.io/kube-apiserver-client-kubelet kubelet-bootstrap Pending
[root@k8s-master1 .kube]# kubectl certificate approve node-csr-GjW73Eu62MCPq7ZVFR-ps71IUHADifExcdFVkeWNwEY
certificatesigningrequest.certificates.k8s.io/node-csr-GjW73Eu62MCPq7ZVFR-ps71IUHADifExcdFVkeWNwEY approved
certificatesigningrequest.certificates.k8s.io/node-csr-iMojU9INDQmkgNOCvh8IbW33qj8CQ4sj2Tsizet-mKQ approved
1.6、验证
master1中操作
[root@k8s-master1 opt]# kubectl get nodes -owide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
k8s-master1 Ready <none> 8d v1.23.13 192.168.190.200 <none> CentOS Linux 7 (Core) 3.10.0-1127.el7.x86_64 docker://19.03.15
k8s-master2 NotReady <none> 38s v1.23.13 192.168.190.201 <none> CentOS Linux 7 (Core) 3.10.0-1127.el7.x86_64 docker://19.03.15
k8s-node1 Ready <none> 8d v1.23.13 192.168.190.202 <none> CentOS Linux 7 (Core) 3.10.0-1127.el7.x86_64 docker://19.03.15
k8s-node2 Ready <none> 8d v1.23.13 192.168.190.203 <none> CentOS Linux 7 (Core) 3.10.0-1127.el7.x86_64 docker://19.03.15
需要等待片刻,等calico在master2节点上拉起后,节点才会变为ready
master2中操作
[root@k8s-master2 opt]# kubectl get nodes -owide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
k8s-master1 Ready <none> 8d v1.23.13 192.168.190.200 <none> CentOS Linux 7 (Core) 3.10.0-1127.el7.x86_64 docker://19.03.15
k8s-master2 Ready <none> 11m v1.23.13 192.168.190.201 <none> CentOS Linux 7 (Core) 3.10.0-1127.el7.x86_64 docker://19.03.15
k8s-node1 Ready <none> 8d v1.23.13 192.168.190.202 <none> CentOS Linux 7 (Core) 3.10.0-1127.el7.x86_64 docker://19.03.15
k8s-node2 Ready <none> 8d v1.23.13 192.168.190.203 <none> CentOS Linux 7 (Core) 3.10.0-1127.el7.x86_64 docker://19.03.15
2、部署nginx+keepalived高可用架构
按照上图的架构,在集群中需要增加
一个nginx,实现请求负载均衡apiserver
一个keepalived,实现用VIP访问nginx,故障时VIP转移,保证nginx始终可被访问
如果是公有云的服务,如腾讯云、阿里云之类的,可直接用他们的CLB、SLB什么的,效果一样
2.1、安装nginx+keepalived(master1/2都操作)
yum install epel-release -y
要安装stream模块
yum install nginx nginx-mod-stream keepalived -y
2.2、添加stream配置(master1/2都操作)
# Append a stream block to /etc/nginx/nginx.conf that listens on port 16443
# and proxies connections to the two kube-apiservers listed in the upstream.
# The heredoc delimiter is quoted ("EOF") so nginx variables such as
# $remote_addr are written literally instead of being expanded by the shell.
# NOTE(review): this appends (>>), so running it twice on the same host
# duplicates the stream block — run it once per host.
cat >> /etc/nginx/nginx.conf << "EOF"
stream {
log_format main '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';
access_log /var/log/nginx/k8s-access.log main;
upstream k8s-apiserver {
server 192.168.190.200:6443; # Master1 APISERVER IP:PORT
server 192.168.190.201:6443; # Master2 APISERVER IP:PORT
}
server {
listen 16443; # 由于nginx与master节点复用,这个监听端口不能是6443,否则会冲突
proxy_pass k8s-apiserver;
}
}
EOF
2.3、检测并启动nginx(master1/2都操作)
[root@k8s-master1 nginx]# nginx -t
nginx: the configuration file /etc/nginx/nginx.conf syntax is ok
nginx: configuration file /etc/nginx/nginx.conf test is successful
[root@k8s-master1 nginx]# systemctl start nginx
[root@k8s-master1 nginx]# systemctl enable nginx
Created symlink from /etc/systemd/system/multi-user.target.wants/nginx.service to /usr/lib/systemd/system/nginx.service.
2.4、配置keepalived(master1/2都操作)
这里要注意修改配置
router_id:master1节点中值为nginx_master,master2节点中值为nginx_backup
state:master1节点中值为MASTER,master2节点中值为BACKUP
priority:master1节点中值为100,master2节点值修改为90
cd /etc/keepalived/
mv keepalived.conf keepalived.conf_bak
cat > keepalived.conf << EOF
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 127.0.0.1
smtp_connect_timeout 30
router_id nginx_master #每个keepalived节点的唯一标识
}
vrrp_script check_nginx { #监测nginx的状态
script "/etc/keepalived/check_nginx.sh" #监控脚本
interval 3 #检测间隔时间,即每3秒检测一次
fall 2 #检测失败的最大次数,超过两次认为节点资源发生故障
weight -20 #自动调整优先级的参数,检测成功优先级不变,失败则优先级-20,就会发生切换
}
vrrp_instance VI_1 {
state MASTER #虚拟路由器的初始状态,可选择MASTER或者BACKUP
interface ens33 #要修改为实际网卡名
virtual_router_id 51 #每个虚拟路由的唯一标识ID,本次master和backup同属一个路由,所以值要保持一致
priority 100 #当前节点的优先级,值越大越优先,主节点比备节点大即可
advert_int 1 #VRRP通告的时间间隔,默认为1秒
authentication { #设置同一虚拟路由之间的认证机制
auth_type PASS #认证类型,这里用密码
auth_pass 1111 #预共享密钥,仅前8位有效(就是配置的密码,可以配置为随机数,但是master和backup要一致)
}
virtual_ipaddress { #配置VIP
192.168.190.110/24 #要保证这个IP没有被占用
}
track_script { #定义执行的跟踪脚本
check_nginx
}
}
EOF
2.5、配置检测nginx状态脚本
实现效果:
使用ss命令去检测nginx的16443端口是否存活
如果端口存在,则返回状态0,keepalived不做任何处理
如果端口不存在,则尝试重启nginx,等待2秒后重新判断端口是否存活
重启后如果端口仍不存在,则返回状态为1,keepalived会做master降级,VIP漂移操作
重启后如果端口存在,则返回状态为0,keepalived不做任何处理
# Generate the nginx health-check script in the current directory.
# The heredoc delimiter is quoted ("EOF") so $count and $(...) are written
# to the file literally instead of being expanded while it is generated.
cat > check_nginx.sh << "EOF"
#!/bin/bash
# Health check for the nginx proxy on port 16443.
# Exit 0: port 16443 shows up in the socket list (possibly after one restart).
# Exit 1: port 16443 is still missing after nginx has been restarted once.

count=$(ss -antp | grep -c 16443)
if [ "$count" -eq 0 ]; then
    # Port is gone: try to bring nginx back once before reporting failure.
    systemctl restart nginx
    sleep 2
    # Re-read the socket list; the earlier value of count is stale.
    count=$(ss -antp | grep -c 16443)
    if [ "$count" -eq 0 ]; then
        exit 1
    else
        exit 0
    fi
else
    exit 0
fi
EOF
chmod +x check_nginx.sh
2.6、启动keepalived
systemctl start keepalived
systemctl enable keepalived
验证,刚开始启动,因为master1优先级高,所以VIP是在master1上
[root@k8s-master1 keepalived]# ip a |grep ens33 -A 3
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:0c:29:3a:0d:16 brd ff:ff:ff:ff:ff:ff
inet 192.168.190.200/24 brd 192.168.100.255 scope global noprefixroute ens33
valid_lft forever preferred_lft forever
inet 192.168.190.110/24 scope global secondary ens33
valid_lft forever preferred_lft forever
inet6 fe80::30c0:4897:a86f:f217/64 scope link noprefixroute
valid_lft forever preferred_lft forever
2.7、测试VIP漂移效果
master1中手动停止nginx
[root@k8s-master1 keepalived]# systemctl stop nginx
[root@k8s-master1 keepalived]# ss -antp |grep 16443
master1中查看vip是否还在
[root@k8s-master1 keepalived]# ip a |grep ens33 -A 3
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:0c:29:3a:0d:16 brd ff:ff:ff:ff:ff:ff
inet 192.168.190.200/24 brd 192.168.100.255 scope global noprefixroute ens33
valid_lft forever preferred_lft forever
inet6 fe80::30c0:4897:a86f:f217/64 scope link noprefixroute
valid_lft forever preferred_lft forever
查看keepalived状态
[root@k8s-master1 keepalived]# systemctl status keepalived
● keepalived.service - LVS and VRRP High Availability Monitor
Loaded: loaded (/usr/lib/systemd/system/keepalived.service; enabled; vendor preset: disabled)
Active: active (running) since Sat 2022-12-31 16:15:33 CST; 11min ago
Main PID: 88195 (keepalived)
CGroup: /system.slice/keepalived.service
├─88195 /usr/sbin/keepalived -D
├─88196 /usr/sbin/keepalived -D
├─88197 /usr/sbin/keepalived -D
├─99640 /usr/sbin/keepalived -D
├─99642 /bin/bash /etc/keepalived/check_nginx.sh
└─99658 sleep 2
Dec 31 16:26:14 k8s-master1 Keepalived_vrrp[88197]: /etc/keepalived/check_nginx.sh exited with status 1
Dec 31 16:26:17 k8s-master1 Keepalived_vrrp[88197]: /etc/keepalived/check_nginx.sh exited with status 1
Dec 31 16:26:20 k8s-master1 Keepalived_vrrp[88197]: /etc/keepalived/check_nginx.sh exited with status 1
......
用dashboard测试一下
https://192.168.190.110:30001/#/login
下面这步可以不用,参考的文档上有所以拷贝下来了
3、调整所有节点上的server配置
之前部署时,所有kube服务里的apiserver配置还都是192.168.190.200:6443,即master1的apiserver地址
所以现在虽然VIP已经生效,但是服务并没有去调用这个地址
因此最后一步就是,修改所有节点上的配置文件(包括master和node),让服务通过VIP地址(192.168.190.110:16443)去调用apiserver
# Rewrite every file under /opt/kubernetes/cfg so that references to master1's
# apiserver (192.168.190.200:6443) point at the VIP on port 16443 instead.
# The dots in the pattern are escaped so they match only a literal "." and not
# any character.
# NOTE(review): the services presumably read these files only at startup, so
# they likely need a restart afterwards for the change to take effect — confirm.
sed -i 's/192\.168\.190\.200:6443/192.168.190.110:16443/g' /opt/kubernetes/cfg/*
验证访问
[root@k8s-master1 keepalived]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master1 NotReady <none> 4d1h v1.20.13
k8s-master2 Ready <none> 59m v1.20.13
k8s-node1 Ready <none> 3d23h v1.20.13