7.1 Node Affinity
Node affinity scheduling: nodeAffinity
[root@k8s01 ~]# kubectl explain pod.spec.affinity
KIND: Pod
VERSION: v1
RESOURCE: affinity <Object>
DESCRIPTION:
If specified, the pod's scheduling constraints
Affinity is a group of affinity scheduling rules.
FIELDS:
nodeAffinity <Object> ##affinity between the pod and nodes
Describes node affinity scheduling rules for the pod.
podAffinity <Object> ##affinity between pods
Describes pod affinity scheduling rules (e.g. co-locate this pod in the
same node, zone, etc. as some other pod(s)).
podAntiAffinity <Object> ##anti-affinity between pods
Describes pod anti-affinity scheduling rules (e.g. avoid putting this pod
in the same node, zone, etc. as some other pod(s)).
#kubectl explain pod.spec.affinity.nodeAffinity
preferredDuringSchedulingIgnoredDuringExecution
requiredDuringSchedulingIgnoredDuringExecution
#preferred: the scheduler tries to place the pod on a node that satisfies the affinity defined here, but it is not a mandatory condition (soft affinity)
#required: a node must satisfy the affinity defined here; this is a mandatory condition (hard affinity)
#kubectl explain pod.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms
matchExpressions: match by label expressions
matchFields: match by node fields
#kubectl explain pod.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms.matchExpressions
key: the label key to check
operator: how the key and values are compared: In, NotIn, Exists, DoesNotExist, Gt, Lt
values: the values to compare against
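Only the In operator appears in the examples below; as a minimal sketch of the other operators (disksize is a hypothetical node label), a nodeSelectorTerm can also look like this, where both expressions in one matchExpressions list must be satisfied by the same node:
nodeSelectorTerms:
- matchExpressions:
  - key: zone            # Exists: the node only needs to carry the zone label, any value
    operator: Exists
  - key: disksize        # hypothetical label; Gt/Lt compare the label value as an integer
    operator: Gt
    values: ["100"]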
Example 1: hard affinity with requiredDuringSchedulingIgnoredDuringExecution
[root@k8s01 ~]# cat pod-nodeaffinity-demo.yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod-node-affinity-demo
  namespace: default
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: pod-node-affinity-demo
    image: ikubernetes/myapp:v1
    imagePullPolicy: IfNotPresent
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: zone
            operator: In
            values:
            - foo
            - bar
#If any node carries the zone label with value foo or bar, the pod can be scheduled onto that node.
kubectl apply -f pod-nodeaffinity-demo.yaml
[root@k8s01 ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod-node-affinity-demo 0/1 Pending 0 16s <none> <none> <none> <none>
#The STATUS is Pending, i.e. scheduling has not completed: no node carries a zone label with value foo or bar, and since hard affinity is used the condition must be met before the pod can be scheduled.
kubectl label node k8s02 zone=foo
#Label node k8s02 with zone=foo, then check again:
[root@k8s01 ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod-node-affinity-demo 1/1 Running 0 43s 10.244.236.146 k8s02 <none> <none>
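To confirm which nodes now carry the zone label, you can, for example, print it as a column or filter by it:
kubectl get nodes -L zone
kubectl get nodes -l zone=foo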
Example 2: soft affinity with preferredDuringSchedulingIgnoredDuringExecution
[root@k8s01 ~]# cat pod-nodeaffinity-demo-2.yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod-node-affinity-demo-2
  namespace: default
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: pod-node-affinity-demo-2
    image: ikubernetes/myapp:v1
    imagePullPolicy: IfNotPresent
  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
      - preference:
          matchExpressions:
          - key: zone1
            operator: In
            values:
            - foo1
            - bar1
        weight: 60
#kubectl apply -f pod-nodeaffinity-demo-2.yaml
[root@k8s01 ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod-node-affinity-demo 1/1 Running 0 11m 10.244.236.146 k8s02 <none> <none>
pod-node-affinity-demo-2 1/1 Running 0 53s 10.244.236.147 k8s02 <none> <none>
This shows that with soft affinity the pod still runs, even though no node carries the zone1 label defined in the preference.
Node affinity describes the relationship between a pod and nodes: the conditions matched when the pod is scheduled onto a node.
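The two flavors can also be combined in a single pod spec; a minimal sketch reusing the zone label from above (the weight of 80 is arbitrary):
affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:   # hard: the node must have zone=foo or zone=bar
      nodeSelectorTerms:
      - matchExpressions:
        - key: zone
          operator: In
          values: ["foo", "bar"]
    preferredDuringSchedulingIgnoredDuringExecution:  # soft: among those nodes, prefer zone=foo
    - weight: 80
      preference:
        matchExpressions:
        - key: zone
          operator: In
          values: ["foo"]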
7.2 Pod Affinity
Pod-to-pod affinity scheduling comes in two forms:
podAffinity: pods prefer to stay together, placing related pods in nearby locations (the same zone, the same rack, and so on) so they can communicate more efficiently. For example, with two data centers each hosting a cluster of 1000 machines, we may want nginx and tomcat deployed on nodes in the same location to improve communication efficiency.
podAntiAffinity: pods prefer to stay apart. If two independent sets of applications are deployed, anti-affinity keeps them from affecting each other.
The first pod is scheduled onto some node, and that node then becomes the reference for deciding whether subsequent pods may run there; this is pod affinity. To define pod affinity we first need a way to decide which nodes count as the same location and which do not. What is the standard for "location"? If the node name is the standard, nodes with the same name are the same location and nodes with different names are different locations.
kubectl explain pods.spec.affinity.podAffinity
#requiredDuringSchedulingIgnoredDuringExecution: hard affinity
#preferredDuringSchedulingIgnoredDuringExecution: soft affinity
kubectl explain pods.spec.affinity.podAffinity.requiredDuringSchedulingIgnoredDuringExecution
#topologyKey:
#  the key that defines the topology domain (the "location"); this field is required
#  how to decide whether nodes are the same location:
#  if the key rack is used, all nodes labeled rack=rack1 are one location
#  if the key row is used, all nodes labeled row=row1 are one location
#labelSelector:
#  to decide which pods this pod should be affine to, labelSelector selects the group of pods that serve as the affinity target
#namespaces:
#  labelSelector selects a group of pods, but from which namespace? This is set via namespaces; if it is not set, it defaults to the namespace the new pod is created in (see the sketch below)
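A minimal sketch of a required podAffinity term that also pins the namespace (the app=store label is hypothetical):
affinity:
  podAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
    - labelSelector:
        matchExpressions:
        - {key: app, operator: In, values: ["store"]}
      namespaces: ["default"]              # only pods in the default namespace are considered
      topologyKey: kubernetes.io/hostname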
kubectl explain pods.spec.affinity.podAffinity.requiredDuringSchedulingIgnoredDuringExecution.labelSelector
#matchExpressions: match by label expressions
#matchLabels: match by exact label key/value pairs
Example 1: pod affinity
#Define two pods: the first serves as the reference, the second follows it.
[root@k8s01 ~]# cat pod-required-affinity-demo.yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod-first
  namespace: default
  labels:
    app2: myapp2
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
    imagePullPolicy: IfNotPresent
---
apiVersion: v1
kind: Pod
metadata:
  name: pod-second
  labels:
    app: backend
    tier: db
spec:
  containers:
  - name: busybox    ##the leading "-" is needed because containers is of type <[]Object>, i.e. a list
    image: busybox:1.28
    imagePullPolicy: IfNotPresent
    command: ["sh","-c","sleep 3600"]
  affinity:
    podAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
      - labelSelector:
          matchExpressions:
          - {key: app2, operator: In, values: ["myapp2"]}
        topologyKey: kubernetes.io/hostname
#For topologyKey, choose a key from the node labels shown by kubectl get nodes --show-labels
#This means the new pod must be placed on the same node as a pod carrying the app2=myapp2 label
kubectl apply -f pod-required-affinity-demo.yaml
[root@k8s01 ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod-first 1/1 Running 0 10s 10.244.236.150 k8s02 <none> <none>
pod-second 1/1 Running 0 10s 10.244.236.151 k8s02 <none> <none>
#kubectl delete -f pod-required-affinity-demo.yaml   ##clean up when done
Example 2: pod anti-affinity
#Define two pods: the first serves as the reference, the second is scheduled onto a different node.
[root@k8s01 ~]# cat pod-required-anti-affinity-demo.yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod-first
  labels:
    app1: myapp1
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
    imagePullPolicy: IfNotPresent
---
apiVersion: v1
kind: Pod
metadata:
  name: pod-second
  labels:
    app: backend
    tier: db
spec:
  containers:
  - name: busybox
    image: busybox:1.28
    imagePullPolicy: IfNotPresent
    command: ["sh","-c","sleep 3600"]
  affinity:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
      - labelSelector:
          matchExpressions:
          - {key: app1, operator: In, values: ["myapp1"]}
        topologyKey: kubernetes.io/hostname
kubectl apply -f pod-required-anti-affinity-demo.yaml
[root@k8s01 ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod-first 1/1 Running 0 68s 10.244.236.152 k8s02 <none> <none>
pod-second 1/1 Running 0 19s 10.244.73.69 k8s01 <none> <none>
#The two pods are on different nodes; this is pod anti-affinity.
#kubectl delete -f pod-required-anti-affinity-demo.yaml   ##clean up afterwards
Example 3: use a different topologyKey
kubectl label nodes k8s02 zone=foo   #if the zone label already exists, add --overwrite
[root@k8s01 test1]# cat pod-first-required-anti-affinity-demo-1.yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod-first
  labels:
    app3: myapp3
    tier: frontend
spec:
  containers:
  - name: pod-first
    image: ikubernetes/myapp:v1
    imagePullPolicy: IfNotPresent
[root@k8s01 test1]# cat pod-second-required-anti-affinity-demo-1.yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod-second
  labels:
    app: backend
    tier: db
spec:
  containers:
  - name: busybox
    image: busybox:1.28
    imagePullPolicy: IfNotPresent
    command: ["sh","-c","sleep 3600"]
  affinity:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
      - labelSelector:
          matchExpressions:
          - {key: app3, operator: In, values: ["myapp3"]}
        topologyKey: zone
kubectl apply -f pod-first-required-anti-affinity-demo-1.yaml
kubectl apply -f pod-second-required-anti-affinity-demo-1.yaml
#The second pod stays Pending: with zone as the topologyKey the candidate nodes count as the same location, there is no node in a different location, and the rule is a required anti-affinity, so scheduling fails. If required is changed to preferred for the anti-affinity, the pod would still run (see the sketch below).
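A minimal sketch of that soft variant: the same anti-affinity rule expressed as preferred, which only discourages co-location instead of forbidding it (weight 100 is arbitrary):
affinity:
  podAntiAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
    - weight: 100
      podAffinityTerm:
        labelSelector:
          matchExpressions:
          - {key: app3, operator: In, values: ["myapp3"]}
        topologyKey: zone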
#podAffinity: pod affinity, which pods this pod prefers to be near
#nodeAffinity: node affinity, which nodes this pod prefers
7.3 Taints and Tolerations
Taints give the node the initiative in selection: when we put a taint on a node, pods that do not tolerate it cannot run there. A taint is key/value attribute data defined on a node and decides which pods the node rejects.
taints: key/value data used on nodes to define the taints;
tolerations: key/value data used on pods to define tolerations, i.e. which taints the pod can tolerate.
Pod affinity is a property of the pod; taints, however, are a property of the node, defined under node.spec.taints (see the kubectl explain below).
[root@k8s01 test1]# kubectl describe node k8s01|grep -A 2 Taints
Taints: <none>
Unschedulable: false
kubectl explain node.spec.taints
#effect <string> -required-
#key <string> -required-
#timeAdded <string>
#value <string>
The effect of a taint defines how strongly pods are repelled:
NoSchedule:
Affects only scheduling. A pod that tolerates the node's taints can be scheduled onto it; if the node later gains a new taint that an already-running pod does not tolerate, that existing pod is not affected.
NoExecute:
Affects both scheduling and existing pods: if an existing pod does not tolerate a taint added to the node later, the pod is evicted.
PreferNoSchedule:
"Prefer not to, but acceptable if necessary": the soft version of NoSchedule (see the sketch after this list).
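PreferNoSchedule is not used in the examples below; as a sketch, it is applied and removed the same way as the other effects (node-type=test is a hypothetical key/value):
kubectl taint nodes k8s02 node-type=test:PreferNoSchedule     #add the soft taint
kubectl taint nodes k8s02 node-type=test:PreferNoSchedule-    #remove it again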
When defining tolerations on a pod, two kinds of matching are supported (both forms are sketched after this list):
1. Equality match: key and value must match the taint exactly.
2. Existence check: key and effect must match; value can be empty.
A pod may define several tolerations and a node may carry several taints; they are checked one by one, and only when every taint on the node is tolerated can the pod be scheduled there. If some taint is not tolerated, what happens to the pod depends on that taint's effect.
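A minimal sketch of the two forms, assuming a node taint node-type=production:NoSchedule like the one used below; either of these tolerations would match it:
tolerations:            #equality match: key, value and effect all match the taint
- key: "node-type"
  operator: Equal
  value: "production"
  effect: "NoSchedule"

tolerations:            #existence check: any value of node-type with effect NoSchedule is tolerated
- key: "node-type"
  operator: Exists
  effect: "NoSchedule"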
Normally the master node's taint has the NoSchedule effect.
[root@k8s01 ~]# kubectl describe pods kube-apiserver-k8s01 -n kube-system|grep -A 2 Tolerations
Tolerations: :NoExecute op=Exists
#This pod's toleration is op=Exists with an empty key and effect NoExecute, i.e. it tolerates every NoExecute taint, which is why it can keep running on the master.
Managing node taints:
kubectl taint --help
Example 1: treat k8s01 as dedicated to production and the other nodes as test
kubectl taint nodes k8s01 node-type=production:NoSchedule
kubectl taint nodes k8s02 node-type=production:NoSchedule
[root@k8s01 test1]# kubectl describe nodes k8s01 |grep -A 2 Taints
Taints: node-type=production:NoSchedule
Unschedulable: false
#With k8s01 and k8s02 tainted, pods that do not tolerate the taint will not be scheduled onto them.
[root@k8s01 test1]# cat pod-taint.yaml
apiVersion: v1
kind: Pod
metadata:
  name: taint-pod
  namespace: default
  labels:
    tomcat: tomcat-pod
spec:
  containers:
  - name: taint-pod
    ports:
    - containerPort: 8080
    image: tomcat:8.5-jre8-alpine
    imagePullPolicy: IfNotPresent
kubectl apply -f pod-taint.yaml
[root@k8s01 test1]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
taint-pod 0/1 Pending 0 11s <none> <none> <none> <none>
##The pod defines no tolerations, so it stays Pending (a way to inspect the scheduling events is sketched below).
##Next, remove the taint from k8s02:
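To see why the scheduler rejected it, the pod's events usually contain a FailedScheduling message naming the untolerated taint:
kubectl describe pod taint-pod | grep -A 5 Events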
kubectl taint nodes k8s02 node-type-
kubectl describe nodes k8s02 |grep -A 2 Taints
[root@k8s01 test1]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
taint-pod 1/1 Running 0 79s 10.244.236.154 k8s02 <none> <none>
Example 2: remove the taint from k8s01 and taint k8s02 with NoExecute
kubectl taint nodes k8s01 node-type-
kubectl taint nodes k8s02 node-type=production:NoExecute
[root@k8s01 test1]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
taint-pod 0/1 Terminating 0 6m55s 10.244.236.154 k8s02 <none> <none>
[root@k8s01 test1]# kubectl get pods -o wide
No resources found in default namespace.
##The pod on k8s02 is evicted (killed) immediately.
[root@k8s01 test1]# kubectl apply -f pod-taint.yaml
pod/taint-pod created
[root@k8s01 test1]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
taint-pod 0/1 ContainerCreating 0 3s <none> k8s01 <none> <none>
##After recreating it, the pod lands on k8s01.
[root@k8s01 test1]# cat pod-demo-1.yaml
apiVersion: v1
kind: Pod
metadata:
  name: myapp-deploy
  namespace: default
  labels:
    app: myapp
    release: canary
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
    imagePullPolicy: IfNotPresent
    ports:
    - containerPort: 80
  tolerations:
  - key: "node-type"
    operator: Equal
    value: production
    effect: NoExecute
    tolerationSeconds: 3600
#tolerationSeconds: the pod may keep running for 3600 seconds after a matching NoExecute taint appears on its node, then it is evicted.
#The API docs say tolerationSeconds "must be of effect NoExecute, otherwise this field is ignored"; in practice, if the effect here is not NoExecute the manifest is rejected with an error rather than silently ignored.
kubectl apply -f pod-demo-1.yaml
[root@k8s01 test1]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATE
myapp-deploy 1/1 Running 0 11s 10.244.236.155 k8s02 <none>
taint-pod 1/1 Running 0 8m14s 10.244.73.73 k8s01 <none>
#Because operator Equal (equality match) is used, the key, value and effect must all exactly match the taint defined on the node.
kubectl delete -f pod-demo-1.yaml
kubectl delete -f pod-taint.yaml
Example 3: modify the toleration again
[root@k8s01 test1]# cat pod-demo-1.yaml
apiVersion: v1
kind: Pod
metadata:
  name: myapp-deploy
  namespace: default
  labels:
    app: myapp
    release: canary
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
    imagePullPolicy: IfNotPresent
    ports:
    - containerPort: 80
  tolerations:
  - key: "node-type"
    operator: Exists
    value: ""
    effect: NoSchedule
kubectl apply -f pod-demo-1.yaml
[root@k8s01 test1]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
myapp-deploy 1/1 Running 0 4s 10.244.73.74 k8s01 <none> <none>
Modify once more:
[root@k8s01 test1]# cat pod-demo-1.yaml
apiVersion: v1
kind: Pod
metadata:
  name: myapp-deploy
  namespace: default
  labels:
    app: myapp
    release: canary
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
    imagePullPolicy: IfNotPresent
    ports:
    - containerPort: 80
  tolerations:
  - key: "node-type"
    operator: Exists
    value: ""
    effect: ""
kubectl delete -f pod-demo-1.yaml
kubectl apply -f pod-demo-1.yaml
[root@k8s01 test1]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
myapp-deploy 1/1 Running 0 40s 10.244.236.157 k8s02 <none> <none>
#As long as a taint has the key node-type, any value and any effect is tolerated (an even broader form is sketched below).
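Going one step further, a toleration with operator Exists and no key at all tolerates every taint; this pattern is typically seen on DaemonSets that must run on every node. A minimal sketch:
tolerations:
- operator: Exists      #no key, value or effect: tolerates all taints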
Clean up and restore
kubectl delete -f pod-demo-1.yaml
kubectl taint nodes k8s01 node-type-
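Note that k8s02 still carries the node-type=production:NoExecute taint added in Example 2; to restore it as well, remove that taint too and verify:
kubectl taint nodes k8s02 node-type-
kubectl describe nodes k8s01 k8s02 | grep -A 2 Taints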