安装 metrics-server 后 Pod 状态显示 Running,但 READY 一直为 0/1,通过 kubectl describe 查询到如下内容
Readiness probe failed: HTTP probe failed with statuscode: 500
1.报错内容
root@master:~/metrics-server# kubectl get pods -n kube-system metrics-server-dd7677d75-grfcq
NAME READY STATUS RESTARTS AGE
metrics-server-dd7677d75-grfcq 0/1 Running 0 11m
root@master:~/metrics-server# kubectl describe pods -n kube-system metrics-server-dd7677d75-grfcq
Name: metrics-server-dd7677d75-grfcq
Namespace: kube-system
Priority: 2000000000
Priority Class Name: system-cluster-critical
Service Account: metrics-server
Node: node01.sec.lab/10.22.4.12
Start Time: Thu, 22 Aug 2024 11:47:55 +0800
Labels: k8s-app=metrics-server
pod-template-hash=dd7677d75
Annotations: cni.projectcalico.org/containerID: 5b9994d73afeeb910ca8c6208d835f45699d7c4c5216d703d8ffe1063df1ae1e
cni.projectcalico.org/podIP: 10.244.182.3/32
cni.projectcalico.org/podIPs: 10.244.182.3/32
Status: Running
IP: 10.244.182.3
IPs:
IP: 10.244.182.3
Controlled By: ReplicaSet/metrics-server-dd7677d75
Containers:
metrics-server:
Container ID: containerd://c5e44c1653320d08a1109e8cc1afd18617c649069420949e69cfc01226463ee7
Image: m.daocloud.io/registry.k8s.io/metrics-server/metrics-server:v0.7.1
Image ID: m.daocloud.io/registry.k8s.io/metrics-server/metrics-server@sha256:db3800085a0957083930c3932b17580eec652cfb6156a05c0f79c7543e80d17a
Port: 10250/TCP
Host Port: 0/TCP
SeccompProfile: RuntimeDefault
Args:
--cert-dir=/tmp
--secure-port=10250
--kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
--kubelet-use-node-status-port
--metric-resolution=15s
State: Running
Started: Thu, 22 Aug 2024 11:47:56 +0800
Ready: False
Restart Count: 0
Requests:
cpu: 100m
memory: 200Mi
Liveness: http-get https://:https/livez delay=0s timeout=1s period=10s #success=1 #failure=3
Readiness: http-get https://:https/readyz delay=20s timeout=1s period=10s #success=1 #failure=3
Environment: <none>
Mounts:
/tmp from tmp-dir (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-ct8r8 (ro)
Conditions:
Type Status
PodReadyToStartContainers True
Initialized True
Ready False
ContainersReady False
PodScheduled True
Volumes:
tmp-dir:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
kube-api-access-ct8r8:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: Burstable
Node-Selectors: kubernetes.io/os=linux
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 11m default-scheduler Successfully assigned kube-system/metrics-server-dd7677d75-grfcq to node01.sec.lab
Normal Pulled 11m kubelet Container image "m.daocloud.io/registry.k8s.io/metrics-server/metrics-server:v0.7.1" already present on machine
Normal Created 11m kubelet Created container metrics-server
Normal Started 11m kubelet Started container metrics-server
Warning Unhealthy 79s (x66 over 10m) kubelet Readiness probe failed: HTTP probe failed with statuscode: 500
root@master:~/metrics-server#
2.处理方式(二选一)
2.1.直接kubectl修改
# 报错原因:metrics-server 默认会校验 kubelet 的 TLS 证书,而 kubelet 通常使用自签名证书,校验失败导致 /readyz 返回 500
# 解决:在 containers 的 args 内增加 - --kubelet-insecure-tls,跳过对 kubelet 证书的校验(适用于测试环境;生产环境建议为 kubelet 签发受信任的 serving 证书)
$ kubectl edit deployments.apps -n kube-system metrics-server
# 修改后的args
containers:
- args:
- --cert-dir=/tmp
- --secure-port=10250
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --kubelet-use-node-status-port
- --metric-resolution=15s
- --kubelet-insecure-tls
2.2.YAML文件修改
# 在components.yaml文件内增加- --kubelet-insecure-tls内容,修改完成后执行 kubectl apply -f components.yaml 重新应用,使配置生效
root@master:~/metrics-server# head -140 components.yaml | tail -10
k8s-app: metrics-server
spec:
containers:
- args:
- --cert-dir=/tmp
- --secure-port=10250
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --kubelet-use-node-status-port
- --metric-resolution=15s
- --kubelet-insecure-tls
root@master:~/metrics-server#
3.运行状态
$ kubectl get pods -n kube-system metrics-server-869c9dc79d-9rvnh
NAME READY STATUS RESTARTS AGE
metrics-server-869c9dc79d-9rvnh 1/1 Running 0 90m
$ kubectl top pod -A
NAMESPACE NAME CPU(cores) MEMORY(bytes)
calico-apiserver calico-apiserver-7b4f8c9f47-j98nx 9m 38Mi
calico-apiserver calico-apiserver-7b4f8c9f47-vll2x 3m 30Mi
calico-system calico-kube-controllers-7bcb74cbc6-kzc7f 7m 12Mi
calico-system calico-node-dhhnc 35m 123Mi
calico-system calico-node-jtwth 39m 123Mi
calico-system calico-node-m64bg 36m 124Mi
calico-system calico-typha-86444685d4-42rs6 4m 17Mi
calico-system calico-typha-86444685d4-mrwz5 3m 17Mi
calico-system csi-node-driver-66wrl 1m 7Mi
calico-system csi-node-driver-kpf8z 1m 7Mi
calico-system csi-node-driver-nxlw9 1m 8Mi
kube-system coredns-857d9ff4c9-4tt76 4m 12Mi
kube-system coredns-857d9ff4c9-zh576 3m 12Mi
kube-system etcd-master.sec.lab 46m 97Mi
kube-system kube-apiserver-master.sec.lab 112m 369Mi
kube-system kube-controller-manager-master.sec.lab 30m 50Mi
kube-system kube-proxy-89bk2 18m 20Mi
kube-system kube-proxy-bjlmp 19m 20Mi
kube-system kube-proxy-mk257 1m 20Mi
kube-system kube-scheduler-master.sec.lab 8m 18Mi
kube-system metrics-server-869c9dc79d-9rvnh 7m 18Mi
标签:--,probe,system,server,metrics,failed,报错,kube,calico
From: https://www.cnblogs.com/amsilence/p/18373660