一、环境介绍
本文主要演示 prometheus 部署在 k8s 集群内时,如何通过服务发现机制自动发现 k8s 集群自有服务,以及其他服务发现场景;后续会演示在集群外部署 prometheus 并自动发现 k8s 服务、获取数据
创建监控使用的namespace
kubectl create ns monitoring
配置docker镜像加速(registry-mirrors),使其可以正常下载镜像
[root@k8s-master deploy]# cat /etc/docker/daemon.json #配置registry-mirrors { "registry-mirrors": ["https://docker.m.daocloud.io","https://huecker.io","https://dockerhub.timeweb.cloud"], "exec-opts": ["native.cgroupdriver=systemd"] }
prometheus相关镜像参考地址:https://hub.docker.com/u/prom 都在prom下
二、k8s安装node-exporter
参考地址:https://github.com/prometheus/node_exporter
kubectl create ns monitoring #创建专有的NS mkdir -p manifest/monitor cd manifest/monitor
vim node-export-ds.yaml #注意类型是DaemonSet,相比二进制部署,daemonSet方式部署在节点经常变动的情况下避免了手动安装的问题 apiVersion: apps/v1 kind: DaemonSet metadata: name: node-exporter namespace: monitoring labels: k8s-app: node-exporter spec: selector: matchLabels: k8s-app: node-exporter template: metadata: labels: k8s-app: node-exporter spec: tolerations: - effect: NoSchedule key: node-role.kubernetes.io/master #注意,你的master节点污点和我的未必一致 通过describe命令查看 containers: - image: registry-vpc.cn-shanghai.aliyuncs.com/zdbl-base/node-exporter:v1.3.1 #这里我是下载到我的私有仓库,可以直接使用官方的镜像地址,docker配置后应当可以直接下载 name: prometheus-node-exporter ports: - containerPort: 9100 hostPort: 9100 #如果不写默认与containerPort一致,注意端口冲突 protocol: TCP name: metrics volumeMounts: - mountPath: /host/proc name: proc - mountPath: /host/sys name: sys - mountPath: /host name: rootfs args: - --path.procfs=/host/proc - --path.sysfs=/host/sys - --path.rootfs=/host volumes: - name: proc hostPath: path: /proc - name: sys hostPath: path: /sys - name: rootfs hostPath: path: / hostNetwork: true hostPID: true hostIPC: true kubectl apply -f node-export-ds.yaml kubectl get pods -n monitoring NAME READY STATUS RESTARTS AGE node-exporter-7bp55 1/1 Running 0 1m node-exporter-klx2b 1/1 Running 0 1m node-exporter-pcht8 1/1 Running 0 1m netstat -tnlp|grep 9100 #因为是hostPort
浏览器访问节点 9100 端口 /metrics 接口
二、k8s安装cadvisor
略,参考https://www.cnblogs.com/panwenbin-logs/p/18385045
三、安装NFS 存储类
如果集群中已有可用的存储类(StorageClass),请忽略本节
参考文档:https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner/
wget https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner/archive/refs/tags/nfs-subdir-external-provisioner-4.0.18.tar.gz tar xf nfs-subdir-external-provisioner-4.0.18.tar.gz cd nfs-subdir-external-provisioner-nfs-subdir-external-provisioner-4.0.18/ cp -r deploy deploy-bak cd deploy kubectl create ns nfs-provisioner #创建专用的名称空间 sed -i 's/namespace: default/namespace: nfs-provisioner/g' `grep -rl 'namespace: default' ./` #替换配置文件名称空间 vim deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: name: nfs-client-provisioner labels: app: nfs-client-provisioner # replace with namespace where provisioner is deployed namespace: nfs-provisioner spec: replicas: 1 strategy: type: Recreate selector: matchLabels: app: nfs-client-provisioner template: metadata: labels: app: nfs-client-provisioner spec: serviceAccountName: nfs-client-provisioner containers: - name: nfs-client-provisioner image: registry-vpc.cn-shanghai.aliyuncs.com/zdbl-base/nfs-subdir-external-provisioner:v4.0.2 volumeMounts: - name: nfs-client-root mountPath: /persistentvolumes env: - name: PROVISIONER_NAME value: k8s-sigs.io/nfs-subdir-external-provisioner - name: NFS_SERVER #server及挂载点根据你的实际情况进行配置,此处使用的是阿里云的NAS value: 332fa4bbd2-rho88.cn-shanghai.nas.aliyuncs.com - name: NFS_PATH value: /k8s-nfs-sc volumes: - name: nfs-client-root nfs: server: 332fa4bbd2-rho88.cn-shanghai.nas.aliyuncs.com #上面的保持一致 path: /k8s-nfs-sc kubectl apply -k . kubectl get pods -n nfs-provisioner
验证
vim test-claim.yaml kind: PersistentVolumeClaim apiVersion: v1 metadata: name: test-claim spec: storageClassName: nfs-client #注意此处名称和class.yaml中定义的名称保持一致 accessModes: - ReadWriteMany resources: requests: storage: 1Mi --- kind: Pod apiVersion: v1 metadata: name: test-pod spec: containers: - name: test-pod image: busybox:stable command: - "/bin/sh" args: - "-c" - "touch /mnt/SUCCESS && exit 0 || exit 1" #挂载并创建一个文件 volumeMounts: - name: nfs-pvc mountPath: "/mnt" restartPolicy: "Never" volumes: - name: nfs-pvc persistentVolumeClaim: claimName: test-claim kubectl apply -f test-pod.yaml
kubectl get pods NAME READY STATUS RESTARTS AGE test-pod 0/1 Completed 0 63m
kubectl get pv NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE pvc-f4e9b50d-6a19-4c7f-bdc9-18855c712fdd 1Mi RWX Delete Bound default/test-claim nfs-client 63m
kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE test-claim Bound pvc-f4e9b50d-6a19-4c7f-bdc9-18855c712fdd 1Mi RWX nfs-client 63m
ll /mnt/k8s-nfs-sc/default-test-claim-pvc-f4e9b50d-6a19-4c7f-bdc9-18855c712fdd/SUCCESS -rw-r--r-- 1 root root 0 2024-09-03 09:29 /mnt/k8s-nfs-sc/default-test-claim-pvc-f4e9b50d-6a19-4c7f-bdc9-18855c712fdd/SUCCESS
将 NFS 存储类(nfs-client)设置为默认SC
kubectl get sc NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE nfs-client k8s-sigs.io/nfs-subdir-external-provisioner Delete Immediate false 14m
kubectl patch storageclass nfs-client -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' #通过打补丁的方式设置 nfs-client 这个SC为默认的SC storageclass.storage.k8s.io/nfs-client patched
kubectl get sc #可以看到nfs-client SC已经有default标识了 NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE nfs-client (default) k8s-sigs.io/nfs-subdir-external-provisioner Delete Immediate false 15m
四、k8s部署prometheus
1.创建prometheus相关配置文件
prometheus-cm.yaml
kind: ConfigMap apiVersion: v1 metadata: labels: app: prometheus name: prometheus-config namespace: monitoring data: prometheus.yml: | global: #全局配置,同二进制 scrape_interval: 15s scrape_timeout: 10s evaluation_interval: 1m scrape_configs: #数据抓取配置 - job_name: 'kubernetes-node' #k8s自动发现 kubernetes_sd_configs: - role: node relabel_configs: - source_labels: [__address__] regex: '(.*):10250' #kubelet 默认访问10250 但是我们的node-export部署的端口是9100,所以在抓取数据前需要修改数据采集的端口 replacement: '${1}:9100' #如果监控cadvisor 复制kubernetes-node job_name 将9100端口修改为cadvisor端口即可 target_label: __address__ action: replace - action: labelmap regex: __meta_kubernetes_node_label_(.+) - job_name: 'kubernetes-node-cadvisor' #这来抓取cadvisor数据通过 kubeapi,因为k8s已经内置了cadvisor,如果通过部署的DS获取,参考上面的 kubernetes-node job_name kubernetes_sd_configs: - role: node scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt #访问api-server需要认证,此处使用证书,相关证书和token已经自动挂载到pod中 bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc:443 - source_labels: [__meta_kubernetes_node_name] regex: (.+) target_label: __metrics_path__ replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor #变量替换,将$1替换为node名称,名称从上面的regex 正则中获取 - job_name: 'kubernetes-apiserver' kubernetes_sd_configs: - role: endpoints scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt #xxxxxxxxxxxx bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep #保留source_labels中标签,其他过滤 regex: default;kubernetes;https #与source_labels中对应,分别为名称空间 svc名称 svc中port的名称,注意此处是名称不是协议,协议在scheme字段配置 - job_name: 'kubernetes-service-endpoints' kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: 
[__meta_kubernetes_service_annotation_prometheus_io_scrape] action: keep regex: true - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] action: replace target_label: __scheme__ regex: (https?) - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] action: replace target_label: __metrics_path__ regex: (.+) - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] action: replace target_label: __address__ regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 - action: labelmap regex: __meta_kubernetes_service_label_(.+) #匹配svc metadata中 label的标签名称和值 - source_labels: [__meta_kubernetes_namespace] action: replace target_label: kubernetes_namespace - source_labels: [__meta_kubernetes_service_name] action: replace target_label: kubernetes_name
prometheus-deployment.yaml
apiVersion: apps/v1 kind: Deployment metadata: name: prometheus-server namespace: monitoring labels: app: prometheus spec: replicas: 1 selector: matchLabels: app: prometheus component: server template: metadata: labels: app: prometheus component: server annotations: prometheus.io/scrape: 'false' spec: serviceAccountName: monitor containers: - name: prometheus image: registry-vpc.cn-shanghai.aliyuncs.com/zdbl-base/prometheus:v2.36.1 command: - prometheus - --config.file=/etc/prometheus/prometheus.yml - --storage.tsdb.path=/prometheus - --storage.tsdb.retention=720h - --web.enable-lifecycle ports: - containerPort: 9090 protocol: TCP volumeMounts: - mountPath: /etc/prometheus/prometheus.yml name: prometheus-config subPath: prometheus.yml - mountPath: /prometheus/ name: prometheus-storage-volume volumes: - name: prometheus-config configMap: name: prometheus-config items: - key: prometheus.yml path: prometheus.yml mode: 0644 - name: prometheus-storage-volume #这里使用了NFS存储类创建的PVC,如果没有可以直接使用hostPath方式 persistentVolumeClaim: claimName: prometheus-data
prometheus-pvc.yaml
kind: PersistentVolumeClaim apiVersion: v1 metadata: name: prometheus-data namespace: monitoring spec: storageClassName: nfs-client accessModes: - ReadWriteMany resources: requests: storage: 10G
prometheus-rbac.yaml
apiVersion: v1 kind: ServiceAccount #prometheus监控集群需要很多权限,此处为了方便直接通过 cluster-admin授权,实际应当给予所有资源读取权限即可 metadata: name: monitor namespace: monitoring --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: monitor-clusterrolebinding roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: cluster-admin subjects: - kind: ServiceAccount name: monitor namespace: monitoring
prometheus-svc.yaml
apiVersion: v1 kind: Service metadata: name: prometheus namespace: monitoring labels: app: prometheus spec: type: NodePort ports: - port: 9090 targetPort: 9090 nodePort: 30090 protocol: TCP selector: app: prometheus component: server
2.应用配置文件
kubectl apply -f . kubectl get pods -n monitoring |grep prometheus kubectl logs -n monitoring prometheus-server-5d5cc898b6-q69wp
3.访问web页面:浏览器打开 http://节点IP:30090
prometheus参考:https://prometheus.io/docs/prometheus/latest/configuration/configuration/
标签:__,node,name,kubernetes,笔记,prometheus,nfs,集群 From: https://www.cnblogs.com/panwenbin-logs/p/18394089