1. Locate the disk and the error log
# 1. Example: to replace the disk behind osd.1, first find the physical disk that osd.1 maps to
[root@ceph1 mapper]# kubectl get po rook-ceph-osd-1-79fcff4bbd-4gq2b -n rook-ceph -o yaml | grep UUID
k:{"name":"ROOK_OSD_UUID"}:
- name: ROOK_OSD_UUID
- "\nset -ex\n\nOSD_ID=1\nOSD_UUID=d3483ef7-2ddf-46d9-9f0d-79999b25180d\nOSD_STORE_FLAG=\"--bluestore\"\nOSD_DATA_DIR=/var/lib/ceph/osd/ceph-\"$OSD_ID\"\nCV_MODE=lvm\nDEVICE=/dev/ceph-4abbbe54-da99-4cac-bdb7-f3ef744ecf78/osd-data-dc8616c1-9c3d-48cc-9eba-937861d419d4\nMETADATA_DEVICE=\"$ROOK_METADATA_DEVICE\"\nWAL_DEVICE=\"$ROOK_WAL_DEVICE\"\n\n#
\"$OSD_ID\" \"$OSD_UUID\"\n\n\t# copy the tmpfs directory to a temporary directory\n\t#
[root@ceph2 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 20G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 19G 0 part
├─centos-root 253:0 0 17G 0 lvm /
└─centos-swap 253:1 0 2G 0 lvm
sdb 8:16 0 10G 0 disk
└─ceph--1dbde574--6d46--4378--8e58--3963835c0405-osd--data--12d2a2b5--81c6--467e--b17f--cae8c63ef3f4 253:3 0 10G 0 lvm
sdc 8:32 0 10G 0 disk
└─ceph--4abbbe54--da99--4cac--bdb7--f3ef744ecf78-osd--data--dc8616c1--9c3d--48cc--9eba--937861d419d4 253:2 0 10G 0 lvm
sr0 11:0 1 4.4G 0 rom
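In the lsblk output the LV from the DEVICE= path sits under sdc, so sdc is the disk behind osd.1. A quick cross-check with standard LVM tools (assuming lvm mode, as CV_MODE=lvm in the pod spec indicates):
VG=ceph-4abbbe54-da99-4cac-bdb7-f3ef744ecf78        # VG name taken from the DEVICE= path above
pvs --select vg_name="$VG" -o pv_name,vg_name       # prints the backing disk, e.g. /dev/sdc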
2. First scale the Rook operator down to 0 so it does not recreate the OSD during the procedure
[root@ceph1 mapper]# kubectl scale deploy -n rook-ceph rook-ceph-operator --replicas=0
deployment.apps/rook-ceph-operator scaled
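Optionally confirm that the operator really is gone before touching the OSD (the app label below matches the stock Rook manifests and may differ in customized deployments):
kubectl -n rook-ceph get deploy rook-ceph-operator      # READY should read 0/0
kubectl -n rook-ceph get po -l app=rook-ceph-operator   # should return no pods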
3. Edit the cluster configuration and remove the disk that is being retired:
[root@ceph1 mapper]# kubectl edit cephcluster -n rook-ceph rook-ceph
cephcluster.ceph.rook.io/rook-ceph edited
    - devices:
      - name: sdb
      - name: sdc        # delete this entry
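For orientation, this devices list normally sits under storage.nodes in the CephCluster spec; the exact layout depends on how the cluster CR was written (useAllDevices, device filters, and so on). A minimal sketch of the surrounding structure:
  storage:
    useAllDevices: false
    nodes:
    - name: ceph2
      devices:
      - name: sdb          # sdc has been removed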
4. Manually remove the corresponding OSD
[root@ceph1 ~]# kubectl exec -it -n rook-ceph rook-ceph-tools-6f44db7c58-zw47s -- bash
ceph osd set noup
ceph osd down 1
ceph osd out 1
# Watch the rebalance with `ceph -w` and wait for it to complete
[root@ceph1 ~]# kubectl exec -it -n rook-ceph rook-ceph-tools-6f44db7c58-zw47s bash
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph osd set noup
noup is set
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.05878 root default
-3 0.01959 host ceph1
0 hdd 0.00980 osd.0 up 1.00000 1.00000
5 hdd 0.00980 osd.5 up 1.00000 1.00000
-5 0.01959 host ceph2
1 hdd 0.00980 osd.1 up 1.00000 1.00000
2 hdd 0.00980 osd.2 up 1.00000 1.00000
-7 0.01959 host ceph3
3 hdd 0.00980 osd.3 up 1.00000 1.00000
4 hdd 0.00980 osd.4 up 1.00000 1.00000
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph osd down 1
marked down osd.1.
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph osd out 1
marked out osd.1.
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph -w
cluster:
id: ee795c82-8de1-4dc9-af64-764ffbafbd19
health: HEALTH_WARN
noup flag(s) set
services:
mon: 3 daemons, quorum a,b,c (age 4h)
mgr: a(active, since 4h)
osd: 6 osds: 5 up (since 15s), 5 in (since 3s)
flags noup
data:
pools: 2 pools, 33 pgs
objects: 0 objects, 0 B
usage: 5.1 GiB used, 45 GiB / 50 GiB avail
pgs: 17 active+clean
16 active+undersized
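A rough alternative to watching `ceph -w` by hand is to poll `ceph pg stat` until no PG is undersized or degraded any more (a sketch only; adjust the states and interval as needed):
while ceph pg stat | grep -Eq 'undersized|degraded|backfill|remapped'; do
    sleep 30
done
ceph pg stat        # e.g. "33 pgs: 33 active+clean; ..."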
5. After rebalancing completes, purge the corresponding OSD
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph osd purge 1 --yes-i-really-mean-it
purged osd.1
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph auth del osd.1
entity osd.1 does not exist
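`ceph osd purge` removes the CRUSH entry, the auth key and the OSD id in one step, which is why the `ceph auth del` above reports that the entity no longer exists. An optional sanity check with standard Ceph commands:
ceph osd ls                                      # the id 1 should no longer be listed
ceph auth ls | grep 'osd\.1' || echo "no auth entry left for osd.1"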
6. Check the Ceph status and the OSD tree
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph -s
cluster:
id: ee795c82-8de1-4dc9-af64-764ffbafbd19
health: HEALTH_WARN
noup flag(s) set
services:
mon: 3 daemons, quorum a,b,c (age 4h)
mgr: a(active, since 4h)
osd: 5 osds: 5 up (since 4m), 5 in (since 4m)
flags noup
data:
pools: 2 pools, 33 pgs
objects: 0 objects, 0 B
usage: 5.1 GiB used, 45 GiB / 50 GiB avail
pgs: 33 active+clean
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.04898 root default
-3 0.01959 host ceph1
0 hdd 0.00980 osd.0 up 1.00000 1.00000
5 hdd 0.00980 osd.5 up 1.00000 1.00000
-5 0.00980 host ceph2
2 hdd 0.00980 osd.2 up 1.00000 1.00000
-7 0.01959 host ceph3
3 hdd 0.00980 osd.3 up 1.00000 1.00000
4 hdd 0.00980 osd.4 up 1.00000 1.00000
7. Delete the OSD deployment and check for a leftover prepare job
[root@ceph1 mapper]# kubectl delete deploy -n rook-ceph rook-ceph-osd-1
deployment.apps "rook-ceph-osd-1" deleted
8. Restore the cluster settings (unset noup)
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph osd unset noup
noup is unset
[root@rook-ceph-tools-6f44db7c58-zw47s /]# ceph -s
cluster:
id: ee795c82-8de1-4dc9-af64-764ffbafbd19
health: HEALTH_OK
services:
mon: 3 daemons, quorum a,b,c (age 4h)
mgr: a(active, since 4h)
osd: 5 osds: 5 up (since 6m), 5 in (since 6m)
data:
pools: 2 pools, 33 pgs
objects: 0 objects, 0 B
usage: 5.1 GiB used, 45 GiB / 50 GiB avail
pgs: 33 active+clean
9. Wipe the disk (or take it out of service)
Run the following on the node that owns the disk, and enter the device to be retired when prompted.
#!/bin/bash
yum -y install gdisk
read -p "input your disk [/dev/sda]: " DISK
# Wipe the disk
# DISK="/dev/sda"
# Zap the disk to a fresh, usable state (zap-all is important, b/c MBR has to be clean)
# You will have to run this step for all disks.
sgdisk --zap-all "$DISK"
dd if=/dev/zero of="$DISK" bs=1M count=100 oflag=direct,dsync
# These steps only have to be run once on each node
# If rook sets up osds using ceph-volume, teardown leaves some devices mapped that lock the disks.
# Remove the device-mapper entry of the ceph LV that lived on this disk
# (the dm name must match the LV shown under the disk in lsblk; here it is the one backing /dev/sdc)
ls /dev/mapper/ceph--4abbbe54--da99--4cac--bdb7--f3ef744ecf78-osd--data--dc8616c1--9c3d--48cc--9eba--937861d419d4 | xargs -I% -- dmsetup remove %
# ceph-volume setup can leave ceph-<UUID> directories in /dev (unnecessary clutter)
rm -rf /dev/ceph-4abbbe54-da99-4cac-bdb7-f3ef744ecf78/
lsblk -f
[root@ceph2 mapper]# lsblk -f
NAME FSTYPE LABEL UUID MOUNTPOINT
sda
├─sda1 xfs 4b8b54bd-9ac2-4bf0-8e64-c4a929a986fa /boot
└─sda2 LVM2_member 9kfG02-udDn-cx3l-2pxQ-fDwK-sUr1-9I079W
├─centos-root xfs 5085bc7a-4955-487e-ae36-b9357fbc9721 /
└─centos-swap swap 618b4fdd-5b18-436d-97c1-4fd786d706f4
sdb LVM2_member dCABKo-MdC0-mfNn-TT36-ZP2F-FEx1-624R8u
└─ceph--1dbde574--6d46--4378--8e58--3963835c0405-osd--data--12d2a2b5--81c6--467e--b17f--cae8c63ef3f4
sdc
sr0 iso9660 CentOS 7 x86_64 2020-11-04-11-36-43-00
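If lsblk were still to show an LVM2_member or other signature on the wiped disk, wipefs (util-linux) can clear the remaining metadata; make sure the device is no longer mapped before running it:
wipefs -a /dev/sdc      # erases all remaining filesystem/LVM signatures on the disk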
10. Bring the Rook operator back up
[root@ceph1 mapper]# kubectl scale deploy -n rook-ceph rook-ceph-operator --replicas=1
deployment.apps/rook-ceph-operator scaled
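Once the operator is back it reconciles the CephCluster; following its log shows whether it tries to prepare any new OSDs:
kubectl -n rook-ceph logs deploy/rook-ceph-operator -f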
11. Disk removal is complete
12. Adding a disk back as a new OSD
1. Edit the cluster configuration and add the disk to the devices list. If the disk has been used before, wipe it first (as in step 9) and then add it:
[root@ceph1 mapper]# kubectl edit cephcluster -n rook-ceph rook-ceph
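Mirroring step 3, re-add the device under the node's devices entry (sdc here is just the example disk from this walkthrough); once the operator reconciles, a new rook-ceph-osd pod should be created and the OSD should show up in `ceph osd tree`:
      devices:
      - name: sdb
      - name: sdc          # newly added (or re-added) disk
# then verify from the host and the toolbox
kubectl -n rook-ceph get po | grep rook-ceph-osd
ceph osd tree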