System environment: CentOS 7, kernel 3.10.0-514.26.2.el7.x86_64
Number of machines: five
Disks: four per machine; one is the system disk, the other three are reserved for Ceph
Naming: ceph1 ceph2 ceph3 ceph4 ceph5; ceph1 is the monitor node
IP plan: 192.168.238.135 ceph1    # deployment and monitor node
192.168.238.136 ceph2
192.168.238.137 ceph3
192.168.238.138 ceph4    # client machine
192.168.238.139 ceph5    # used for testing adding and removing OSD nodes
1. Prerequisites (run on all machines)
1.1 Set the hostname
# set the hostname on each node separately
# vi /etc/hostname
# hostname
ceph1
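On CentOS 7 the same change can also be made with hostnamectl, which writes /etc/hostname for you (optional alternative; use the matching name on each node):
# hostnamectl set-hostname ceph1
# hostname    # verify
ceph1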
1.2 Configure name resolution
# on every node; afterwards test that the network is reachable
# cat /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.238.135 ceph1
192.168.238.136 ceph2
192.168.238.137 ceph3
192.168.238.138 ceph4
192.168.238.139 ceph5
Test:
# ping ceph1
PING ceph1 (192.168.238.135) 56(84) bytes of data.
64 bytes from ceph1 (192.168.238.135): icmp_seq=1 ttl=64 time=0.059 ms
64 bytes from ceph1 (192.168.238.135): icmp_seq=2 ttl=64 time=0.030 ms
1.3 Firewall (disable it)
# systemctl stop firewalld
# systemctl disable firewalld
1.4 Disable SELinux
# setenforce 0    # disable temporarily
# vim /etc/selinux/config
# cat /etc/selinux/config | grep disabled    # disable permanently
#     disabled - No SELinux policy is loaded.
SELINUX=disabled
1.5 Install SSH
# yum install openssh-server
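ceph-deploy drives the other nodes over SSH, so passwordless root SSH from ceph1 to every other node is needed; a minimal sketch, run on ceph1:
# ssh-keygen    # accept the defaults
# ssh-copy-id root@ceph2
# ssh-copy-id root@ceph3
# ssh-copy-id root@ceph4
# ssh-copy-id root@ceph5
# ssh ceph2 hostname    # should log in and print ceph2 without asking for a password
ceph2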
1.6 Install NTP
# yum install ntp -y    # install ntp
# crontab -e    # edit the crontab
# add a scheduled time-sync job:
30 2 * * * /usr/sbin/ntpdate ntp1.aliyun.com
# restart crond
# systemctl restart crond
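Before relying on the cron job, it is worth checking once by hand that the node can actually reach the time source used above:
# /usr/sbin/ntpdate -q ntp1.aliyun.com    # query only, does not change the clock
# ntpq -p    # if the ntpd service itself is running, list its peers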
2. Create the Ceph cluster
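Before ceph-deploy new can be run, ceph-deploy itself has to be present on ceph1 and a working directory created; a minimal sketch (assuming a Ceph yum repository is already configured on ceph1):
[root@ceph1 ~]#yum install -y ceph-deploy
[root@ceph1 ~]#mkdir ~/my-cluster && cd ~/my-cluster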
[root@ceph1 my-cluster]#ceph-deploy new ceph1
2.1 Install the Ceph binary packages on all nodes
[root@ceph1 my-cluster]#ceph-deploy install ceph1 ceph2 ceph3
When it finishes, check the Ceph version
[root@ceph1 ~]#ceph -v
ceph version 10.2.9 (2ee413f77150c0f375ff6f10edd6c8f9c7d060d0)
2.2 Create the first Ceph monitor on ceph1
[root@ceph1 ~]#cd my-cluster/    # note the directory this must be run from
[root@ceph1 my-cluster]#ceph-deploy mon create-initial    # typically the command used at this step: it creates the initial monitor and gathers the keys
After it succeeds, check the cluster status
[root@ceph1 my-cluster]#ceph -s
    cluster b014bf0d-43ea-424f-9358-a32ae5a5cd59
     health HEALTH_ERR    # the cluster is in an unhealthy state
            no osds
     monmap e1: 1 mons at {ceph1=192.168.238.140:6789/0}
            election epoch 3, quorum 0 ceph1
     osdmap e1: 0 osds: 0 up, 0 in
            flags sortbitwise,require_jewel_osds
      pgmap v2: 64 pgs, 1 pools, 0 bytes data, 0 objects
            0 kB used, 0 kB / 0 kB avail
                  64 creating
2.3 Create OSDs on ceph1
2.3.1 List the disks available on ceph1
[root@ceph1 my-cluster]#ceph-deploy disk list ceph1
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO  ] Invoked (1.5.37): /usr/bin/ceph-deploy disk list ceph1
[ceph_deploy.cli][INFO  ] ceph-deploy options:
[ceph_deploy.cli][INFO  ]  username        : None
[ceph_deploy.cli][INFO  ]  verbose         : False
[ceph_deploy.cli][INFO  ]  overwrite_conf  : False
[ceph_deploy.cli][INFO  ]  subcommand      : list
[ceph_deploy.cli][INFO  ]  quiet           : False
[ceph_deploy.cli][INFO  ]  cd_conf         :
[ceph_deploy.cli][INFO  ]  cluster         : ceph
[ceph_deploy.cli][INFO  ]  func            :
[ceph_deploy.cli][INFO  ]  ceph_conf       : None
[ceph_deploy.cli][INFO  ]  default_release : False
[ceph_deploy.cli][INFO  ]  disk            : [('ceph1', None, None)]
[ceph1][DEBUG] connected to host: ceph1
[ceph1][DEBUG] detect platform information from remote host
[ceph1][DEBUG] detect machine type
[ceph1][DEBUG] find the location of an executable
[ceph_deploy.osd][INFO] Distro info: CentOS Linux 7.3.1611 Core
[ceph_deploy.osd][DEBUG] Listing disks on ceph1...
[ceph1][DEBUG] find the location of an executable
[ceph1][INFO] Running command: /usr/sbin/ceph-disk list
[ceph1][DEBUG] /dev/dm-0 other, xfs, mounted on /
[ceph1][DEBUG] /dev/dm-1 swap, swap
[ceph1][DEBUG] /dev/sda :
[ceph1][DEBUG ]  /dev/sda2 other, LVM2_member
[ceph1][DEBUG ]  /dev/sda1 other, xfs, mounted on /boot
[ceph1][DEBUG ] /dev/sdb :
[ceph1][DEBUG ] /dev/sdb other, unknown
[ceph1][DEBUG ] /dev/sdc other, unknown    # the three spare disks
[ceph1][DEBUG ] /dev/sdd other, unknown
[ceph1][DEBUG ] /dev/sr0 other, unknown
2.3.2 disk zap wipes the existing partition table and disk contents (be very careful which disks you point it at)
[root@ceph1 my-cluster]#ceph-deploy disk zap ceph1:sdb ceph1:sdc ceph1:sdd
2.3.3 osd create prepares the disks: it first formats each disk with xfs, then activates the first and second partitions as the data partition and the journal partition
[root@ceph1 my-cluster]#ceph-deploy osd create ceph1:sdb ceph1:sdc ceph1:sdd
Check the Ceph status and the number of OSDs; at this stage the cluster is still in an unhealthy state
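A quick way to confirm the three OSDs actually came up before moving on (the epoch numbers will differ; this is only the kind of output to expect):
[root@ceph1 my-cluster]#ceph osd stat
     osdmap e10: 3 osds: 3 up, 3 in
[root@ceph1 my-cluster]#ceph osd tree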
3. Expand the Ceph cluster
3.1 Increase the number of monitors; note that the total number of mons must be odd
Edit the ceph.conf file on ceph1 and add:
public network = 192.168.238.0/24
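After editing ceph.conf, the change normally has to be pushed out to the other nodes before the new monitors are created; a hedged sketch using ceph-deploy:
[root@ceph1 my-cluster]#ceph-deploy --overwrite-conf config push ceph1 ceph2 ceph3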
3.2 Run ceph-deploy on ceph1 to create the additional monitors (ceph2, ceph3)
[root@ceph1 my-cluster]#ceph-deploy mon create ceph2
[root@ceph1 my-cluster]#ceph-deploy mon create ceph3
Check the Ceph cluster status:
[root@ceph1 my-cluster]#ceph -s
    cluster b014bf0d-43ea-424f-9358-a32ae5a5cd59
     health HEALTH_ERR
            64 pgs are stuck inactive for more than 300 seconds
            64 pgs degraded
            64 pgs stuck degraded
            64 pgs stuck inactive
            64 pgs stuck unclean
            64 pgs stuck undersized
            64 pgs undersized
            too few PGs per OSD (21 < min 30)
     monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
            election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
     osdmap e10: 3 osds: 3 up, 3 in
            flags sortbitwise,require_jewel_osds
      pgmap v18: 64 pgs, 1 pools, 0 bytes data, 0 objects
            101504 kB used, 15227 MB / 15326 MB avail
                  64 undersized+degraded+peered
[root@ceph1 my-cluster]# ceph mon stat
e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}, election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
3.3 On ceph2 and ceph3, use ceph-deploy to run disk list and disk zap, then run osd create to create the OSDs:
[root@ceph1 my-cluster]#ceph-deploy disk list ceph2 ceph3
[root@ceph1 my-cluster]#ceph-deploy disk zap ceph2:sdb ceph2:sdc ceph2:sdd
[root@ceph1 my-cluster]#ceph-deploy disk zap ceph3:sdb ceph3:sdc ceph3:sdd
[root@ceph1 my-cluster]#ceph-deploy osd create ceph2:sdb ceph2:sdc ceph2:sdd
[root@ceph1 my-cluster]#ceph-deploy osd create ceph3:sdb ceph3:sdc ceph3:sdd
3.4 After more OSDs have been added, adjust the rbd pool's pg_num and pgp_num so that the cluster can reach HEALTH_OK
[root@ceph1 my-cluster]#ceph -s
    cluster b014bf0d-43ea-424f-9358-a32ae5a5cd59
     health HEALTH_WARN
            too few PGs per OSD (21 < min 30)
     monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
            election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
     osdmap e43: 9 osds: 9 up, 9 in
            flags sortbitwise,require_jewel_osds
      pgmap v97: 64 pgs, 1 pools, 0 bytes data, 0 objects
            305 MB used, 45675 MB / 45980 MB avail
                  64 active+clean
[root@ceph1 my-cluster]#ceph osd pool set rbd pg_num 256
set pool 0 pg_num to 256
[root@ceph1 my-cluster]#ceph osd pool set rbd pgp_num 256
set pool 0 pgp_num to 256
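The value 256 follows the usual rule of thumb rather than anything specific to this cluster: aim for roughly 100 PGs per OSD after replication, rounded to a power of two. With 9 OSDs and the default pool size of 3:
PGs per OSD = pg_num x replicas / OSDs
    before: 64 x 3 / 9 ≈ 21     (below the minimum of 30, hence the earlier warning)
    after: 256 x 3 / 9 ≈ 85     (comfortably inside the recommended range)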
Check the cluster status:
[root@ceph1 my-cluster]#ceph -s
    cluster b014bf0d-43ea-424f-9358-a32ae5a5cd59
     health HEALTH_OK
     monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
            election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
     osdmap e47: 9 osds: 9 up, 9 in
            flags sortbitwise,require_jewel_osds
      pgmap v111: 256 pgs, 1 pools, 0 bytes data, 0 objects
            315 MB used, 45665 MB / 45980 MB avail
                 215 active+clean
                  41 activating
4. Useful commands
4.1 Check the Ceph installation status
ceph -s / ceph status
4.2 Watch the cluster health in real time
ceph -w
4.3 Check the Ceph monitor quorum status
ceph quorum_status --format json-pretty
4.4 Dump the Ceph monitor information
ceph mon dump
4.5 Check cluster usage
ceph df
4.6 Check the Ceph monitor, OSD and PG status
ceph mon stat
ceph osd stat
ceph pg stat
4.7 List the PGs
ceph pg dump
4.8 List the Ceph storage pools
ceph osd lspools
4.9 Check the OSD CRUSH map
ceph osd tree
4.10 List the cluster's authentication keys
ceph auth list
5. Deploy a Ceph MDS
An MDS is only needed by the CephFS filesystem; the other access methods (block storage and object storage) do not need one
5.1 On the ceph1 node, use ceph-deploy to deploy and configure an MDS on the ceph2 node
[root@ceph1 my-cluster]#ceph-deploy --overwrite-conf mds create ceph2
5.2 The following steps make CephFS accessible
[root@ceph1 my-cluster]#ssh ceph2 ceph mds stat
e2:, 1 up:standby
5.3 Create the data and metadata pools for the Ceph filesystem
[root@ceph1 my-cluster]#ceph osd pool create cephfs_data 64 64
pool 'cephfs_data' created
[root@ceph1 my-cluster]#ceph osd pool create cephfs_metadata 64 64
pool 'cephfs_metadata' created
5.4 Finally, create the Ceph filesystem. Once this command has run, the MDS moves to the active state and CephFS becomes usable
[root@ceph1 my-cluster]#ceph fs new cephfs cephfs_metadata cephfs_data
new fs with metadata pool 2 and data pool 1
Verify the MDS and CephFS status
[root@ceph1 my-cluster]#ceph mds stat
e5: 1/1/1 up {0=ceph2=up:active}
[root@ceph1 my-cluster]# ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools:[cephfs_data ]
6. Mount the filesystem
6.1 Access CephFS through the kernel client:
6.1.1 Check the kernel version:
[root@ceph4 ~]#uname -r
3.10.0-514.26.2.el7.x86_64
6.1.2 Create the mount point directory
[root@ceph4 ~]#mkdir /mnt/cephfs
Get the key the client will mount with (the client.admin keyring is used here; a dedicated client.cephfs user is sketched further below)
[root@ceph1 my-cluster]#cat ceph.client.admin.keyring    # view the key
[client.admin]
        key = AQBq2LFZkfx9LxAAT3k2LlmxoByWnbGulrmyNg==
        caps mds = "allow *"
        caps mon = "allow *"
        caps osd = "allow *"
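The text above mentions a client.cephfs user; if you prefer a dedicated CephFS user instead of mounting with client.admin, something along these lines can be used (the capabilities shown are an illustrative minimum, not taken from the original setup):
[root@ceph1 my-cluster]#ceph auth get-or-create client.cephfs mon 'allow r' mds 'allow' osd 'allow rw pool=cephfs_data' -o ceph.client.cephfs.keyring
[root@ceph1 my-cluster]#ceph auth get-key client.cephfs    # prints only the secret, for use with -o name=cephfs,secret=...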
6.1.3 Mount test
The prerequisite is that the client (ceph4) also has Ceph installed
[root@ceph1 my-cluster]#ceph-deploy admin ceph4    # push the configuration to ceph4, copying ceph.conf and ceph.client.admin.keyring into its /etc/ceph directory
[root@ceph1 my-cluster]#ceph-deploy install ceph4    # run on ceph1
[root@ceph4 ~]#mount -t ceph ceph1:6789:/ /mnt/cephfs -o name=admin,secret=AQBq2LFZkfx9LxAAT3k2LlmxoByWnbGulrmyNg==
For better security, put the secret in a file and reference that file instead
[root@ceph4 ceph]#echo AQBq2LFZkfx9LxAAT3k2LlmxoByWnbGulrmyNg== > /etc/ceph/cephfskey
[root@ceph4 ceph]#mount -t ceph ceph1:6789:/ /mnt/cephfs -o name=admin,secretfile=/etc/ceph/cephfskey    # mount test
6.1.4 Configure automatic mounting at boot
[root@ceph4 ceph]#echo "ceph1:6789:/ /mnt/cephfs ceph name=admin,secretfile=/etc/ceph/cephfskey,noatime 0 2" >> /etc/fstab
[root@ceph4 ~]#umount /mnt/cephfs
[root@ceph4 ~]#df
Filesystem            1K-blocks    Used Available Use% Mounted on
/dev/mapper/cl-root    18855936 1620700  17235236   9% /
devtmpfs                2312440       0   2312440   0% /dev
tmpfs                   2419200       0   2419200   0% /dev/shm
tmpfs                   2419200   47000   2372200   2% /run
tmpfs                   2419200       0   2419200   0% /sys/fs/cgroup
/dev/sda1               1038336  187436    850900  19% /boot
tmpfs                    483840       0    483840   0% /run/user/0
[root@ceph4 ~]#mount -a
[root@ceph4 ~]#df
Filesystem               1K-blocks    Used Available Use% Mounted on
/dev/mapper/cl-root       18855936 1620700  17235236   9% /
devtmpfs                   2312440       0   2312440   0% /dev
tmpfs                      2419200       0   2419200   0% /dev/shm
tmpfs                      2419200   47000   2372200   2% /run
tmpfs                      2419200       0   2419200   0% /sys/fs/cgroup
/dev/sda1                  1038336  187436    850900  19% /boot
tmpfs                       483840       0    483840   0% /run/user/0
192.168.238.140:6789:/    47083520  331776  46751744   1% /mnt/cephfs
6.2 Access CephFS with the FUSE client
6.2.1 Install the Ceph FUSE package on ceph4
[root@ceph4 ~]#yum install -y ceph-fuse
6.2.2 Run on ceph1 to push the configuration to ceph4
[root@ceph1 my-cluster]#ceph-deploy admin ceph4    # copies ceph.conf and ceph.client.admin.keyring into the client's /etc/ceph directory
[ceph_deploy.conf][DEBUG ] found configuration file at:/root/.cephdeploy.conf
[ceph_deploy.cli][INFO  ] Invoked (1.5.37): /usr/bin/ceph-deploy admin ceph4
[ceph_deploy.cli][INFO  ] ceph-deploy options:
[ceph_deploy.cli][INFO  ]  username        : None
[ceph_deploy.cli][INFO  ]  verbose         : False
[ceph_deploy.cli][INFO  ]  overwrite_conf  : False
[ceph_deploy.cli][INFO  ]  quiet           : False
[ceph_deploy.cli][INFO  ]  cd_conf         :
[ceph_deploy.cli][INFO  ]  cluster         : ceph
[ceph_deploy.cli][INFO  ]  client          : ['ceph4']
[ceph_deploy.cli][INFO  ]  func            :
[ceph_deploy.cli][INFO  ]  ceph_conf       : None
[ceph_deploy.cli][INFO  ]  default_release : False
[ceph_deploy.admin][DEBUG ] Pushing admin keys and conf to ceph4
[ceph4][DEBUG ] connected to host: ceph4
[ceph4][DEBUG ] detect platform information from remote host
[ceph4][DEBUG ] detect machine type
[ceph4][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf
6.2.3 Mount CephFS with the ceph-fuse client
[root@ceph4 ceph]#ceph-fuse /mnt/cephfs/    or
[root@ceph4 ~]#ceph-fuse -m ceph1:6789 /mnt/cephfs/    or
[root@ceph4 ~]#ceph-fuse --keyring /etc/ceph/ceph.client.admin.keyring --name client.admin -m ceph1:6789 /mnt/cephfs
ceph-fuse[3285]: starting ceph client
2017-09-08 09:19:06.128647 7f0f90d90ec0 -1 init, newargv =0x7f0f9b390780 newargc=11
ceph-fuse[3285]: starting fuse
Aborted
[root@ceph4 ~]#df
Filesystem           1K-blocks    Used Available Use% Mounted on
/dev/mapper/cl-root   18855936 1621360  17234576   9% /
devtmpfs               2312440       0   2312440   0% /dev
tmpfs                  2419200       0   2419200   0% /dev/shm
tmpfs                  2419200   47040   2372160   2% /run
tmpfs                  2419200       0   2419200   0% /sys/fs/cgroup
/dev/sda1              1038336  187436    850900  19% /boot
tmpfs                   483840       0    483840   0% /run/user/0
ceph-fuse             47083520  331776  46751744   1% /mnt/cephfs
6.2.4 Configure automatic mounting of the FUSE client at boot
[root@ceph4 ceph]#cat /etc/fstab    # after editing fstab
# /etc/fstab
# Created by anaconda on Sun Sep3 04:52:45 2017
#
# Accessible filesystems, by reference, are maintained under'/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) formore info
#
/dev/mapper/cl-root     /          xfs     defaults        0 0
UUID=e31495e0-b86f-4c4c-a321-322ed31b0906 /boot   xfs   defaults   0 0
/dev/mapper/cl-swap     swap       swap    defaults        0 0
#ceph1:6789:/ /mnt/cephfs ceph name=admin,secretfile=/etc/ceph/cephfskey,noatime 0 2
id=admin,keyring=ceph.client.admin.keyring /mnt/cephfs fuse.ceph defaults 0 0    # the fuse.ceph entry
6.3 Mount over NFS (NFS-Ganesha)
6.3.1 Install the packages nfs-ganesha needs on ceph1
[root@ceph1 ceph]#yum install -y nfs-utils nfs-ganesha nfs-ganesha-fsal-ceph
6.3.2 Turn off the firewall
6.3.3 Start the services NFS requires
[root@ceph1 my-cluster]#systemctl start rpcbind
[root@ceph1 my-cluster]#systemctl enable rpcbind
[root@ceph1 my-cluster]#systemctl start rpc-statd.service
6.3.4 Edit the NFS-Ganesha configuration file /etc/ganesha/ganesha.conf and enter the following:
Export {
    Export_ID = 1;
    Path = "/";
    Pseudo = "/";
    Access_Type = RW;
    NFS_Protocols = "3";
    Squash = No_Root_Squash;
    Transport_Protocols = TCP;
    SecType = "none";
    FSAL {
        Name = CEPH;
    }
}
6.3.5 Start it:
[root@ceph1 ganesha]#systemctl start nfs-ganesha
[root@ceph1 ganesha]#systemctl enable nfs-ganesha    # enable it at boot
Created symlink from /etc/systemd/system/multi-user.target.wants/nfs-ganesha.service to /usr/lib/systemd/system/nfs-ganesha.service.
[root@ceph1 ganesha]#showmount -e
Export list for ceph1:
6.3.6 Client-side setup
Install the NFS client software
[root@ceph4 ceph]#yum install nfs-utils -y
Mount:
[root@ceph4 ceph]#mount -o rw,noatime ceph1:/ /mnt/cephfs/    (known issue: the mount ends up read-only and cannot be written to)
192.168.238.140:/ on /mnt/cephfs type nfs4 (rw,noatime,vers=4.0,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=192.168.238.138,local_lock=none,addr=192.168.238.140)
7. Ceph cluster maintenance
7.1 Add a mon node; the total number of mon nodes must always be odd
[root@ceph1 ceph]#ceph-deploy mon create ceph2
[root@ceph1 ceph]# ceph mon stat
e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}, election epoch 8, quorum 0,1,2 ceph1,ceph3,ceph2
7.2 Add a Ceph OSD node
7.2.1 Install the Ceph software on the new node
7.2.2 Check the existing OSD nodes
[root@ceph1 my-cluster]#ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04408 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
7.2.3 List all of ceph5's disks
[root@ceph1 my-cluster]#ceph-deploy disk list ceph5
7.2.4 Add ceph5's disks to the Ceph cluster
[root@ceph1 my-cluster]#ceph-deploy disk zap ceph5:sdb ceph5:sdc ceph5:sdd
[root@ceph1 my-cluster]#ceph-deploy osd create ceph5:sdb ceph5:sdc ceph5:sdd
[root@ceph1 my-cluster]#ceph-deploy admin ceph2 ceph3 ceph4 ceph5
7.2.5 After they join, you can see that Ceph rebalances the data across the new OSDs
Note: if an OSD shows as down after joining, check whether the firewall is the cause (see the firewalld sketch after the osd tree output below)
[root@ceph1 my-cluster]#ceph -s
cluster 74683cbd-e82e-4264-b8bb-930424bc6a9b
health HEALTH_OK
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e254: 12 osds: 12 up, 12 in
flags sortbitwise,require_jewel_osds
pgmap v750: 512 pgs, 1 pools, 0 bytes data, 0 objects
525 MB used, 60781 MB / 61307 MB avail
512 active+clean
[root@ceph1 my-cluster]#ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.05878 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
-5 0.01469     host ceph5
 9 0.00490         osd.9       up  1.00000          1.00000
10 0.00490         osd.10      up  1.00000          1.00000
11 0.00490         osd.11      up  1.00000          1.00000
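As noted in 7.2.5, an OSD that joins and then shows as down is often blocked by the firewall. Instead of disabling firewalld entirely, the Ceph ports can be opened; a hedged sketch (6789 is the monitor port, 6800-7300 the default OSD port range):
# firewall-cmd --zone=public --add-port=6789/tcp --permanent
# firewall-cmd --zone=public --add-port=6800-7300/tcp --permanent
# firewall-cmd --reload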
7.3 Remove a Ceph OSD node
7.3.1 Check the current OSD nodes
[root@ceph1 my-cluster]#ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.05878 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
-5 0.01469     host ceph5
 9 0.00490         osd.9       up  1.00000          1.00000
10 0.00490         osd.10      up  1.00000          1.00000
11 0.00490         osd.11      up  1.00000          1.00000
7.3.2 Mark the OSDs on ceph5 out
[root@ceph1 ceph]#ceph osd out osd.9
marked out osd.9.
[root@ceph1 ceph]#ceph osd out osd.10
marked out osd.10
[root@ceph1 ceph]#ceph osd out osd.11
marked out osd.11
[root@ceph1 ceph]#ceph health
7.3.3 Stop the OSD services on ceph5 (three OSDs)
[root@ceph1 my-cluster]#ssh ceph5 systemctl stop ceph-osd@9.service
[root@ceph1 my-cluster]#ssh ceph5 systemctl stop ceph-osd@10.service
[root@ceph1 my-cluster]#ssh ceph5 systemctl stop ceph-osd@11.service
7.3.4 Check the OSD tree status
[root@ceph1 my-cluster]#ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.05878 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
-5 0.01469     host ceph5
 9 0.00490         osd.9     down        0          1.00000
10 0.00490         osd.10    down        0          1.00000
11 0.00490         osd.11    down        0          1.00000
7.3.5 Remove the OSDs from the CRUSH map
[root@ceph1 my-cluster]#ceph osd crush remove osd.9
removed item id 9 name 'osd.9' from crush map
[root@ceph1 my-cluster]#ceph osd crush remove osd.10
removed item id 10 name 'osd.10' from crush map
[root@ceph1 my-cluster]#ceph osd crush remove osd.11
removed item id 11 name 'osd.11' from crush map
7.3.6 Check that the cluster status is healthy
[root@ceph1 my-cluster]#ceph health
HEALTH_OK
7.3.7 Delete the OSD authentication keys
[root@ceph1 my-cluster]#ceph auth del osd.9
updated
[root@ceph1 my-cluster]#ceph auth del osd.10
updated
[root@ceph1 my-cluster]#ceph auth del osd.11
updated
7.3.8 Remove the OSDs, then check the cluster state
[root@ceph1 my-cluster]#ceph osd rm osd.9
removed osd.9
[root@ceph1 my-cluster]#ceph osd rm osd.10
removed osd.10
[root@ceph1 my-cluster]#ceph osd rm osd.11
removed osd.11
[root@ceph1 my-cluster]#ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04408 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
-5       0     host ceph5    # the host entry is still there
# ceph5's OSDs no longer exist: 9 osds, 9 up and 9 in
7.3.9 Remove ceph5 from the CRUSH map to wipe out its last trace
[root@ceph1 my-cluster]#ceph osd crush remove ceph5    # remove ceph5
removed item id -5 name 'ceph5' from crush map
[root@ceph1 my-cluster]# ceph osd tree    # check the result
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04408 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
7.4 Replace a failed disk in the Ceph cluster
7.4.1 Check the cluster status
[root@ceph1 my-cluster]#ceph -s
cluster 74683cbd-e82e-4264-b8bb-930424bc6a9b
health HEALTH_OK
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e279: 9 osds: 9 up, 9 in
flags sortbitwise,require_jewel_osds
pgmap v1013: 512 pgs, 1 pools, 0 bytes data, 0 objects
403 MB used, 45577 MB / 45980 MB avail
512 active+clean
7.4.2 Simulate a failure
Shut down ceph3 and remove one of its disks to simulate a failure
[root@ceph1 my-cluster]# ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04408 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8     down        0          1.00000    # this is now the failed disk
7.4.3 Stop the failed OSD
[root@ceph1 my-cluster]#ssh ceph3 systemctl stop ceph-osd@8.service
7.4.4 Remove the failed OSD
[root@ceph1 my-cluster]#ceph osd out osd.8    # mark the disk as OUT
osd.8 is already out.
[root@ceph1 my-cluster]#ceph osd crush rm osd.8    # remove it from the Ceph CRUSH map
removed item id 8 name 'osd.8' from crush map
[root@ceph1 my-cluster]#ceph auth del osd.8    # delete the OSD's key
updated
[root@ceph1 my-cluster]#ceph osd rm osd.8    # remove the OSD from the cluster
removed osd.8
[root@ceph1 my-cluster]#ceph -s    # check the status
cluster 74683cbd-e82e-4264-b8bb-930424bc6a9b
health HEALTH_ERR
14 pgs are stuck inactive for more than 300 seconds
6 pgs degraded
14 pgs peering
6 pgs stuck degraded
14 pgs stuck inactive
105 pgs stuck unclean
6 pgs stuck undersized
6 pgs undersized
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 10, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e292: 8 osds: 8 up, 8 in
flags sortbitwise,require_jewel_osds
pgmap v1046: 512 pgs, 1 pools, 0 bytes data, 0 objects
280 MB used, 30373 MB / 30653 MB avail
407 active+clean
68 active
17 active+remapped
14 remapped+peering
6 active+undersized+degraded
[root@ceph1 my-cluster]# ceph osd tree    # check the status
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.03918 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.00980     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
7.4.5 Recovery
Shut down the virtual machine and add a new disk
[root@ceph3 ~]#fdisk -l    # the newly added disk is now visible
Disk /dev/sdd: 10.7 GB, 10737418240 bytes, 20971520 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Check ceph3's disk information
[root@ceph1 my-cluster]#ceph-deploy disk list ceph3
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO  ] Invoked (1.5.37): /usr/bin/ceph-deploy disk list ceph3
[ceph_deploy.cli][INFO  ] ceph-deploy options:
[ceph_deploy.cli][INFO  ]  username        : None
[ceph_deploy.cli][INFO  ]  verbose         : False
[ceph_deploy.cli][INFO  ]  overwrite_conf  : False
[ceph_deploy.cli][INFO  ]  subcommand      : list
[ceph_deploy.cli][INFO  ]  quiet           : False
[ceph_deploy.cli][INFO  ]  cd_conf         :
[ceph_deploy.cli][INFO  ]  cluster         : ceph
[ceph_deploy.cli][INFO  ]  func            :
[ceph_deploy.cli][INFO  ]  ceph_conf       : None
[ceph_deploy.cli][INFO  ]  default_release : False
[ceph_deploy.cli][INFO  ]  disk            : [('ceph3', None, None)]
[ceph3][DEBUG ] connected to host: ceph3
[ceph3][DEBUG ] detect platform information from remote host
[ceph3][DEBUG ] detect machine type
[ceph3][DEBUG ] find the location of an executable
[ceph_deploy.osd][INFO  ] Distro info: CentOS Linux 7.3.1611 Core
[ceph_deploy.osd][DEBUG ] Listing disks on ceph3...
[ceph3][DEBUG ] find the location of an executable
[ceph3][INFO  ] Running command: /usr/sbin/ceph-disk list
[ceph3][DEBUG ] /dev/dm-0 other, xfs, mounted on /
[ceph3][DEBUG ] /dev/dm-1 swap, swap
[ceph3][DEBUG ] /dev/sda :
[ceph3][DEBUG ]  /dev/sda2 other, LVM2_member
[ceph3][DEBUG ]  /dev/sda1 other, xfs, mounted on /boot
[ceph3][DEBUG ] /dev/sdb :
[ceph3][DEBUG ]  /dev/sdb2 ceph journal, for /dev/sdb1
[ceph3][DEBUG ]  /dev/sdb1 ceph data, active, cluster ceph, osd.6, journal /dev/sdb2
[ceph3][DEBUG ] /dev/sdc :
[ceph3][DEBUG ]  /dev/sdc2 ceph journal, for /dev/sdc1
[ceph3][DEBUG ]  /dev/sdc1 ceph data, active, cluster ceph, osd.7, journal /dev/sdc2
[ceph3][DEBUG ] /dev/sdd other, unknown    # the disk that was just swapped in
[ceph3][DEBUG ] /dev/sr0 other, iso9660
Run disk zap on sdd
[root@ceph1 my-cluster]#ceph-deploy disk zap ceph3:sdd
Create an OSD on the newly added disk
[root@ceph1 my-cluster]#ceph-deploy --overwrite-conf osd create ceph3:sdd
[root@ceph1 my-cluster]# ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04408 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000    # it has been added back
Before creating the new OSD:
[root@ceph1 ~]#ceph -s
cluster 74683cbd-e82e-4264-b8bb-930424bc6a9b
health HEALTH_OK
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 14, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e303: 8 osds: 8 up, 8 in
flags sortbitwise,require_jewel_osds
pgmap v1078: 512 pgs, 1 pools, 0 bytes data, 0 objects
346 MB used, 40525 MB / 40871 MB avail
512 active+clean
After:
[root@ceph1 my-cluster]#ceph -s
cluster 74683cbd-e82e-4264-b8bb-930424bc6a9b
health HEALTH_OK
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 14, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e309: 9 osds: 9 up, 9 in
flags sortbitwise,require_jewel_osds
pgmap v1096: 512 pgs, 1 pools, 0 bytes data, 0 objects
384 MB used, 45596 MB / 45980 MB avail
512 active+clean
The disk replacement was successful
Troubleshooting:
1. ceph-deploy install fails
[ceph_mon][DEBUG ] Retrieving http://ceph.com/rpm-firefly/el6/noarch/ceph-release-1-0.el6.noarch.rpm
[ceph_mon][DEBUG ] Preparing...    ##################################################
[ceph_mon][WARNIN] file /etc/yum.repos.d/ceph.repo from install of ceph-release-1-0.el6.noarch conflicts with file from package ceph-release-1-0.el6.noarch
[ceph_mon][ERROR ] RuntimeError: command returned non-zero exit status: 1
[ceph_deploy][ERROR ] RuntimeError: Failed to execute command: rpm -Uvh --replacepkgs http://ceph.com/rpm-firefly/el6/noarch/ceph-release-1-0.el6.noarch.rpm
Solution:
yum -y remove ceph-release*
Remove the conflicting ceph-release package, then run the install again
2. Deployment fails when public_network contains more than one network
Solution:
Add to ceph.conf:
public_network = 192.168.238.0/24
3. [root@ceph1 my-cluster]# ceph health
HEALTH_WARN clock skew detected on mon.ceph3,mon.ceph2; Monitor clock skew detected
Solution:
[root@ceph1 my-cluster]#/usr/sbin/ntpdate ntp1.aliyun.com    # sync the time
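If the clock-skew warning keeps coming back even with NTP in place, the monitors' tolerance can also be relaxed slightly; a hedged sketch (0.5 s is only an example value, the default is 0.05 s, and the mon daemons need a restart afterwards):
add to the [global] section of ceph.conf on ceph1:
mon_clock_drift_allowed = 0.5
[root@ceph1 my-cluster]#ceph-deploy --overwrite-conf config push ceph1 ceph2 ceph3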
4. How to handle the error reported when redeploying after ceph.conf has been modified
[root@ceph1 my-cluster]# ceph-deploy admin ceph1 ceph2 ceph3
[ceph3][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf
[ceph_deploy.admin][ERROR ] RuntimeError: config file /etc/ceph/ceph.conf exists with different content; use --overwrite-conf to overwrite
[ceph_deploy][ERROR ] GenericError: Failed to configure 3 admin hosts
Solution:
[root@ceph1 my-cluster]#ceph-deploy --overwrite-conf admin ceph1 ceph2 ceph3
5. [ceph3][ERROR ] RuntimeError: command returned non-zero exit status: 1
[ceph_deploy.osd][ERROR ] Failed to execute command: /usr/sbin/ceph-disk -v prepare --cluster ceph --fs-type xfs -- /dev/sdb1
[ceph_deploy][ERROR ] GenericError: Failed to create 3 OSDs
Solution: install the xfs tools on every node
[root@ceph1 my-cluster]#yum install xfs* -y
6. How to handle a Ceph OSD that is down
[root@ceph1 my-cluster]#ceph-deploy osd activate ceph5:/dev/sdc1
Also, when deploying, always check the firewall state or open the required ports
OSD start command
systemctl start ceph-osd@1.service
Stop a failed OSD
systemctl stop ceph-osd@1.service
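To find out why an OSD went down in the first place, the service status and its log are usually the quickest places to look (osd.1 is only an example id):
systemctl status ceph-osd@1.service
journalctl -u ceph-osd@1.service -n 50    # last 50 journal lines for the daemon
tail -n 50 /var/log/ceph/ceph-osd.1.log    # Ceph's own OSD log file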