Upgrading RGW from hammer to jewel: a hands-on walkthrough (by 秦牧羊)

Preface

This post comes from a write-up by 秦牧羊. It walks through an upgrade from hammer to jewel, including how the failures encountered along the way were handled; it is a very detailed hands-on account.

Initial state

Pool status

root@demo:/home/demouser# rados lspools
rbd
.cn.rgw.root
.cn-zone1.rgw.root
.cn-zone1.rgw.domain
.cn-zone1.rgw.control
.cn-zone1.rgw.gc
.cn-zone1.rgw.buckets.index
.cn-zone1.rgw.buckets.extra
.cn-zone1.rgw.buckets
.cn-zone1.log
.cn-zone1.intent-log
.cn-zone1.usage
.cn-zone1.users
.cn-zone1.users.email
.cn-zone1.users.swift
.cn-zone1.users.uid

ceph.conf configuration

[client.radosgw.us-zone1]
rgw dns name = s3.ceph.work
rgw frontends = fastcgi
host = ceph.work
rgw region = cn
rgw region root pool = .cn.rgw.root
rgw zone = us-zone1
rgw zone root pool = .cn-zone1.rgw.root
keyring = /etc/ceph/ceph.client.radosgw.keyring
rgw socket path = /home/ceph/var/run/ceph-client.radosgw.us-zone1.sock
log file = /home/ceph/log/radosgw.us-zone1.log
rgw print continue = false
rgw content length compat = true

Metadata check

root@demo:/home/demouser# radosgw-admin metadata list user --name client.radosgw.us-zone1
[
"en-user1",
"us-zone1",
"us-user1",
"cn-user1",
"en-zone1",
"cn-zone1",
"cn-user2"

]
root@demo:/home/demouser# radosgw-admin metadata list bucket --name client.radosgw.us-zone1
[
"cn-test1",
"us-test1",
"en-test1",
"cn-test2"

]

Software versions and cluster status

root@demo:/home/demouser# ceph -v
ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43)
root@demo:/home/demouser# ceph -s
cluster 23d6f3f9-0b86-432c-bb18-1722f73e93e0
health HEALTH_OK
monmap e1: 1 mons at {ceph.work=10.63.48.19:6789/0}
election epoch 1, quorum 0 ceph.work
osdmap e43: 3 osds: 3 up, 3 in
pgmap v907719: 544 pgs, 16 pools, 2217 kB data, 242 objects
3119 MB used, 88994 MB / 92114 MB avail
544 active+clean

Upgrading Ceph to the latest jewel

One caveat before starting: if your Ceph version is below 0.94.7, upgrading straight to 10.x will cause problems, because the osdmap data structures in the older releases are incompatible with the newer ones. So first upgrade to the latest hammer.
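
Before switching repositories, it is also worth confirming what every daemon is actually running, not just which packages are installed. A minimal pre-flight sketch (the OSD ids are this demo cluster's, and the "ceph tell ... version" form is assumed to be available on this release):

root@demo:/home/demouser# ceph -v                    # version of the installed binaries
root@demo:/home/demouser# for i in 0 1 2; do ceph tell osd.$i version; done   # version each OSD daemon is actually running
root@demo:/home/demouser# /etc/init.d/ceph status    # the sysvinit script also reports per-daemon versions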

root@demo:/home/demouser# vi /etc/apt/sources.list.d/ceph.list 
deb http://mirrors.163.com/ceph/debian-hammer/ jessie main # use the 163 mirror to update to the latest hammer

root@demo:/home/demouser# apt-get update
...
Fetched 18.7 kB in 11s (1,587 B/s)
Reading package lists... Done

root@demo:/home/demouser# apt-cache policy ceph
ceph:
Installed: 0.94.5-1~bpo80+1 # currently installed version
Candidate: 0.94.10-1~bpo80+1 # version that will be installed
Version table:
0.94.10-1~bpo80+1 0
500 http://mirrors.163.com/ceph/debian-hammer/ jessie/main amd64 Packages
*** 0.94.5-1~bpo80+1 0
100 /var/lib/dpkg/status

root@demo:/home/demouser# aptitude install ceph ceph-common ceph-deploy ceph-fs-common ceph-fuse ceph-mds libcephfs1 python-ceph python-cephfs librados2 libradosstriper1 python-rados radosgw radosgw-agent librbd1 python-rbd rbd-fuse radosgw radosgw-agent
The following packages will be REMOVED:
daemon{u} mpt-status{u}
The following packages will be upgraded:
ceph ceph-common ceph-deploy ceph-fs-common ceph-fuse ceph-mds ceph-test libcephfs1 librados2 libradosstriper1 librbd1 python-ceph python-cephfs python-rados python-rbd radosgw radosgw-agent rbd-fuse
The following packages are RECOMMENDED but will NOT be installed:
btrfs-tools fuse
18 packages upgraded, 0 newly installed, 2 to remove and 185 not upgraded.
Need to get 75.3 MB of archives. After unpacking 3,588 kB will be used.
Do you want to continue? [Y/n/?] y
Get: 1 http://mirrors.163.com/ceph/debian-hammer/ jessie/main libcephfs1 amd64 0.94.10-1~bpo80+1 [2,706 kB]
Get: 2 http://mirrors.163.com/ceph/debian-hammer/ jessie/main ceph-mds amd64 0.94.10-1~bpo80+1 [8,053 kB]
Get: 3 http://mirrors.163.com/ceph/debian-hammer/ jessie/main ceph amd64 0.94.10-1~bpo80+1 [11.6 MB]
Get: 4 http://mirrors.163.com/ceph/debian-hammer/ jessie/main ceph-test amd64 0.94.10-1~bpo80+1 [28.2 MB]
Get: 5 http://mirrors.163.com/ceph/debian-hammer/ jessie/main radosgw amd64 0.94.10-1~bpo80+1 [2,576 kB]
Get: 6 http://mirrors.163.com/ceph/debian-hammer/ jessie/main ceph-common amd64 0.94.10-1~bpo80+1 [6,526 kB]
Get: 7 http://mirrors.163.com/ceph/debian-hammer/ jessie/main librbd1 amd64 0.94.10-1~bpo80+1 [2,593 kB]
Get: 8 http://mirrors.163.com/ceph/debian-hammer/ jessie/main libradosstriper1 amd64 0.94.10-1~bpo80+1 [2,554 kB]
Get: 9 http://mirrors.163.com/ceph/debian-hammer/ jessie/main librados2 amd64 0.94.10-1~bpo80+1 [2,479 kB]
Get: 10 http://mirrors.163.com/ceph/debian-hammer/ jessie/main python-rados amd64 0.94.10-1~bpo80+1 [895 kB]
Get: 11 http://mirrors.163.com/ceph/debian-hammer/ jessie/main python-cephfs amd64 0.94.10-1~bpo80+1 [886 kB]
Get: 12 http://mirrors.163.com/ceph/debian-hammer/ jessie/main python-rbd amd64 0.94.10-1~bpo80+1 [891 kB]
Get: 13 http://mirrors.163.com/ceph/debian-hammer/ jessie/main ceph-deploy all 1.5.37 [95.9 kB]
Get: 14 http://mirrors.163.com/ceph/debian-hammer/ jessie/main ceph-fs-common amd64 0.94.10-1~bpo80+1 [903 kB]
Get: 15 http://mirrors.163.com/ceph/debian-hammer/ jessie/main ceph-fuse amd64 0.94.10-1~bpo80+1 [2,515 kB]
Get: 16 http://mirrors.163.com/ceph/debian-hammer/ jessie/main python-ceph amd64 0.94.10-1~bpo80+1 [883 kB]
Get: 17 http://mirrors.163.com/ceph/debian-hammer/ jessie/main radosgw-agent all 1.2.7 [30.1 kB]
Get: 18 http://mirrors.163.com/ceph/debian-hammer/ jessie/main rbd-fuse amd64 0.94.10-1~bpo80+1 [891 kB]
Fetched 75.3 MB in 10s (7,301 kB/s)
Reading changelogs... Done
(Reading database ... 74503 files and directories currently installed.)
Removing mpt-status (1.2.0-8) ...
[ ok ] mpt-statusd is disabled in /etc/default/mpt-statusd, not starting..
Removing daemon (0.6.4-1) ...
Processing triggers for man-db (2.7.0.2-5) ...
(Reading database ... 74485 files and directories currently installed.)
Preparing to unpack .../libcephfs1_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking libcephfs1 (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../ceph-mds_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking ceph-mds (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../ceph_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking ceph (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../ceph-test_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking ceph-test (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../radosgw_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking radosgw (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../ceph-common_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking ceph-common (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../librbd1_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking librbd1 (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../libradosstriper1_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking libradosstriper1 (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../librados2_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking librados2 (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../python-rados_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking python-rados (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../python-cephfs_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking python-cephfs (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../python-rbd_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking python-rbd (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../ceph-deploy_1.5.37_all.deb ...
Unpacking ceph-deploy (1.5.37) over (1.5.28~bpo70+1) ...
Preparing to unpack .../ceph-fs-common_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking ceph-fs-common (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../ceph-fuse_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking ceph-fuse (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../python-ceph_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking python-ceph (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Preparing to unpack .../radosgw-agent_1.2.7_all.deb ...
Unpacking radosgw-agent (1.2.7) over (1.2.4~bpo70+1) ...
Preparing to unpack .../rbd-fuse_0.94.10-1~bpo80+1_amd64.deb ...
Unpacking rbd-fuse (0.94.10-1~bpo80+1) over (0.94.5-1~bpo80+1) ...
Processing triggers for man-db (2.7.0.2-5) ...
Processing triggers for systemd (215-17+deb8u2) ...
Setting up libcephfs1 (0.94.10-1~bpo80+1) ...
Setting up librados2 (0.94.10-1~bpo80+1) ...
Setting up librbd1 (0.94.10-1~bpo80+1) ...
Setting up python-rados (0.94.10-1~bpo80+1) ...
Setting up python-cephfs (0.94.10-1~bpo80+1) ...
Setting up python-rbd (0.94.10-1~bpo80+1) ...
Setting up ceph-common (0.94.10-1~bpo80+1) ...
Installing new version of config file /etc/init.d/rbdmap ...
Setting up ceph (0.94.10-1~bpo80+1) ...
Installing new version of config file /etc/init.d/ceph ...
Installing new version of config file /etc/init/ceph-osd.conf ...

Configuration file '/etc/logrotate.d/ceph'
==> Modified (by you or by a script) since installation.
==> Package distributor has shipped an updated version.
What would you like to do about it ? Your options are:
Y or I : install the package maintainer's version
N or O : keep your currently-installed version
D : show the differences between the versions
Z : start a shell to examine the situation
The default action is to keep your current version.
*** ceph (Y/I/N/O/D/Z) [default=N] ? N
Setting up ceph-mds (0.94.10-1~bpo80+1) ...
Setting up libradosstriper1 (0.94.10-1~bpo80+1) ...
Setting up ceph-test (0.94.10-1~bpo80+1) ...
Setting up radosgw (0.94.10-1~bpo80+1) ...
Installing new version of config file /etc/init.d/radosgw ...
Installing new version of config file /etc/logrotate.d/radosgw ...
Setting up ceph-deploy (1.5.37) ...
Setting up ceph-fs-common (0.94.10-1~bpo80+1) ...
Setting up ceph-fuse (0.94.10-1~bpo80+1) ...
Setting up python-ceph (0.94.10-1~bpo80+1) ...
Setting up radosgw-agent (1.2.7) ...
Installing new version of config file /etc/init.d/radosgw-agent ...
Setting up rbd-fuse (0.94.10-1~bpo80+1) ...
Processing triggers for libc-bin (2.19-18+deb8u1) ...
Processing triggers for systemd (215-17+deb8u2) ...

Current status: 185 updates [-18].

Finishing the upgrade to the latest hammer

root@demo:/home/demouser# ceph -v 
ceph version 0.94.10 (b1e0532418e4631af01acbc0cedd426f1905f4af) # the installed packages are now updated
root@demo:/home/demouser# ceph -s
cluster 23d6f3f9-0b86-432c-bb18-1722f73e93e0
health HEALTH_OK
monmap e1: 1 mons at {ceph.work=10.63.48.19:6789/0}
election epoch 1, quorum 0 ceph.work
osdmap e43: 3 osds: 3 up, 3 in
pgmap v907873: 544 pgs, 16 pools, 2217 kB data, 242 objects
3120 MB used, 88994 MB / 92114 MB avail
544 active+clean

root@demo:/home/demouser# /etc/init.d/ceph status
=== mon.ceph.work ===
mon.ceph.work: running {"version":"0.94.5"} # the mon and osd processes are still running the old version
=== osd.0 ===
osd.0: running {"version":"0.94.5"}
=== osd.1 ===
osd.1: running {"version":"0.94.5"}
=== osd.2 ===
osd.2: running {"version":"0.94.5"}

root@demo:/home/demouser# /etc/init.d/ceph restart # restart everything by hand; in production restart the mon first and then the OSDs one at a time, to avoid the impact of a mass restart (see the sketch after this session)
=== mon.ceph.work ===
=== mon.ceph.work ===
Stopping Ceph mon.ceph.work on ceph.work...kill 2267...done
=== mon.ceph.work ===
Starting Ceph mon.ceph.work on ceph.work...
=== osd.0 ===
=== osd.0 ===
Stopping Ceph osd.0 on ceph.work...kill 1082...kill 1082...done
=== osd.0 ===
Mounting xfs on ceph.work:/home/ceph/var/lib/osd/ceph-0
create-or-move updated item name 'osd.0' weight 0.03 at location {host=ceph.work,root=default} to crush map
Starting Ceph osd.0 on ceph.work...
starting osd.0 at :/0 osd_data /home/ceph/var/lib/osd/ceph-0 /home/ceph/var/lib/osd/ceph-0/journal
=== osd.1 ===
=== osd.1 ===
Stopping Ceph osd.1 on ceph.work...kill 1262...kill 1262...done
=== osd.1 ===
Mounting xfs on ceph.work:/home/ceph/var/lib/osd/ceph-1
create-or-move updated item name 'osd.1' weight 0.03 at location {host=ceph.work,root=default} to crush map
Starting Ceph osd.1 on ceph.work...
starting osd.1 at :/0 osd_data /home/ceph/var/lib/osd/ceph-1 /home/ceph/var/lib/osd/ceph-1/journal

=== osd.2 ===
=== osd.2 ===
Stopping Ceph osd.2 on ceph.work...kill 1452...kill 1452...done
=== osd.2 ===
Mounting xfs on ceph.work:/home/ceph/var/lib/osd/ceph-2
create-or-move updated item name 'osd.2' weight 0.03 at location {host=ceph.work,root=default} to crush map
Starting Ceph osd.2 on ceph.work...
starting osd.2 at :/0 osd_data /home/ceph/var/lib/osd/ceph-2 /home/ceph/var/lib/osd/ceph-2/journal
root@demo:/home/demouser# /etc/init.d/ceph  status
=== mon.ceph.work ===
mon.ceph.work: running {"version":"0.94.10"} # mon and osds are now all on the latest hammer
=== osd.0 ===
osd.0: running {"version":"0.94.10"}
=== osd.1 ===
osd.1: running {"version":"0.94.10"}
=== osd.2 ===
osd.2: running {"version":"0.94.10"}
root@demo:/home/demouser# ceph -s
cluster 23d6f3f9-0b86-432c-bb18-1722f73e93e0
health HEALTH_OK
monmap e1: 1 mons at {ceph.work=10.63.48.19:6789/0}
election epoch 1, quorum 0 ceph.work
osdmap e51: 3 osds: 3 up, 3 in
pgmap v907887: 544 pgs, 16 pools, 2217 kB data, 242 objects
3121 MB used, 88992 MB / 92114 MB avail
544 active+clean
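
For reference, a hedged sketch of the sequential restart recommended above for production use (the daemon names are this demo host's; the health checks are a simple way to wait out recovery between steps):

/etc/init.d/ceph restart mon.ceph.work          # restart the mon first
ceph quorum_status > /dev/null                  # confirm the mon is back and answering
for i in 0 1 2; do
    /etc/init.d/ceph restart osd.$i             # then one OSD at a time
    while ! ceph health | grep -q HEALTH_OK; do sleep 5; done   # wait until the cluster settles
done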

Upgrading to the latest jewel

root@demo:/home/demouser# vi /etc/apt/sources.list.d/ceph.list 
deb http://mirrors.163.com/ceph/debian-jewel/ jessie main # use the 163 mirror to update to the latest jewel

root@demo:/home/demouser# apt-get update
...
Fetched 18.7 kB in 11s (1,587 B/s)
Reading package lists... Done

root@demo:/home/demouser# apt-cache policy ceph
ceph:
Installed: 0.94.10-1~bpo80+1 # currently installed version
Candidate: 10.2.6-1~bpo80+1 # the latest jewel version to be installed
Version table:
10.2.6-1~bpo80+1 0
500 http://mirrors.163.com/ceph/debian-jewel/ jessie/main amd64 Packages
*** 0.94.10-1~bpo80+1 0
100 /var/lib/dpkg/status

root@demo:/home/demouser# aptitude install ceph ceph-common ceph-deploy ceph-fs-common ceph-fuse ceph-mds libcephfs1 python-ceph python-cephfs librados2 libradosstriper1 python-rados radosgw radosgw-agent librbd1 python-rbd rbd-fuse radosgw radosgw-agent
The following NEW packages will be installed:
ceph-base{a} ceph-mon{a} ceph-osd{a} libboost-random1.55.0{a} libboost-regex1.55.0{a} librgw2{a} xmlstarlet{a}
The following packages will be upgraded:
ceph ceph-common ceph-fs-common ceph-fuse ceph-mds ceph-test libcephfs1 librados2 libradosstriper1 librbd1 python-ceph python-cephfs python-rados python-rbd radosgw rbd-fuse
The following packages are RECOMMENDED but will NOT be installed:
btrfs-tools fuse
16 packages upgraded, 7 newly installed, 0 to remove and 184 not upgraded.
Need to get 169 MB of archives. After unpacking 464 MB will be used.
Do you want to continue? [Y/n/?] y
....
Fetched 169 MB in 15s (11.0 MB/s)
Reading changelogs... Done
Selecting previously unselected package libboost-random1.55.0:amd64.
(Reading database ... 74299 files and directories currently installed.)
Preparing to unpack .../libboost-random1.55.0_1.55.0+dfsg-3_amd64.deb ...
Unpacking libboost-random1.55.0:amd64 (1.55.0+dfsg-3) ...
Selecting previously unselected package libboost-regex1.55.0:amd64.
Preparing to unpack .../libboost-regex1.55.0_1.55.0+dfsg-3_amd64.deb ...
Unpacking libboost-regex1.55.0:amd64 (1.55.0+dfsg-3) ...
Selecting previously unselected package xmlstarlet.
Preparing to unpack .../xmlstarlet_1.6.1-1_amd64.deb ...
Unpacking xmlstarlet (1.6.1-1) ...
Preparing to unpack .../libcephfs1_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking libcephfs1 (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../ceph-mds_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking ceph-mds (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../ceph_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking ceph (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../ceph-test_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking ceph-test (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../radosgw_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking radosgw (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../ceph-common_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking ceph-common (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../librbd1_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking librbd1 (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../libradosstriper1_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking libradosstriper1 (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../librados2_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking librados2 (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Selecting previously unselected package librgw2.
Preparing to unpack .../librgw2_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking librgw2 (10.2.6-1~bpo80+1) ...
Preparing to unpack .../python-rados_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking python-rados (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../python-cephfs_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking python-cephfs (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../python-rbd_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking python-rbd (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Selecting previously unselected package ceph-base.
Preparing to unpack .../ceph-base_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking ceph-base (10.2.6-1~bpo80+1) ...
Selecting previously unselected package ceph-mon.
Preparing to unpack .../ceph-mon_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking ceph-mon (10.2.6-1~bpo80+1) ...
Selecting previously unselected package ceph-osd.
Preparing to unpack .../ceph-osd_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking ceph-osd (10.2.6-1~bpo80+1) ...
Preparing to unpack .../ceph-fs-common_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking ceph-fs-common (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../ceph-fuse_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking ceph-fuse (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../python-ceph_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking python-ceph (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Preparing to unpack .../rbd-fuse_10.2.6-1~bpo80+1_amd64.deb ...
Unpacking rbd-fuse (10.2.6-1~bpo80+1) over (0.94.10-1~bpo80+1) ...
Processing triggers for man-db (2.7.0.2-5) ...
Processing triggers for systemd (215-17+deb8u2) ...
Setting up libboost-random1.55.0:amd64 (1.55.0+dfsg-3) ...
Setting up libboost-regex1.55.0:amd64 (1.55.0+dfsg-3) ...
Setting up xmlstarlet (1.6.1-1) ...
Setting up libcephfs1 (10.2.6-1~bpo80+1) ...
Setting up librados2 (10.2.6-1~bpo80+1) ...
Setting up librbd1 (10.2.6-1~bpo80+1) ...
Setting up libradosstriper1 (10.2.6-1~bpo80+1) ...
Setting up librgw2 (10.2.6-1~bpo80+1) ...
Setting up python-rados (10.2.6-1~bpo80+1) ...
Setting up python-cephfs (10.2.6-1~bpo80+1) ...
Setting up python-rbd (10.2.6-1~bpo80+1) ...
Setting up ceph-common (10.2.6-1~bpo80+1) ...
Installing new version of config file /etc/bash_completion.d/rbd ...
Installing new version of config file /etc/init.d/rbdmap ...
Setting system user ceph properties..usermod: user ceph is currently used by process 5312
dpkg: error processing package ceph-common (--configure): # the old daemons must be restarted before the ceph user can be modified; ignore this and the following errors for now
subprocess installed post-installation script returned error exit status 8
dpkg: dependency problems prevent configuration of ceph-base:
ceph-base depends on ceph-common (= 10.2.6-1~bpo80+1); however:
Package ceph-common is not configured yet.

dpkg: error processing package ceph-base (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph-mds:
ceph-mds depends on ceph-base (= 10.2.6-1~bpo80+1); however:
Package ceph-base is not configured yet.

dpkg: error processing package ceph-mds (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph-mon:
ceph-mon depends on ceph-base (= 10.2.6-1~bpo80+1); however:
Package ceph-base is not configured yet.

dpkg: error processing package ceph-mon (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph-osd:
ceph-osd depends on ceph-base (= 10.2.6-1~bpo80+1); however:
Package ceph-base is not configured yet.

dpkg: error processing package ceph-osd (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph:
ceph depends on ceph-mon (= 10.2.6-1~bpo80+1); however:
Package ceph-mon is not configured yet.
ceph depends on ceph-osd (= 10.2.6-1~bpo80+1); however:
Package ceph-osd is not configured yet.

dpkg: error processing package ceph (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph-test:
ceph-test depends on ceph-common; however:
Package ceph-common is not configured yet.

dpkg: error processing package ceph-test (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of radosgw:
radosgw depends on ceph-common (= 10.2.6-1~bpo80+1); however:
Package ceph-common is not configured yet.

dpkg: error processing package radosgw (--configure):
dependency problems - leaving unconfigured
Setting up ceph-fs-common (10.2.6-1~bpo80+1) ...
Setting up ceph-fuse (10.2.6-1~bpo80+1) ...
Setting up python-ceph (10.2.6-1~bpo80+1) ...
Setting up rbd-fuse (10.2.6-1~bpo80+1) ...
Processing triggers for libc-bin (2.19-18+deb8u1) ...
Processing triggers for systemd (215-17+deb8u2) ...
Errors were encountered while processing:
ceph-common
ceph-base
ceph-mds
ceph-mon
ceph-osd
ceph
ceph-test
radosgw
E: Sub-process /usr/bin/dpkg returned an error code (1)
Failed to perform requested operation on package. Trying to recover:
Setting up ceph-common (10.2.6-1~bpo80+1) ...
Setting system user ceph properties..usermod: user ceph is currently used by process 5312
dpkg: error processing package ceph-common (--configure):
subprocess installed post-installation script returned error exit status 8
dpkg: dependency problems prevent configuration of radosgw:
radosgw depends on ceph-common (= 10.2.6-1~bpo80+1); however:
Package ceph-common is not configured yet.

dpkg: error processing package radosgw (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph-test:
ceph-test depends on ceph-common; however:
Package ceph-common is not configured yet.

dpkg: error processing package ceph-test (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph-base:
ceph-base depends on ceph-common (= 10.2.6-1~bpo80+1); however:
Package ceph-common is not configured yet.

dpkg: error processing package ceph-base (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph-osd:
ceph-osd depends on ceph-base (= 10.2.6-1~bpo80+1); however:
Package ceph-base is not configured yet.

dpkg: error processing package ceph-osd (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph-mds:
ceph-mds depends on ceph-base (= 10.2.6-1~bpo80+1); however:
Package ceph-base is not configured yet.

dpkg: error processing package ceph-mds (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph:
ceph depends on ceph-osd (= 10.2.6-1~bpo80+1); however:
Package ceph-osd is not configured yet.

dpkg: error processing package ceph (--configure):
dependency problems - leaving unconfigured
dpkg: dependency problems prevent configuration of ceph-mon:
ceph-mon depends on ceph-base (= 10.2.6-1~bpo80+1); however:
Package ceph-base is not configured yet.

dpkg: error processing package ceph-mon (--configure):
dependency problems - leaving unconfigured
Errors were encountered while processing:
ceph-common
radosgw
ceph-test
ceph-base
ceph-osd
ceph-mds
ceph
ceph-mon

Current status: 184 updates [-16].
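
These configure errors are expected: the package's post-install script tries to modify the ceph system user while the old daemons are still running as it. A hedged follow-up (not shown in the original session): once the daemons have been restarted below and the user is free, the half-configured packages can be finished off with

root@demo:/home/demouser# dpkg --configure -a   # configure the packages left unconfigured above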

root@demo:/home/demouser# /etc/init.d/ceph status
=== mon.ceph.work ===
mon.ceph.work: running {"version":"0.94.10"} # the mon and osd daemons are still running the old version
=== osd.0 ===
osd.0: running {"version":"0.94.10"}
=== osd.1 ===
osd.1: running {"version":"0.94.10"}
=== osd.2 ===
osd.2: running {"version":"0.94.10"}
root@demo:/home/demouser# ceph -s
cluster 23d6f3f9-0b86-432c-bb18-1722f73e93e0
health HEALTH_OK
monmap e1: 1 mons at {ceph.work=10.63.48.19:6789/0}
election epoch 1, quorum 0 ceph.work
osdmap e51: 3 osds: 3 up, 3 in
pgmap v907893: 544 pgs, 16 pools, 2217 kB data, 242 objects
3120 MB used, 88993 MB / 92114 MB avail
544 active+clean
root@demo:/home/demouser# /etc/init.d/ceph restart # restart everything by hand; again, in production restart the mon first and then the OSDs one by one
=== mon.ceph.work ===
=== mon.ceph.work ===
Stopping Ceph mon.ceph.work on ceph.work...kill 5312...done
=== mon.ceph.work ===
Starting Ceph mon.ceph.work on ceph.work...
=== osd.0 ===
=== osd.0 ===
Stopping Ceph osd.0 on ceph.work...kill 5677...kill 5677...done
=== osd.0 ===
Mounting xfs on ceph.work:/home/ceph/var/lib/osd/ceph-0
create-or-move updated item name 'osd.0' weight 0.03 at location {host=ceph.work,root=default} to crush map
Starting Ceph osd.0 on ceph.work...
starting osd.0 at :/0 osd_data /home/ceph/var/lib/osd/ceph-0 /home/ceph/var/lib/osd/ceph-0/journal
=== osd.1 ===
=== osd.1 ===
Stopping Ceph osd.1 on ceph.work...kill 6087...kill 6087...done
=== osd.1 ===
Mounting xfs on ceph.work:/home/ceph/var/lib/osd/ceph-1
create-or-move updated item name 'osd.1' weight 0.03 at location {host=ceph.work,root=default} to crush map
Starting Ceph osd.1 on ceph.work...
starting osd.1 at :/0 osd_data /home/ceph/var/lib/osd/ceph-1 /home/ceph/var/lib/osd/ceph-1/journal
=== osd.2 ===
=== osd.2 ===
Stopping Ceph osd.2 on ceph.work...kill 6503...kill 6503...done
=== osd.2 ===
Mounting xfs on ceph.work:/home/ceph/var/lib/osd/ceph-2
create-or-move updated item name 'osd.2' weight 0.03 at location {host=ceph.work,root=default} to crush map
Starting Ceph osd.2 on ceph.work...
starting osd.2 at :/0 osd_data /home/ceph/var/lib/osd/ceph-2 /home/ceph/var/lib/osd/ceph-2/journal

root@demo:/home/demouser# ceph -s # crushmap compatibility warnings appear
cluster 23d6f3f9-0b86-432c-bb18-1722f73e93e0
health HEALTH_WARN
crush map has legacy tunables (require bobtail, min is firefly)
all OSDs are running jewel or later but the 'require_jewel_osds' osdmap flag is not set
monmap e1: 1 mons at {ceph.work=10.63.48.19:6789/0}
election epoch 2, quorum 0 ceph.work
osdmap e61: 3 osds: 3 up, 3 in
pgmap v907906: 544 pgs, 16 pools, 2217 kB data, 242 objects
3122 MB used, 88991 MB / 92114 MB avail
544 active+clean

root@demo:/home/demouser# /etc/init.d/ceph status # check that every daemon is now on the latest version
=== mon.ceph.work ===
mon.ceph.work: running {"version":"10.2.6"}
=== osd.0 ===
osd.0: running {"version":"10.2.6"}
=== osd.1 ===
osd.1: running {"version":"10.2.6"}
=== osd.2 ===
osd.2: running {"version":"10.2.6"}


root@demo:/home/demouser# ceph osd set require_jewel_osds
set require_jewel_osds
root@demo:/home/demouser# ceph osd crush tunables optimal
adjusted tunables profile to optimal
root@demo:/home/demouser# ceph -s # after adjusting the crushmap compatibility settings the cluster is healthy again
cluster 23d6f3f9-0b86-432c-bb18-1722f73e93e0
health HEALTH_OK
monmap e1: 1 mons at {ceph.work=10.63.48.19:6789/0}
election epoch 2, quorum 0 ceph.work
osdmap e63: 3 osds: 3 up, 3 in
flags require_jewel_osds
pgmap v907917: 544 pgs, 16 pools, 2217 kB data, 242 objects
3122 MB used, 88991 MB / 92114 MB avail
544 active+clean
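
Note that "ceph osd crush tunables optimal" can trigger substantial data movement on a cluster holding real data; on this nearly empty demo it completes immediately. A quick way to verify the new state:

root@demo:/home/demouser# ceph osd crush show-tunables   # the profile should now report the optimal (jewel) tunables
root@demo:/home/demouser# ceph osd dump | grep flags     # should include require_jewel_osds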

Repairing the RGW service

RGW fails to start

root@demo:/home/demouser# /etc/init.d/radosgw start # the restart fails; the log shows the following errors

2017-03-23 15:03:48.309461 7f7f175998c0 0 ceph version 10.2.6 (656b5b63ed7c43bd014bcafd81b001959d5f089f), process radosgw, pid 11488
2017-03-23 15:03:48.317937 7f7f175998c0 20 get_system_obj_state: rctx=0x7ffcdb751e30 obj=.rgw.root:default.realm state=0x7f7f17e93368 s->prefetch_data=0
2017-03-23 15:03:48.317943 7f7ef17fa700 2 RGWDataChangesLog::ChangesRenewThread: start
2017-03-23 15:03:48.318759 7f7f175998c0 20 get_system_obj_state: rctx=0x7ffcdb7518f0 obj=.rgw.root:converted state=0x7f7f17e93368 s->prefetch_data=0
2017-03-23 15:03:48.319140 7f7f175998c0 20 get_system_obj_state: rctx=0x7ffcdb751060 obj=.rgw.root:default.realm state=0x7f7f17e94398 s->prefetch_data=0
2017-03-23 15:03:48.319513 7f7f175998c0 10 could not read realm id: (2) No such file or directory
2017-03-23 15:03:48.319858 7f7f175998c0 10 failed to list objects pool_iterate_begin() returned r=-2
2017-03-23 15:03:48.319890 7f7f175998c0 20 get_system_obj_state: rctx=0x7ffcdb751290 obj=.cn-zone1.rgw.root:zone_names.default state=0x7f7f17e94e38 s->prefetch_data=0
2017-03-23 15:03:48.321308 7f7f175998c0 0 error in read_id for object name: default : (2) No such file or directory
2017-03-23 15:03:48.321335 7f7f175998c0 20 get_system_obj_state: rctx=0x7ffcdb751290 obj=.rgw.root:zonegroups_names.default state=0x7f7f17e94e38 s->prefetch_data=0
2017-03-23 15:03:48.321725 7f7f175998c0 0 error in read_id for object name: default : (2) No such file or directory
2017-03-23 15:03:48.321756 7f7f175998c0 20 get_system_obj_state: rctx=0x7ffcdb751f60 obj=.cn.rgw.root:region_map state=0x7f7f17e93368 s->prefetch_data=0
2017-03-23 15:03:48.322998 7f7f175998c0 10 cannot find current period zonegroup using local zonegroup
2017-03-23 15:03:48.323018 7f7f175998c0 20 get_system_obj_state: rctx=0x7ffcdb751d10 obj=.rgw.root:zonegroups_names.cn state=0x7f7f17e93368 s->prefetch_data=0
2017-03-23 15:03:48.323356 7f7f175998c0 0 error in read_id for object name: cn : (2) No such file or directory
2017-03-23 15:03:48.323371 7f7f175998c0 0 failed reading zonegroup info: ret -2 (2) No such file or directory
2017-03-23 15:03:48.324456 7f7f175998c0 -1 Couldn't init storage provider (RADOS)
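
What the log is saying: jewel's RGW looks up realm/zonegroup/zone objects in the new .rgw.root pool, while the hammer-era region/zone metadata still lives in the old root pools, so initialization fails. The two sides can be compared directly (the pool names are this cluster's):

root@demo:/home/demouser# rados ls -p .rgw.root           # jewel's default realm/zonegroup/zone objects
root@demo:/home/demouser# rados ls -p .cn.rgw.root        # hammer-era region objects
root@demo:/home/demouser# rados ls -p .cn-zone1.rgw.root  # hammer-era zone objects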

Check the updated pool list

root@demo:/home/demouser# rados lspools
rbd
.cn.rgw.root
.cn-zone1.rgw.root
.cn-zone1.rgw.domain
.cn-zone1.rgw.control
.cn-zone1.rgw.gc
.cn-zone1.rgw.buckets.index
.cn-zone1.rgw.buckets.extra
.cn-zone1.rgw.buckets
.cn-zone1.log
.cn-zone1.intent-log
.cn-zone1.usage
.cn-zone1.users
.cn-zone1.users.email
.cn-zone1.users.swift
.cn-zone1.users.uid
.rgw.root
default.rgw.control # pools newly created by default in the jewel release
default.rgw.data.root
default.rgw.gc
default.rgw.log

root@demo:/home/demouser# ceph df
GLOBAL:
SIZE AVAIL RAW USED %RAW USED
92114M 88987M 3126M 3.39
POOLS:
NAME ID USED %USED MAX AVAIL OBJECTS
rbd 0 0 0 88554M 0
.cn.rgw.root 1 338 0 88554M 2
.cn-zone1.rgw.root 2 1419 0 88554M 2
.cn-zone1.rgw.domain 3 1829 0 88554M 9
.cn-zone1.rgw.control 4 0 0 88554M 8
.cn-zone1.rgw.gc 5 0 0 88554M 32
.cn-zone1.rgw.buckets.index 6 0 0 88554M 88
.cn-zone1.rgw.buckets.extra 7 0 0 88554M 0
.cn-zone1.rgw.buckets 8 2212k 0 88554M 5
.cn-zone1.log 9 0 0 88554M 80
.cn-zone1.intent-log 10 0 0 88554M 0
.cn-zone1.usage 11 0 0 88554M 0
.cn-zone1.users 12 84 0 88554M 7
.cn-zone1.users.email 13 0 0 88554M 0
.cn-zone1.users.swift 14 0 0 88554M 0
.cn-zone1.users.uid 15 2054 0 88554M 9
.rgw.root 16 1588 0 88554M 4
default.rgw.control 17 0 0 88554M 8
default.rgw.data.root 18 0 0 88554M 0
default.rgw.gc 19 0 0 88554M 0
default.rgw.log 20 0 0 88554M 0

Adjusting the default zone configuration

root@demo:/home/demouser# rados ls -p .rgw.root # on jewel the realm, zone and zonegroup metadata are stored here by default; the old region/zone configuration must be migrated from .cn.rgw.root into this pool so the existing data works on the new version
zone_info.2f58efaa-3fa2-48b2-b996-7f924ae1215c
zonegroup_info.9d07fb3c-45d7-4d63-a475-fd6ebd41b722
zonegroups_names.default
zone_names.default

root@demo:/home/demouser# radosgw-admin realm list # the default realm list is empty
{
"default_info": "",
"realms": []
}

root@demo:/home/demouser# radosgw-admin zonegroups list # a zonegroup named "default" is created automatically
read_default_id : -2
{
"default_info": "",
"zonegroups": [
"default"
]
}

root@demo:/home/demouser# radosgw-admin zone list # a zone named "default" is created automatically
{
"default_info": "",
"zones": [
"default"
]
}

root@demo:/home/demouser# radosgw-admin zonegroup get --rgw-zonegroup=default # inspect the default zonegroup configuration
{
"id": "9d07fb3c-45d7-4d63-a475-fd6ebd41b722",
"name": "default",
"api_name": "",
"is_master": "true",
"endpoints": [],
"hostnames": [],
"hostnames_s3website": [],
"master_zone": "2f58efaa-3fa2-48b2-b996-7f924ae1215c",
"zones": [
{
"id": "2f58efaa-3fa2-48b2-b996-7f924ae1215c",
"name": "default",
"endpoints": [],
"log_meta": "false",
"log_data": "false",
"bucket_index_max_shards": 0,
"read_only": "false"
}
],
"placement_targets": [
{
"name": "default-placement",
"tags": []
}
],
"default_placement": "default-placement",
"realm_id": ""
}

root@demo:/home/demouser# radosgw-admin zone get --rgw-zone=default # inspect the default zone configuration

{
"id": "2f58efaa-3fa2-48b2-b996-7f924ae1215c",
"name": "default",
"domain_root": "default.rgw.data.root",
"control_pool": "default.rgw.control",
"gc_pool": "default.rgw.gc",
"log_pool": "default.rgw.log",
"intent_log_pool": "default.rgw.intent-log",
"usage_log_pool": "default.rgw.usage",
"user_keys_pool": "default.rgw.users.keys",
"user_email_pool": "default.rgw.users.email",
"user_swift_pool": "default.rgw.users.swift",
"user_uid_pool": "default.rgw.users.uid",
"system_key": {
"access_key": "",
"secret_key": ""
},
"placement_pools": [
{
"key": "default-placement",
"val": {
"index_pool": "default.rgw.buckets.index",
"data_pool": "default.rgw.buckets.data",
"data_extra_pool": "default.rgw.buckets.non-ec",
"index_type": 0
}
}
],
"metadata_heap": "",
"realm_id": ""
}
root@demo:/home/demouser# radosgw-admin zone get --rgw-zone=default > zone.info # export the default zone configuration
root@demo:/home/demouser# radosgw-admin zone set --rgw-zone=default < zone.info # zone.info was edited by hand first; the applied configuration is shown below
zone id 2f58efaa-3fa2-48b2-b996-7f924ae1215c
{
"id": "2f58efaa-3fa2-48b2-b996-7f924ae1215c",
"name": "default",
"domain_root": ".cn-zone1.rgw.domain",
"control_pool": ".cn-zone1.rgw.control",
"gc_pool": ".cn-zone1.rgw.gc",
"log_pool": ".cn-zone1.log",
"intent_log_pool": ".cn-zone1.intent-log",
"usage_log_pool": ".cn-zone1.usage",
"user_keys_pool": ".cn-zone1.users", # this was the users pool under hammer; jewel renamed it
"user_email_pool": ".cn-zone1.users.email",
"user_swift_pool": ".cn-zone1.users.swift",
"user_uid_pool": ".cn-zone1.users.uid",
"system_key": {
"access_key": "",
"secret_key": ""
},
"placement_pools": [
{
"key": "default-placement",
"val": {
"index_pool": ".cn-zone1.rgw.buckets.index",
"data_pool": ".cn-zone1.rgw.buckets",
"data_extra_pool": ".cn-zone1.rgw.buckets.extra",
"index_type": 0
}
}
],
"metadata_heap": "",
"realm_id": ""
}
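
If you prefer not to hand-edit the JSON, the same pool rewrite can be scripted. A sketch assuming jq is installed (the pool names are this cluster's legacy ones):

radosgw-admin zone get --rgw-zone=default > zone.info
jq '.domain_root = ".cn-zone1.rgw.domain"
    | .control_pool = ".cn-zone1.rgw.control"
    | .gc_pool = ".cn-zone1.rgw.gc"
    | .log_pool = ".cn-zone1.log"
    | .intent_log_pool = ".cn-zone1.intent-log"
    | .usage_log_pool = ".cn-zone1.usage"
    | .user_keys_pool = ".cn-zone1.users"
    | .user_email_pool = ".cn-zone1.users.email"
    | .user_swift_pool = ".cn-zone1.users.swift"
    | .user_uid_pool = ".cn-zone1.users.uid"
    | .placement_pools[0].val.index_pool = ".cn-zone1.rgw.buckets.index"
    | .placement_pools[0].val.data_pool = ".cn-zone1.rgw.buckets"
    | .placement_pools[0].val.data_extra_pool = ".cn-zone1.rgw.buckets.extra"' \
    zone.info > zone.new
radosgw-admin zone set --rgw-zone=default < zone.new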

Adjusting the ceph.conf configuration

root@demo:/home/demouser# /etc/init.d/radosgw start # still fails to start; the log shows the following errors
2017-03-23 15:21:42.300998 7fc1d58718c0 0 ceph version 10.2.6 (656b5b63ed7c43bd014bcafd81b001959d5f089f), process radosgw, pid 12586
2017-03-23 15:21:42.318848 7fc1d58718c0 0 error in read_id for object name: default : (2) No such file or directory
2017-03-23 15:21:42.322114 7fc1d58718c0 0 error in read_id for object name: cn : (2) No such file or directory
2017-03-23 15:21:42.322129 7fc1d58718c0 0 failed reading zonegroup info: ret -2 (2) No such file or directory
2017-03-23 15:21:42.323295 7fc1d58718c0 -1 Couldn't init storage provider (RADOS)

Modify ceph.conf as follows. The hammer-era options (rgw region, rgw region root pool, rgw zone, rgw zone root pool) are dropped, so the gateway falls back to the default zone, which now points at the legacy pools:
[client.radosgw.us-zone1]
rgw dns name = s3.i.nease.net
rgw frontends = fastcgi
host = ceph.work
keyring = /etc/ceph/ceph.client.radosgw.keyring
rgw socket path = /home/ceph/var/run/ceph-client.radosgw.us-zone1.sock
log file = /home/ceph/log/radosgw.us-zone1.log
rgw print continue = false
rgw content length compat = true


root@demo:/home/demouser# /etc/init.d/radosgw start # the gateway now starts successfully

Verifying the result

root@demo:/home/demouser# radosgw-admin metadata list user # user metadata is intact
[
"en-user1",
"us-zone1",
"us-user1",
"cn-user1",
"en-zone1",
"cn-zone1",
"cn-user2"
]
root@demo:/home/demouser# radosgw-admin metadata list bucket # bucket metadata is intact
[
"cn-test1",
"us-test1",
"en-test1",
"cn-test2"
]

root@demo:/home/demouser# radosgw-admin user info --uid=en-user1 # fetch an existing user's info
{
"user_id": "en-user1",
"display_name": "en-user1",
"email": "",
"suspended": 0,
"max_buckets": 1000,
"auid": 0,
"subusers": [],
"keys": [
{
"user": "en-user1",
"access_key": "PWDYNWWXXC3GCYLIJUWL",
"secret_key": "R5kiJPTEroPkUW9TNNM4WWYgXHSMsHoWPxqkRnsG"
}
],
"swift_keys": [],
"caps": [],
"op_mask": "read, write, delete",
"default_placement": "",
"placement_tags": [],
"bucket_quota": {
"enabled": false,
"max_size_kb": -1,
"max_objects": -1
},
"user_quota": {
"enabled": false,
"max_size_kb": -1,
"max_objects": -1
},
"temp_url_keys": []
}

root@demo:/home/demouser# radosgw-admin user create --uid=demotest --display-name=demotest # create a new user
{
"user_id": "demotest",
"display_name": "demotest",
"email": "",
"suspended": 0,
"max_buckets": 1000,
"auid": 0,
"subusers": [],
"keys": [
{
"user": "demotest",
"access_key": "1S9Q6K0P90180M1VFPNR",
"secret_key": "R123LHsqVzMRe3jvJokPPDSYzmAtIxM5jxywQMTP"
}
],
"swift_keys": [],
"caps": [],
"op_mask": "read, write, delete",
"default_placement": "",
"placement_tags": [],
"bucket_quota": {
"enabled": false,
"max_size_kb": -1,
"max_objects": -1
},
"user_quota": {
"enabled": false,
"max_size_kb": -1,
"max_objects": -1
},
"temp_url_keys": []
}
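
As a final end-to-end check, push a real request through the S3 API. A smoke-test sketch assuming s3cmd is installed and DNS resolves the rgw dns name (the keys are the demotest user's from the output above; the bucket name is arbitrary):

s3cmd --access_key=1S9Q6K0P90180M1VFPNR \
      --secret_key=R123LHsqVzMRe3jvJokPPDSYzmAtIxM5jxywQMTP \
      --host=s3.i.nease.net --host-bucket='%(bucket)s.s3.i.nease.net' \
      mb s3://demotest-bucket         # create a test bucket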

Summary

The RGW management model in hammer was a two-level region -> zone structure; the new version replaces it with realm -> zonegroup -> zone, and the naming convention for several pools changed at the same time. If you simply upgrade the Ceph packages, the RGW service will fail to start, for two kinds of reasons: the renamed pools, and the changed rgw options in ceph.conf. This article used a real case to walk through the switch from the old model to the new one. Treat your own environment with caution; a cross-version upgrade still carries significant risk. (by 秦牧羊)

Official upgrade guide: http://docs.ceph.com/docs/master/radosgw/upgrade_to_jewel/

Change log

Why Who When
Created dev-Guangzhou-秦牧羊 2017-03-24