diff options
author | Jenkins <jenkins@review.openstack.org> | 2016-09-09 06:39:11 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2016-09-09 06:39:11 +0000 |
commit | 0f3249df2b68ff426a6aff6ccd8981747e234586 (patch) | |
tree | 52ef1fc0130241ffce5b1dde9f5acc24d7cd9c71 /extraconfig | |
parent | bedf3dc546982624628267f812bae24d9b73ff44 (diff) | |
parent | 4ea34edb793ab4bd3d9d5198ea9ac7568930c384 (diff) |
Merge "Add Ceph cluster health validation on upgrade"
Diffstat (limited to 'extraconfig')
-rwxr-xr-x | extraconfig/tasks/major_upgrade_ceph_mon.sh | 21 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_pacemaker.yaml | 15 |
2 files changed, 32 insertions, 4 deletions
Reviewer note: the hunks below differ from the merged commit in the lines added
to major_upgrade_ceph_mon.sh (line counts are unchanged):
  * "if [ ... ]; do" was a bash syntax error; it is now "if [ ... ]; then".
  * The HEALTH_OK wait loop was fully expanded by the outer shell because it sits
    inside a double-quoted "bash -c" string, so the status was never re-polled.
    Inner expansions are now escaped and the initial status is handed to the
    inner shell through its environment.
  * Variables used inside [ ... ] are quoted so an empty value cannot break test.
  * The ignore flag is lower-cased before comparison so "True" is accepted too
    (NOTE(review): confirm how Heat renders the boolean parameter).
The "index" hashes are those of the original commit and no longer match the
corrected content. Indentation was reconstructed and may differ from upstream.

diff --git a/extraconfig/tasks/major_upgrade_ceph_mon.sh b/extraconfig/tasks/major_upgrade_ceph_mon.sh
index 38befbbf..b76dd7c3 100755
--- a/extraconfig/tasks/major_upgrade_ceph_mon.sh
+++ b/extraconfig/tasks/major_upgrade_ceph_mon.sh
@@ -17,6 +17,21 @@ if ! [[ "$INSTALLED_VERSION" =~ ^0\.94.* ]]; then
     exit 0
 fi
 
+CEPH_STATUS=$(ceph health | awk '{print $1}')
+if [ "${CEPH_STATUS}" = HEALTH_ERR ]; then
+    echo ERROR: Ceph cluster status is HEALTH_ERR, cannot be upgraded
+    exit 1
+fi
+
+# Useful when upgrading with OSDs num < replica size
+if [ "${ignore_ceph_upgrade_warnings,,}" != "true" ]; then
+    CEPH_STATUS="${CEPH_STATUS}" timeout 300 bash -c "while [ \"\${CEPH_STATUS}\" != HEALTH_OK ]; do
+      echo WARNING: Waiting for Ceph cluster status to go HEALTH_OK;
+      sleep 30;
+      CEPH_STATUS=\$(ceph health | awk '{print \$1}')
+    done"
+fi
+
 MON_PID=$(pidof ceph-mon)
 MON_ID=$(hostname -s)
 
@@ -37,8 +52,6 @@ if [[ "$UPDATED_VERSION" =~ ^0\.94.* ]]; then
     echo WARNING: Ceph was not upgraded, restarting daemons
     service ceph start mon.${MON_ID}
 elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then
-    echo INFO: Ceph was upgraded to Jewel
-
     # RPM could own some of these but we can't take risks on the pre-existing files
     for d in /var/lib/ceph/mon /var/log/ceph /var/run/ceph /etc/ceph; do
         chown -R ceph:ceph $d
@@ -54,9 +67,11 @@ elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then
 
     # Wait for daemon to be back in the quorum
     timeout 300 bash -c "until (ceph quorum_status | jq .quorum_names | grep -sq ${MON_ID}); do
-      echo Waiting for mon.${MON_ID} to re-join quorum;
+      echo WARNING: Waiting for mon.${MON_ID} to re-join quorum;
       sleep 10;
     done"
+
+    echo INFO: Ceph was upgraded to Jewel
 else
     echo ERROR: Ceph was upgraded to an unknown release, daemon is stopped, need manual intervention
     exit 1
diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml
index c2e14880..598d22d0 100644
--- a/extraconfig/tasks/major_upgrade_pacemaker.yaml
+++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml
@@ -26,6 +26,10 @@ parameters:
     constraints:
     - allowed_values: ['auto', 'yes', 'no']
     default: 'auto'
+  IgnoreCephUpgradeWarnings:
+    type: boolean
+    default: false
+    description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean
 
 resources:
   # TODO(jistr): for Mitaka->Newton upgrades and further we can use
@@ -36,7 +40,16 @@ resources:
     type: OS::Heat::SoftwareConfig
     properties:
       group: script
-      config: {get_file: major_upgrade_ceph_mon.sh}
+      config:
+        list_join:
+        - ''
+        - - str_replace:
+              template: |
+                #!/bin/bash
+                ignore_ceph_upgrade_warnings='IGNORE_CEPH_UPGRADE_WARNINGS'
+              params:
+                IGNORE_CEPH_UPGRADE_WARNINGS: {get_param: IgnoreCephUpgradeWarnings}
+          - get_file: major_upgrade_ceph_mon.sh
 
   CephMonUpgradeDeployment:
     type: OS::Heat::SoftwareDeploymentGroup