aboutsummaryrefslogtreecommitdiffstats
path: root/extraconfig
diff options
context:
space:
mode:
authorJenkins <jenkins@review.openstack.org>2016-09-09 06:39:11 +0000
committerGerrit Code Review <review@openstack.org>2016-09-09 06:39:11 +0000
commit0f3249df2b68ff426a6aff6ccd8981747e234586 (patch)
tree52ef1fc0130241ffce5b1dde9f5acc24d7cd9c71 /extraconfig
parentbedf3dc546982624628267f812bae24d9b73ff44 (diff)
parent4ea34edb793ab4bd3d9d5198ea9ac7568930c384 (diff)
Merge "Add Ceph cluster health validation on upgrade"
Diffstat (limited to 'extraconfig')
-rwxr-xr-xextraconfig/tasks/major_upgrade_ceph_mon.sh21
-rw-r--r--extraconfig/tasks/major_upgrade_pacemaker.yaml15
2 files changed, 32 insertions, 4 deletions
diff --git a/extraconfig/tasks/major_upgrade_ceph_mon.sh b/extraconfig/tasks/major_upgrade_ceph_mon.sh
index 38befbbf..b76dd7c3 100755
--- a/extraconfig/tasks/major_upgrade_ceph_mon.sh
+++ b/extraconfig/tasks/major_upgrade_ceph_mon.sh
@@ -17,6 +17,21 @@ if ! [[ "$INSTALLED_VERSION" =~ ^0\.94.* ]]; then
exit 0
fi
+CEPH_STATUS=$(ceph health | awk '{print $1}')
+if [ ${CEPH_STATUS} = HEALTH_ERR ]; do
+ echo ERROR: Ceph cluster status is HEALTH_ERR, cannot be upgraded
+ exit 1
+fi
+
+# Useful when upgrading with OSDs num < replica size
+if [ $ignore_ceph_upgrade_warnings != "true" ]; then
+ timeout 300 bash -c "while [ ${CEPH_STATUS} != HEALTH_OK ]; do
+ echo WARNING: Waiting for Ceph cluster status to go HEALTH_OK;
+ sleep 30;
+ CEPH_STATUS=$(ceph health | awk '{print $1}')
+ done"
+fi
+
MON_PID=$(pidof ceph-mon)
MON_ID=$(hostname -s)
@@ -37,8 +52,6 @@ if [[ "$UPDATED_VERSION" =~ ^0\.94.* ]]; then
echo WARNING: Ceph was not upgraded, restarting daemons
service ceph start mon.${MON_ID}
elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then
- echo INFO: Ceph was upgraded to Jewel
-
# RPM could own some of these but we can't take risks on the pre-existing files
for d in /var/lib/ceph/mon /var/log/ceph /var/run/ceph /etc/ceph; do
chown -R ceph:ceph $d
@@ -54,9 +67,11 @@ elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then
# Wait for daemon to be back in the quorum
timeout 300 bash -c "until (ceph quorum_status | jq .quorum_names | grep -sq ${MON_ID}); do
- echo Waiting for mon.${MON_ID} to re-join quorum;
+ echo WARNING: Waiting for mon.${MON_ID} to re-join quorum;
sleep 10;
done"
+
+ echo INFO: Ceph was upgraded to Jewel
else
echo ERROR: Ceph was upgraded to an unknown release, daemon is stopped, need manual intervention
exit 1
diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml
index c2e14880..598d22d0 100644
--- a/extraconfig/tasks/major_upgrade_pacemaker.yaml
+++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml
@@ -26,6 +26,10 @@ parameters:
constraints:
- allowed_values: ['auto', 'yes', 'no']
default: 'auto'
+ IgnoreCephUpgradeWarnings:
+ type: boolean
+ default: false
+ description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean
resources:
# TODO(jistr): for Mitaka->Newton upgrades and further we can use
@@ -36,7 +40,16 @@ resources:
type: OS::Heat::SoftwareConfig
properties:
group: script
- config: {get_file: major_upgrade_ceph_mon.sh}
+ config:
+ list_join:
+ - ''
+ - - str_replace:
+ template: |
+ #!/bin/bash
+ ignore_ceph_upgrade_warnings='IGNORE_CEPH_UPGRADE_WARNINGS'
+ params:
+ IGNORE_CEPH_UPGRADE_WARNINGS: {get_param: IgnoreCephUpgradeWarnings}
+ - get_file: major_upgrade_ceph_mon.sh
CephMonUpgradeDeployment:
type: OS::Heat::SoftwareDeploymentGroup