diff options
Diffstat (limited to 'extraconfig/tasks')
-rw-r--r-- | extraconfig/tasks/major_upgrade_ceph_storage.sh | 77 | ||||
-rwxr-xr-x | extraconfig/tasks/pacemaker_resource_restart.sh | 24 |
2 files changed, 83 insertions, 18 deletions
diff --git a/extraconfig/tasks/major_upgrade_ceph_storage.sh b/extraconfig/tasks/major_upgrade_ceph_storage.sh index de42b16d..03a1c1c2 100644 --- a/extraconfig/tasks/major_upgrade_ceph_storage.sh +++ b/extraconfig/tasks/major_upgrade_ceph_storage.sh @@ -4,32 +4,89 @@ # major upgrade workflow. # set -eu +set -o pipefail UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh -cat > $UPGRADE_SCRIPT << ENDOFCAT +cat > $UPGRADE_SCRIPT << 'ENDOFCAT' +#!/bin/bash ### DO NOT MODIFY THIS FILE ### This file is automatically delivered to the ceph-storage nodes as part of the ### tripleo upgrades workflow +set -eu + +echo INFO: starting $(basename "$0") +# Exit if not running +if ! pidof ceph-osd; then + echo INFO: ceph-osd is not running, skipping + exit 0 +fi -function systemctl_ceph { - action=\$1 - systemctl \$action ceph -} +# Exit if not Hammer +INSTALLED_VERSION=$(ceph --version | awk '{print $3}') +if ! [[ "$INSTALLED_VERSION" =~ ^0\.94.* ]]; then + echo INFO: version of Ceph installed is not 0.94, skipping + exit 0 +fi -# "so that mirrors aren't rebalanced as if the OSD died" - gfidente +OSD_PIDS=$(pidof ceph-osd) +OSD_IDS=$(ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }') + +# "so that mirrors aren't rebalanced as if the OSD died" - gfidente / leseb ceph osd set noout +ceph osd set norebalance +ceph osd set nodeep-scrub +ceph osd set noscrub + +# Stop daemon using Hammer sysvinit script +for OSD_ID in $OSD_IDS; do + service ceph stop osd.${OSD_ID} +done + +# Nice guy will return non-0 only when all failed +timeout 60 bash -c "while kill -0 ${OSD_PIDS} 2> /dev/null; do + sleep 2; +done" -systemctl_ceph stop +# Update (Ceph to Jewel) yum -y install python-zaqarclient # needed for os-collect-config yum -y update -systemctl_ceph start -ceph osd unset noout +# Restart/Exit if not on Jewel, only in that case we need the changes +UPDATED_VERSION=$(ceph --version | awk '{print $3}') +if [[ "$UPDATED_VERSION" =~ ^0\.94.* ]]; then + echo WARNING: Ceph was not upgraded, restarting daemon + for OSD_ID in $OSD_IDS; do + service ceph start osd.${OSD_ID} + done +elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then + # RPM could own some of these but we can't take risks on the pre-existing files + for d in /var/lib/ceph/osd /var/log/ceph /var/run/ceph /etc/ceph; do + chown -R ceph:ceph $d + done + + # Replay udev events with newer rules + udevadm trigger && udevadm settle + + # Enable systemd unit + systemctl enable ceph-osd.target + for OSD_ID in $OSD_IDS; do + systemctl enable ceph-osd@${OSD_ID} + systemctl start ceph-osd@${OSD_ID} + done + echo INFO: Ceph was upgraded to Jewel +else + echo ERROR: Ceph was upgraded to an unknown release, daemon is stopped, need manual intervention + exit 1 +fi + +ceph osd unset noout +ceph osd unset norebalance +ceph osd unset nodeep-scrub +ceph osd unset noscrub ENDOFCAT # ensure the permissions are OK chmod 0755 $UPGRADE_SCRIPT - diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh index 1637cee2..fd1fd0dc 100755 --- a/extraconfig/tasks/pacemaker_resource_restart.sh +++ b/extraconfig/tasks/pacemaker_resource_restart.sh @@ -7,15 +7,23 @@ pacemaker_status=$(systemctl is-active pacemaker) # Run if pacemaker is running, we're the bootstrap node, # and we're updating the deployment (not creating). if [ "$pacemaker_status" = "active" -a \ - "$(hiera bootstrap_nodeid)" = "$(facter hostname)" -a \ - "$(hiera stack_action)" = "UPDATE" ]; then + "$(hiera bootstrap_nodeid)" = "$(facter hostname)" ]; then - PCMK_RESOURCES="haproxy-clone redis-master rabbitmq-clone galera-master openstack-cinder-volume openstack-cinder-backup" - # Ten minutes of timeout to restart each resource, given there are no constraints should be enough TIMEOUT=600 - for resource in $PCMK_RESOURCES; do - if pcs status | grep $resource; then - pcs resource restart --wait=$TIMEOUT $resource - fi + SERVICES_TO_RESTART="$(ls /var/lib/tripleo/pacemaker-restarts)" + PCS_STATUS_OUTPUT="$(pcs status)" + + for service in $SERVICES_TO_RESTART; do + if ! echo "$PCS_STATUS_OUTPUT" | grep $service; then + echo "Service $service not found as a pacemaker resource, cannot restart it." + exit 1 + fi + done + + for service in $SERVICES_TO_RESTART; do + echo "Restarting $service..." + pcs resource restart --wait=$TIMEOUT $service + rm -f /var/lib/tripleo/pacemaker-restarts/$service done + fi |