diff options
Diffstat (limited to 'extraconfig/tasks')
5 files changed, 33 insertions, 6 deletions
diff --git a/extraconfig/tasks/major_upgrade_ceph_mon.sh b/extraconfig/tasks/major_upgrade_ceph_mon.sh index b633e658..e0d160f1 100755 --- a/extraconfig/tasks/major_upgrade_ceph_mon.sh +++ b/extraconfig/tasks/major_upgrade_ceph_mon.sh @@ -5,7 +5,7 @@ set -o pipefail echo INFO: starting $(basename "$0") # Exit if not running -if ! pidof ceph-mon; then +if ! pidof ceph-mon &> /dev/null; then echo INFO: ceph-mon is not running, skipping exit 0 fi @@ -54,7 +54,7 @@ if [[ "$UPDATED_VERSION" =~ ^0\.94.* ]]; then elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then # RPM could own some of these but we can't take risks on the pre-existing files for d in /var/lib/ceph/mon /var/log/ceph /var/run/ceph /etc/ceph; do - chown -R ceph:ceph $d || echo WARNING: chown of $d failed + chown -L -R ceph:ceph $d || echo WARNING: chown of $d failed done # Replay udev events with newer rules @@ -71,6 +71,10 @@ elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then sleep 10; done" + # if tunables become legacy, cluster status will be HEALTH_WARN causing + # upgrade to fail on following node + ceph osd crush tunables default + echo INFO: Ceph was upgraded to Jewel else echo ERROR: Ceph was upgraded to an unknown release, daemon is stopped, need manual intervention diff --git a/extraconfig/tasks/major_upgrade_ceph_storage.sh b/extraconfig/tasks/major_upgrade_ceph_storage.sh index dc80a724..705e84eb 100644 --- a/extraconfig/tasks/major_upgrade_ceph_storage.sh +++ b/extraconfig/tasks/major_upgrade_ceph_storage.sh @@ -18,7 +18,7 @@ set -eu echo INFO: starting $(basename "$0") # Exit if not running -if ! pidof ceph-osd; then +if ! pidof ceph-osd &> /dev/null; then echo INFO: ceph-osd is not running, skipping exit 0 fi @@ -63,7 +63,7 @@ if [[ "$UPDATED_VERSION" =~ ^0\.94.* ]]; then elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then # RPM could own some of these but we can't take risks on the pre-existing files for d in /var/lib/ceph/osd /var/log/ceph /var/run/ceph /etc/ceph; do - chown -R ceph:ceph $d || echo WARNING: chown of $d failed + chown -L -R ceph:ceph $d || echo WARNING: chown of $d failed done # Replay udev events with newer rules diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh index d4200e5f..23074fcb 100755 --- a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh @@ -20,9 +20,13 @@ check_disk_for_mysql_dump STONITH_STATE=$(pcs property show stonith-enabled | grep "stonith-enabled" | awk '{ print $2 }') pcs property set stonith-enabled=false -# Migrate to HA NG +# Migrate to HA NG and fix up rabbitmq queues +# We fix up the rabbitmq ha queues after the migration because it will +# restart the rabbitmq resource. Doing it after the migration means no other +# services will be restart as there are no other constraints if [[ -n $(is_bootstrap_node) ]]; then migrate_full_to_ng_ha + rabbitmq_mitaka_newton_upgrade fi # After migrating the cluster to HA-NG the services not under pacemaker's control diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh index fc365939..4203eba9 100755 --- a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh @@ -33,7 +33,7 @@ fi start_or_enable_service galera check_resource galera started 600 start_or_enable_service redis -check_resource galera started 600 +check_resource redis started 600 # We need mongod which is now a systemd service up and running before calling # ceilometer-dbsync. There is still a race here: mongod might not be up on all nodes # so ceilometer-dbsync will fail a couple of times before that. As it retries indefinitely diff --git a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh index cd78f838..5b834b2b 100644 --- a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh +++ b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh @@ -169,3 +169,22 @@ function migrate_full_to_ng_ha { fi fi } + +# This function will make sure that the rabbitmq ha policies are converted from mitaka to newton +# In mitaka we had: Attributes: set_policy="ha-all ^(?!amq\.).* {"ha-mode":"all"}" +# In newton we want: Attributes: set_policy="ha-all ^(?!amq\.).* {"ha-mode":"exactly","ha-params":2}" +# The nr "2" should be CEIL(N/2) where N is the number of Controllers (i.e. rabbit instances) +# Note that changing an attribute like this makes the rabbitmq resource restart +function rabbitmq_mitaka_newton_upgrade { + if pcs resource show rabbitmq-clone | grep -q -E "Attributes:.*\"ha-mode\":\"all\""; then + # Number of controller is obtained by counting how many hostnames we + # have in controller_node_names hiera key + nr_controllers=$(($(hiera controller_node_names | grep -o "," |wc -l) + 1)) + nr_queues=$(($nr_controllers / 2 + ($nr_controllers % 2))) + if ! [ $nr_queues -gt 0 -a $nr_queues -le $nr_controllers ]; then + echo_error "ERROR: The nr. of HA queues during the M/N upgrade is out of range $nr_queues" + exit 1 + fi + pcs resource update rabbitmq set_policy='ha-all ^(?!amq\\.).* {"ha-mode":"exactly","ha-params":'"$nr_queues}" --wait=600 + fi +} |