A few major-upgrade issues

This commit does the following: 1. We now explicitly disable/stop and then remove the resources that are moving to systemd. We do this because we want to make sure they are all stopped before doing a yum upgrade, which otherwise would take ages due to rabbitmq and galera being down. It is best if we do this via pcs while we do the HA Full -> HA NG migration because it is simpler to make sure all the services are stopped at that stage. For extra safety we can still do a check by hand. By doing it via pacemaker we have the guarantee that all the migrated services are already down when we stop the cluster (which happens to be a synchronization point between all controller nodes). That way we can be certain that they are all down on all nodes before starting the yum upgrade process. 2. We actually need to start the systemd services in major_upgrade_controller_pacemaker_2.sh and not stop them. 3. We need to use the proper bash variable name in the for loop (`for service`, not `for $service`). 4. Use is_bootstrap_node everywhere to make the code more consistent. Change-Id: Ic565c781b80357bed9483df45a4a94ec0423487c Closes-Bug: #1627490
author: Michele Baldessari <michele@acksyn.org> 2016-09-25 14:10:31 +0200
committer: Michele Baldessari <michele@acksyn.org> 2016-09-25 14:10:31 +0200
commit: f9e6a26f32aea4d3c40178f87b61efb924f81f97 (patch)
tree: 878b29e6ccfc8bdadd60c02c8d447cac311b2b7e /extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
parent: 5d49b75b6e8c608ede6fc7bd63b06055ce5f6317 (diff)
1 files changed, 22 insertions, 5 deletions
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
index 2490ce27..4ceedb9b 100755
--- a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
@@ -19,7 +19,7 @@ STONITH_STATE=$(pcs property show stonith-enabled | grep "stonith-enabled" | awk
 pcs property set stonith-enabled=false
 
 # Migrate to HA NG
-if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+if [[ -n $(is_bootstrap_node) ]]; then
     migrate_full_to_ng_ha
 fi
 
@@ -29,9 +29,26 @@ fi
 # is going to take a long time because rabbit is down. By having the service stopped
 # systemctl try-restart is a noop
 
-for $service in $(services_to_migrate); do
+for service in $(services_to_migrate); do
     manage_systemd_service stop "${service%%-clone}"
-    check_resource_systemd "${service%%-clone}" stopped 600
+    # So the reason for not reusing check_resource_systemd is that
+    # I have observed systemctl is-active returning unknown with at least
+    # one service that was stopped (See LP 1627254)
+    timeout=600
+    tstart=$(date +%s)
+    tend=$(( $tstart + $timeout ))
+    check_interval=3
+    while (( $(date +%s) < $tend )); do
+      if [[ "$(systemctl is-active ${service%%-clone})" = "active" ]]; then
+        echo "$service still active, sleeping $check_interval seconds."
+        sleep $check_interval
+      else
+        # we do not care if it is inactive, unknown or failed as long as it is
+        # not running
+        break
+      fi
+
+    done
 done
 
 # In case the mysql package is updated, the database on disk must be
@@ -46,7 +63,7 @@ done
 # on mysql package versionning, but this can be overriden manually
 # to support specific upgrade scenario
 
-if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+if [[ -n $(is_bootstrap_node) ]]; then
     if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
         mysqldump $backup_flags > "$MYSQL_BACKUP_DIR/openstack_database.sql"
         cp -rdp /etc/my.cnf* "$MYSQL_BACKUP_DIR"
@@ -68,7 +85,7 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)
 fi
 
 
-# Swift isn't controled by pacemaker
+# Swift isn't controlled by pacemaker
 systemctl_swift stop
 
 tstart=$(date +%s)
author	Michele Baldessari <michele@acksyn.org>	2016-09-25 14:10:31 +0200
committer	Michele Baldessari <michele@acksyn.org>	2016-09-25 14:10:31 +0200
commit	f9e6a26f32aea4d3c40178f87b61efb924f81f97 (patch)
tree	878b29e6ccfc8bdadd60c02c8d447cac311b2b7e /extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
parent	5d49b75b6e8c608ede6fc7bd63b06055ce5f6317 (diff)