3 files changed, 174 insertions, 0 deletions
diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml
new file mode 100644
index 00000000..58d7b6d8
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml
@@ -0,0 +1,45 @@
+heat_template_version: 2014-10-16
+description: "Upgrade for Pacemaker deployments"
+
+parameters:
+  input_values:  # forwarded unchanged to both deployment groups in this template
+    description: input values for the software deployments
+    type: json
+  servers:  # forwarded to the `servers` property of both deployment groups
+    type: json
+
+resources:
+  ControllerPacemakerUpgradeConfig_Step1:
+    type: OS::Heat::SoftwareConfig
+    properties:
+      config:
+        list_join:  # joined with an empty separator: shared helpers first, then the step 1 script
+          - ''
+          - - get_file: pacemaker_common_functions.sh
+            - get_file: major_upgrade_pacemaker_1.sh
+      group: script
+
+  ControllerPacemakerUpgradeDeployment_Step1:
+    type: OS::Heat::SoftwareDeploymentGroup
+    properties:
+      config: {get_resource: ControllerPacemakerUpgradeConfig_Step1}  # the step 1 script bundle
+      input_values: {get_param: input_values}
+      servers: {get_param: servers}
+
+  ControllerPacemakerUpgradeConfig_Step2:
+    type: OS::Heat::SoftwareConfig
+    properties:
+      config:
+        list_join:  # joined with an empty separator: shared helpers first, then the step 2 script
+          - ''
+          - - get_file: pacemaker_common_functions.sh
+            - get_file: major_upgrade_pacemaker_2.sh
+      group: script
+
+  ControllerPacemakerUpgrade2Deployment_Step2:  # NOTE(review): extra "2" vs. the Step1 deployment name - confirm it is intended
+    type: OS::Heat::SoftwareDeploymentGroup
+    depends_on: ControllerPacemakerUpgradeDeployment_Step1  # ordered after the step 1 deployment
+    properties:
+      config: {get_resource: ControllerPacemakerUpgradeConfig_Step2}  # the step 2 script bundle
+      input_values: {get_param: input_values}
+      servers: {get_param: servers}
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_pacemaker_1.sh
new file mode 100755
index 00000000..bee9a939
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_pacemaker_1.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Upgrade step 1: stop the cluster and Swift, update packages, then adjust service configuration.
+set -eu
+
+cluster_sync_timeout=600  # seconds allowed for pacemaker to become inactive further down
+unhealthy_pattern='(cluster is not currently running)|(OFFLINE:)'  # pcs status text that blocks the upgrade
+if pcs status 2>&1 | grep -E "$unhealthy_pattern"; then
+    echo_error "ERROR: upgrade cannot start with some cluster nodes being offline"
+    exit 1
+fi
+
+# Only the node whose hostname equals hiera's bootstrap_nodeid drives the cluster-wide shutdown.
+if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+    # check_resource is defined outside this script; presumably it waits up to N seconds for the state.
+    pcs resource disable httpd
+    check_resource httpd stopped 1800
+    # openstack-keystone is only handled when pcs status mentions it.
+    if pcs status | grep openstack-keystone; then
+        pcs resource disable openstack-keystone
+        check_resource openstack-keystone stopped 1800
+    fi
+    # The remaining resources are disabled in this order, each checked with the same 600 value.
+    for pcmk_resource in redis mongod rabbitmq memcached galera; do
+        pcs resource disable "$pcmk_resource"
+        check_resource "$pcmk_resource" stopped 600
+    done
+
+    # Finally stop the cluster itself on every node.
+    pcs cluster stop --all
+fi
+
+# Swift isn't controlled by pacemaker
+for swift_unit in openstack-swift-account{-auditor,-reaper,-replicator,} \
+                  openstack-swift-container{-auditor,-replicator,-updater,} \
+                  openstack-swift-object{-auditor,-replicator,-updater,} openstack-swift-proxy; do
+    systemctl stop "$swift_unit"  # the empty brace alternative yields the bare account/container/object unit
+done
+
+# Poll every 5 seconds until pacemaker is inactive; give up after cluster_sync_timeout seconds.
+shutdown_deadline=$(( $(date +%s) + cluster_sync_timeout ))
+while systemctl is-active pacemaker; do
+    sleep 5
+    if (( $(date +%s) > shutdown_deadline )); then
+        echo_error "ERROR: cluster shutdown timed out"
+        exit 1
+    fi
+done
+
+yum update -y  # update every installed package; the cluster and Swift were stopped above
+
+# Pin messages sent to compute nodes to liberty, these will be upgraded later
+crudini --set /etc/nova/nova.conf upgrade_levels compute liberty
+# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
+# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
+crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
+# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
+# Change-Id: Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 - Swift Ceilometer middleware no longer exists
+crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_pacemaker_2.sh
new file mode 100755
index 00000000..0b92a3bb
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_pacemaker_2.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Upgrade step 2: restart the cluster, run the database syncs, then re-enable services and Swift.
+set -eu
+
+galera_sync_timeout=600     # seconds allowed for clustercheck to start succeeding
+cluster_settle_timeout=600  # duration handed to `timeout` around crm_resource --wait
+cluster_form_timeout=600    # seconds allowed for pcs status to stop reporting offline nodes
+
+# Only the node whose hostname equals hiera's bootstrap_nodeid drives the cluster-wide start-up.
+if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+    pcs cluster start --all
+
+    # Poll every 5 seconds until pcs status no longer reports a stopped cluster or OFFLINE nodes.
+    form_deadline=$(( $(date +%s) + cluster_form_timeout ))
+    while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
+        sleep 5
+        if (( $(date +%s) > form_deadline )); then
+            echo_error "ERROR: timed out forming the cluster"
+            exit 1
+        fi
+    done
+
+    # crm_resource --wait runs under `timeout`, bounded by cluster_settle_timeout.
+    if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
+        echo_error "ERROR: timed out waiting for cluster to finish transition"
+        exit 1
+    fi
+
+    for pcmk_resource in galera mongod; do
+        pcs resource enable "$pcmk_resource"
+        check_resource "$pcmk_resource" started 600
+    done
+
+    # Hold off the db syncs until clustercheck succeeds, for at most galera_sync_timeout seconds.
+    sync_deadline=$(( $(date +%s) + galera_sync_timeout ))
+    while ! clustercheck; do
+        sleep 5
+        if (( $(date +%s) > sync_deadline )); then
+            echo_error "ERROR galera sync timed out"
+            exit 1
+        fi
+    done
+
+    # Run all the db syncs
+    # TODO: check if this can be triggered in puppet and removed from here
+    ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
+    cinder-manage db sync
+    glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
+    heat-manage --config-file /etc/heat/heat.conf db_sync
+    keystone-manage db_sync
+    neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
+    nova-manage db sync
+
+    for pcmk_resource in memcached rabbitmq redis; do
+        pcs resource enable "$pcmk_resource"
+        check_resource "$pcmk_resource" started 600
+    done
+    if pcs status | grep openstack-keystone; then  # only when pcs status mentions the resource
+        pcs resource enable openstack-keystone
+        check_resource openstack-keystone started 1800
+    fi
+    pcs resource enable httpd
+    check_resource httpd started 1800
+fi
+
+# Swift isn't controlled by pacemaker
+for swift_unit in openstack-swift-account{-auditor,-reaper,-replicator,} \
+                  openstack-swift-container{-auditor,-replicator,-updater,} \
+                  openstack-swift-object{-auditor,-replicator,-updater,} openstack-swift-proxy; do
+    systemctl start "$swift_unit"  # the empty brace alternative yields the bare account/container/object unit
+done