diff options
-rw-r--r-- | environments/major-upgrade-pacemaker.yaml | 3 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_pacemaker.yaml | 45 | ||||
-rwxr-xr-x | extraconfig/tasks/major_upgrade_pacemaker_1.sh | 58 | ||||
-rwxr-xr-x | extraconfig/tasks/major_upgrade_pacemaker_2.sh | 71 |
4 files changed, 177 insertions, 0 deletions
diff --git a/environments/major-upgrade-pacemaker.yaml b/environments/major-upgrade-pacemaker.yaml new file mode 100644 index 00000000..db078404 --- /dev/null +++ b/environments/major-upgrade-pacemaker.yaml @@ -0,0 +1,3 @@ +resource_registry: + OS::TripleO::Tasks::ControllerPrePuppet: ../extraconfig/tasks/major_upgrade_pacemaker.yaml + OS::TripleO::Tasks::ControllerPostPuppet: ../extraconfig/tasks/noop.yaml diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml new file mode 100644 index 00000000..58d7b6d8 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml @@ -0,0 +1,45 @@ +heat_template_version: 2014-10-16 +description: 'Upgrade for Pacemaker deployments' + +parameters: + servers: + type: json + input_values: + type: json + description: input values for the software deployments + +resources: + ControllerPacemakerUpgradeConfig_Step1: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - get_file: pacemaker_common_functions.sh + - get_file: major_upgrade_pacemaker_1.sh + + ControllerPacemakerUpgradeDeployment_Step1: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: servers} + config: {get_resource: ControllerPacemakerUpgradeConfig_Step1} + input_values: {get_param: input_values} + + ControllerPacemakerUpgradeConfig_Step2: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - get_file: pacemaker_common_functions.sh + - get_file: major_upgrade_pacemaker_2.sh + + ControllerPacemakerUpgrade2Deployment_Step2: + type: OS::Heat::SoftwareDeploymentGroup + depends_on: ControllerPacemakerUpgradeDeployment_Step1 + properties: + servers: {get_param: servers} + config: {get_resource: ControllerPacemakerUpgradeConfig_Step2} + input_values: {get_param: input_values} diff --git a/extraconfig/tasks/major_upgrade_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_pacemaker_1.sh new file mode 100755 
index 00000000..bee9a939 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_pacemaker_1.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +set -eu + +cluster_sync_timeout=600 + +if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then + echo_error "ERROR: upgrade cannot start with some cluster nodes being offline" + exit 1 +fi + +if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then + pcs resource disable httpd + check_resource httpd stopped 1800 + if pcs status | grep openstack-keystone; then + pcs resource disable openstack-keystone + check_resource openstack-keystone stopped 1800 + fi + pcs resource disable redis + check_resource redis stopped 600 + pcs resource disable mongod + check_resource mongod stopped 600 + pcs resource disable rabbitmq + check_resource rabbitmq stopped 600 + pcs resource disable memcached + check_resource memcached stopped 600 + pcs resource disable galera + check_resource galera stopped 600 + pcs cluster stop --all +fi + +# Swift isn't controlled by pacemaker +for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \ +openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \ +openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do + systemctl stop $S +done + +tstart=$(date +%s) +while systemctl is-active pacemaker; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > cluster_sync_timeout )) ; then + echo_error "ERROR: cluster shutdown timed out" + exit 1 + fi +done + +yum update -y + +# Pin messages sent to compute nodes to liberty, these will be upgraded later +crudini --set /etc/nova/nova.conf upgrade_levels compute liberty +# https://bugzilla.redhat.com/show_bug.cgi?id=1284047 +# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435 +crudini --set 
/etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit +# https://bugzilla.redhat.com/show_bug.cgi?id=1284058 +# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists +crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server" diff --git a/extraconfig/tasks/major_upgrade_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_pacemaker_2.sh new file mode 100755 index 00000000..0b92a3bb --- /dev/null +++ b/extraconfig/tasks/major_upgrade_pacemaker_2.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +set -eu + +cluster_form_timeout=600 +cluster_settle_timeout=600 +galera_sync_timeout=600 + +if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then + pcs cluster start --all + + tstart=$(date +%s) + while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > cluster_form_timeout )) ; then + echo_error "ERROR: timed out forming the cluster" + exit 1 + fi + done + + if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then + echo_error "ERROR: timed out waiting for cluster to finish transition" + exit 1 + fi + + pcs resource enable galera + check_resource galera started 600 + pcs resource enable mongod + check_resource mongod started 600 + + tstart=$(date +%s) + while ! 
clustercheck; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > galera_sync_timeout )) ; then + echo_error "ERROR galera sync timed out" + exit 1 + fi + done + + # Run all the db syncs + # TODO: check if this can be triggered in puppet and removed from here + ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf + cinder-manage db sync + glance-manage --config-file=/etc/glance/glance-registry.conf db_sync + heat-manage --config-file /etc/heat/heat.conf db_sync + keystone-manage db_sync + neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head + nova-manage db sync + + pcs resource enable memcached + check_resource memcached started 600 + pcs resource enable rabbitmq + check_resource rabbitmq started 600 + pcs resource enable redis + check_resource redis started 600 + if pcs status | grep openstack-keystone; then + pcs resource enable openstack-keystone + check_resource openstack-keystone started 1800 + fi + pcs resource enable httpd + check_resource httpd started 1800 +fi + +# Swift isn't controlled by heat +for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \ +openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \ +openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do + systemctl start $S +done |