From 0dd10ffe4fcd9b191eaceabcd7bb124a4db10b06 Mon Sep 17 00:00:00 2001
From: Jiri Stransky
Date: Thu, 21 Jan 2016 13:11:23 +0100
Subject: Introduce update/upgrade workflow

Change-Id: I7226070aa87416e79f25625647f8e3076c9e2c9a
---
 environments/major-upgrade-pacemaker.yaml          |  9 ++-
 .../tasks/major_upgrade_controller_pacemaker_1.sh  | 58 ++++++++++++++++++
 .../tasks/major_upgrade_controller_pacemaker_2.sh  | 71 ++++++++++++++++++++++
 extraconfig/tasks/major_upgrade_pacemaker.yaml     | 20 ++++--
 extraconfig/tasks/major_upgrade_pacemaker_1.sh     | 58 ------------------
 extraconfig/tasks/major_upgrade_pacemaker_2.sh     | 71 ----------------------
 extraconfig/tasks/noop.yaml                        | 16 +++++
 extraconfig/tasks/yum_update_noop.yaml             | 29 +++++++++
 overcloud-resource-registry-puppet.yaml            |  1 +
 overcloud.yaml                                     | 13 ++++
 10 files changed, 209 insertions(+), 137 deletions(-)
 create mode 100755 extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
 create mode 100755 extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
 delete mode 100755 extraconfig/tasks/major_upgrade_pacemaker_1.sh
 delete mode 100755 extraconfig/tasks/major_upgrade_pacemaker_2.sh
 create mode 100644 extraconfig/tasks/yum_update_noop.yaml

diff --git a/environments/major-upgrade-pacemaker.yaml b/environments/major-upgrade-pacemaker.yaml
index db078404..e3bbfcce 100644
--- a/environments/major-upgrade-pacemaker.yaml
+++ b/environments/major-upgrade-pacemaker.yaml
@@ -1,3 +1,8 @@
 resource_registry:
-  OS::TripleO::Tasks::ControllerPrePuppet: ../extraconfig/tasks/major_upgrade_pacemaker.yaml
-  OS::TripleO::Tasks::ControllerPostPuppet: ../extraconfig/tasks/noop.yaml
+  OS::TripleO::Tasks::UpdateWorkflow: ../extraconfig/tasks/major_upgrade_pacemaker.yaml
+  OS::TripleO::Tasks::PackageUpdate: ../extraconfig/tasks/yum_update_noop.yaml
+  OS::TripleO::ControllerPostDeployment: OS::Heat::None
+  OS::TripleO::ComputePostDeployment: OS::Heat::None
+  OS::TripleO::ObjectStoragePostDeployment: OS::Heat::None
+  OS::TripleO::BlockStoragePostDeployment: OS::Heat::None
+  OS::TripleO::CephStoragePostDeployment: OS::Heat::None
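This environment file is what switches a stack update into upgrade mode: it maps
OS::TripleO::Tasks::UpdateWorkflow to the pacemaker upgrade template and noops the
yum update and the per-role post-deployment steps. A rough invocation sketch, assuming
the usual tripleoclient workflow; the placeholder for the extra -e files stands for
whatever environments the original deployment used:

    # Re-run the deploy command used for the initial deployment, appending the
    # upgrade environment last so its resource_registry overrides take effect.
    openstack overcloud deploy --templates \
        -e <environment files from the initial deployment> \
        -e environments/major-upgrade-pacemaker.yaml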
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
new file mode 100755
index 00000000..bee9a939
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+set -eu
+
+cluster_sync_timeout=600
+
+if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then
+    echo_error "ERROR: upgrade cannot start with some cluster nodes being offline"
+    exit 1
+fi
+
+if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+    pcs resource disable httpd
+    check_resource httpd stopped 1800
+    if pcs status | grep openstack-keystone; then
+        pcs resource disable openstack-keystone
+        check_resource openstack-keystone stopped 1800
+    fi
+    pcs resource disable redis
+    check_resource redis stopped 600
+    pcs resource disable mongod
+    check_resource mongod stopped 600
+    pcs resource disable rabbitmq
+    check_resource rabbitmq stopped 600
+    pcs resource disable memcached
+    check_resource memcached stopped 600
+    pcs resource disable galera
+    check_resource galera stopped 600
+    pcs cluster stop --all
+fi
+
+# Swift isn't controlled by pacemaker
+for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
+openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
+openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do
+    systemctl stop $S
+done
+
+tstart=$(date +%s)
+while systemctl is-active pacemaker; do
+    sleep 5
+    tnow=$(date +%s)
+    if (( tnow-tstart > cluster_sync_timeout )) ; then
+        echo_error "ERROR: cluster shutdown timed out"
+        exit 1
+    fi
+done
+
+yum update -y
+
+# Pin messages sent to compute nodes to liberty, these will be upgraded later
+crudini --set /etc/nova/nova.conf upgrade_levels compute liberty
+# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
+# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
+crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
+# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
+# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
+crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
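Both controller scripts call echo_error and check_resource, which are prepended from
pacemaker_common_functions.sh by the list_join in major_upgrade_pacemaker.yaml further
down. The shipped helpers are not part of this patch; the sketch below is only an
illustration of the calling convention check_resource <resource> <started|stopped>
<timeout_seconds> that the scripts assume, not the real implementation:

    # Illustrative sketch only -- pacemaker_common_functions.sh is authoritative.
    echo_error() {
        # Send the message to stderr so it shows up in the deployment log.
        echo "$@" >&2
    }

    check_resource() {
        # check_resource <resource> <started|stopped> <timeout_seconds>
        local resource=$1 desired=$2 timeout=$3
        local tstart=$(date +%s)
        while true; do
            # Count resource instances that pacemaker currently reports as Started.
            local started
            started=$(pcs status --full 2>/dev/null | grep -c "${resource}.*Started" || true)
            if { [ "$desired" = stopped ] && [ "$started" -eq 0 ]; } || \
               { [ "$desired" = started ] && [ "$started" -gt 0 ]; }; then
                return 0
            fi
            if (( $(date +%s) - tstart > timeout )); then
                echo_error "ERROR: $resource did not reach state '$desired' within ${timeout}s"
                exit 1
            fi
            sleep 4
        done
    }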
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
new file mode 100755
index 00000000..0b92a3bb
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+set -eu
+
+cluster_form_timeout=600
+cluster_settle_timeout=600
+galera_sync_timeout=600
+
+if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+    pcs cluster start --all
+
+    tstart=$(date +%s)
+    while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
+        sleep 5
+        tnow=$(date +%s)
+        if (( tnow-tstart > cluster_form_timeout )) ; then
+            echo_error "ERROR: timed out forming the cluster"
+            exit 1
+        fi
+    done
+
+    if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
+        echo_error "ERROR: timed out waiting for cluster to finish transition"
+        exit 1
+    fi
+
+    pcs resource enable galera
+    check_resource galera started 600
+    pcs resource enable mongod
+    check_resource mongod started 600
+
+    tstart=$(date +%s)
+    while ! clustercheck; do
+        sleep 5
+        tnow=$(date +%s)
+        if (( tnow-tstart > galera_sync_timeout )) ; then
+            echo_error "ERROR: galera sync timed out"
+            exit 1
+        fi
+    done
+
+    # Run all the db syncs
+    # TODO: check if this can be triggered in puppet and removed from here
+    ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
+    cinder-manage db sync
+    glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
+    heat-manage --config-file /etc/heat/heat.conf db_sync
+    keystone-manage db_sync
+    neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
+    nova-manage db sync
+
+    pcs resource enable memcached
+    check_resource memcached started 600
+    pcs resource enable rabbitmq
+    check_resource rabbitmq started 600
+    pcs resource enable redis
+    check_resource redis started 600
+    if pcs status | grep openstack-keystone; then
+        pcs resource enable openstack-keystone
+        check_resource openstack-keystone started 1800
+    fi
+    pcs resource enable httpd
+    check_resource httpd started 1800
+fi
+
+# Swift isn't controlled by pacemaker
+for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
+openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
+openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do
+    systemctl start $S
+done
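The galera wait loop above calls clustercheck, the health-check script installed with
the HA database setup, and treats its exit status as "local node is synced". Where that
helper is not available, roughly the same check can be expressed directly against MySQL;
a hypothetical stand-in, assuming root access over the local socket:

    # Hypothetical replacement for the clustercheck call: succeed once the local
    # Galera node reports wsrep state "Synced".
    galera_synced() {
        local state
        state=$(mysql --batch --skip-column-names \
            -e "SHOW GLOBAL STATUS LIKE 'wsrep_local_state_comment';" | awk '{print $2}')
        [ "$state" = "Synced" ]
    }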
diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml
index 58d7b6d8..12268b0e 100644
--- a/extraconfig/tasks/major_upgrade_pacemaker.yaml
+++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml
@@ -2,7 +2,15 @@ heat_template_version: 2014-10-16
 description: 'Upgrade for Pacemaker deployments'
 
 parameters:
-  servers:
+  controller_servers:
+    type: json
+  compute_servers:
+    type: json
+  blockstorage_servers:
+    type: json
+  objectstorage_servers:
+    type: json
+  cephstorage_servers:
     type: json
   input_values:
     type: json
@@ -17,12 +25,12 @@ resources:
         list_join:
         - ''
         - - get_file: pacemaker_common_functions.sh
-          - get_file: major_upgrade_pacemaker_1.sh
+          - get_file: major_upgrade_controller_pacemaker_1.sh
 
   ControllerPacemakerUpgradeDeployment_Step1:
     type: OS::Heat::SoftwareDeploymentGroup
     properties:
-      servers: {get_param: servers}
+      servers: {get_param: controller_servers}
       config: {get_resource: ControllerPacemakerUpgradeConfig_Step1}
       input_values: {get_param: input_values}
 
@@ -34,12 +42,12 @@ resources:
         list_join:
         - ''
         - - get_file: pacemaker_common_functions.sh
-          - get_file: major_upgrade_pacemaker_2.sh
+          - get_file: major_upgrade_controller_pacemaker_2.sh
 
-  ControllerPacemakerUpgrade2Deployment_Step2:
+  ControllerPacemakerUpgradeDeployment_Step2:
     type: OS::Heat::SoftwareDeploymentGroup
     depends_on: ControllerPacemakerUpgradeDeployment_Step1
     properties:
-      servers: {get_param: servers}
+      servers: {get_param: controller_servers}
       config: {get_resource: ControllerPacemakerUpgradeConfig_Step2}
       input_values: {get_param: input_values}
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_pacemaker_1.sh
deleted file mode 100755
index bee9a939..00000000
--- a/extraconfig/tasks/major_upgrade_pacemaker_1.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-set -eu
-
-cluster_sync_timeout=600
-
-if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then
-    echo_error "ERROR: upgrade cannot start with some cluster nodes being offline"
-    exit 1
-fi
-
-if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
-    pcs resource disable httpd
-    check_resource httpd stopped 1800
-    if pcs status | grep openstack-keystone; then
-        pcs resource disable openstack-keystone
-        check_resource openstack-keystone stopped 1800
-    fi
-    pcs resource disable redis
-    check_resource redis stopped 600
-    pcs resource disable mongod
-    check_resource mongod stopped 600
-    pcs resource disable rabbitmq
-    check_resource rabbitmq stopped 600
-    pcs resource disable memcached
-    check_resource memcached stopped 600
-    pcs resource disable galera
-    check_resource galera stopped 600
-    pcs cluster stop --all
-fi
-
-# Swift isn't controled by pacemaker
-for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
-openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
-openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do
-    systemctl stop $S
-done
-
-tstart=$(date +%s)
-while systemctl is-active pacemaker; do
-    sleep 5
-    tnow=$(date +%s)
-    if (( tnow-tstart > cluster_sync_timeout )) ; then
-        echo_error "ERROR: cluster shutdown timed out"
-        exit 1
-    fi
-done
-
-yum update -y
-
-# Pin messages sent to compute nodes to kilo, these will be upgraded later
-crudini --set /etc/nova/nova.conf upgrade_levels compute liberty
-# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
-# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
-crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
-# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
-# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
-crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_pacemaker_2.sh
deleted file mode 100755
index 0b92a3bb..00000000
--- a/extraconfig/tasks/major_upgrade_pacemaker_2.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-
-set -eu
-
-cluster_form_timeout=600
-cluster_settle_timeout=600
-galera_sync_timeout=600
-
-if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
-    pcs cluster start --all
-
-    tstart=$(date +%s)
-    while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
-        sleep 5
-        tnow=$(date +%s)
-        if (( tnow-tstart > cluster_form_timeout )) ; then
-            echo_error "ERROR: timed out forming the cluster"
-            exit 1
-        fi
-    done
-
-    if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
-        echo_error "ERROR: timed out waiting for cluster to finish transition"
-        exit 1
-    fi
-
-    pcs resource enable galera
-    check_resource galera started 600
-    pcs resource enable mongod
-    check_resource mongod started 600
-
-    tstart=$(date +%s)
-    while ! clustercheck; do
-        sleep 5
-        tnow=$(date +%s)
-        if (( tnow-tstart > galera_sync_timeout )) ; then
-            echo_error "ERROR galera sync timed out"
-            exit 1
-        fi
-    done
-
-    # Run all the db syncs
-    # TODO: check if this can be triggered in puppet and removed from here
-    ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
-    cinder-manage db sync
-    glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
-    heat-manage --config-file /etc/heat/heat.conf db_sync
-    keystone-manage db_sync
-    neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
-    nova-manage db sync
-
-    pcs resource enable memcached
-    check_resource memcached started 600
-    pcs resource enable rabbitmq
-    check_resource rabbitmq started 600
-    pcs resource enable redis
-    check_resource redis started 600
-    if pcs status | grep openstack-keystone; then
-        pcs resource enable openstack-keystone
-        check_resource openstack-keystone started 1800
-    fi
-    pcs resource enable httpd
-    check_resource httpd started 1800
-fi
-
-# Swift isn't controled by heat
-for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
-openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
-openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do
-    systemctl start $S
-done
diff --git a/extraconfig/tasks/noop.yaml b/extraconfig/tasks/noop.yaml
index 0cff7469..dbb863be 100644
--- a/extraconfig/tasks/noop.yaml
+++ b/extraconfig/tasks/noop.yaml
@@ -4,6 +4,22 @@ description: 'No-op task'
 parameters:
   servers:
     type: json
+    default: []
+  controller_servers:
+    type: json
+    default: []
+  compute_servers:
+    type: json
+    default: []
+  blockstorage_servers:
+    type: json
+    default: []
+  objectstorage_servers:
+    type: json
+    default: []
+  cephstorage_servers:
+    type: json
+    default: []
   input_values:
     type: json
     default: {}
diff --git a/extraconfig/tasks/yum_update_noop.yaml b/extraconfig/tasks/yum_update_noop.yaml
new file mode 100644
index 00000000..b759d9c5
--- /dev/null
+++ b/extraconfig/tasks/yum_update_noop.yaml
@@ -0,0 +1,29 @@
+heat_template_version: 2014-10-16
+description: 'No-op yum update task'
+
+resources:
+
+  config:
+    type: OS::Heat::SoftwareConfig
+    properties:
+      group: script
+      config: |
+        #!/bin/bash
+        echo -n "false" > $heat_outputs_path.update_managed_packages
+      inputs:
+        - name: update_identifier
+          description: yum will only run for previously unused values of update_identifier
+          default: ''
+        - name: command
+          description: yum sub-command to run, defaults to "update"
+          default: update
+        - name: command_arguments
+          description: yum command arguments, defaults to ""
+          default: ''
+      outputs:
+        - name: update_managed_packages
+          description: boolean value indicating whether to upgrade managed packages
+
+outputs:
+  OS::stack_id:
+    value: {get_resource: config}
diff --git a/overcloud-resource-registry-puppet.yaml b/overcloud-resource-registry-puppet.yaml
index 7288aba8..b1a5816e 100644
--- a/overcloud-resource-registry-puppet.yaml
+++ b/overcloud-resource-registry-puppet.yaml
@@ -23,6 +23,7 @@ resource_registry:
   OS::TripleO::BootstrapNode::SoftwareConfig: puppet/bootstrap-config.yaml
 
   # Tasks (for internal TripleO usage)
+  OS::TripleO::Tasks::UpdateWorkflow: extraconfig/tasks/noop.yaml
   OS::TripleO::Tasks::PackageUpdate: extraconfig/tasks/yum_update.yaml
   OS::TripleO::Tasks::ControllerPrePuppet: extraconfig/tasks/noop.yaml
   OS::TripleO::Tasks::ControllerPostPuppet: extraconfig/tasks/noop.yaml
diff --git a/overcloud.yaml b/overcloud.yaml
index 9b95484e..0499fa67 100644
--- a/overcloud.yaml
+++ b/overcloud.yaml
@@ -1522,10 +1522,23 @@ resources:
       config: {get_resource: AllNodesValidationConfig}
       servers: {get_attr: [CephStorage, attributes, nova_server_resource]}
 
+  UpdateWorkflow:
+    type: OS::TripleO::Tasks::UpdateWorkflow
+    properties:
+      controller_servers: {get_attr: [Controller, attributes, nova_server_resource]}
+      compute_servers: {get_attr: [Compute, attributes, nova_server_resource]}
+      blockstorage_servers: {get_attr: [BlockStorage, attributes, nova_server_resource]}
+      objectstorage_servers: {get_attr: [ObjectStorage, attributes, nova_server_resource]}
+      cephstorage_servers: {get_attr: [CephStorage, attributes, nova_server_resource]}
+      input_values:
+        deploy_identifier: {get_param: DeployIdentifier}
+        update_identifier: {get_param: UpdateIdentifier}
+
   # Optional ExtraConfig for all nodes - all roles are passed in here, but
   # the nested template may configure each role differently (or not at all)
   AllNodesExtraConfig:
     type: OS::TripleO::AllNodesExtraConfig
+    depends_on: UpdateWorkflow
     properties:
       controller_servers: {get_attr: [Controller, attributes, nova_server_resource]}
       compute_servers: {get_attr: [Compute, attributes, nova_server_resource]}
-- 
cgit 1.2.3-korg
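Once a stack update carrying this workflow is underway, the new resources can be watched
from the undercloud to follow upgrade progress; a minimal sketch, assuming the default
"overcloud" stack name and the usual stackrc credentials file:

    # List the upgrade-related resources and their current status (run on the undercloud).
    source ~/stackrc
    heat resource-list -n 5 overcloud | grep -i -e UpdateWorkflow -e PacemakerUpgrade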