diff options
Diffstat (limited to 'extraconfig')
-rw-r--r-- | extraconfig/tasks/major_upgrade_compute.sh | 24 | ||||
-rwxr-xr-x | extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh | 58 | ||||
-rwxr-xr-x | extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh | 71 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_pacemaker.yaml | 94 | ||||
-rw-r--r-- | extraconfig/tasks/noop.yaml | 16 | ||||
-rwxr-xr-x | extraconfig/tasks/pacemaker_common_functions.sh | 39 | ||||
-rwxr-xr-x | extraconfig/tasks/pacemaker_resource_restart.sh | 32 | ||||
-rw-r--r-- | extraconfig/tasks/post_puppet_pacemaker.yaml | 6 | ||||
-rwxr-xr-x | extraconfig/tasks/yum_update.sh | 7 | ||||
-rw-r--r-- | extraconfig/tasks/yum_update_noop.yaml | 29 |
10 files changed, 341 insertions, 35 deletions
diff --git a/extraconfig/tasks/major_upgrade_compute.sh b/extraconfig/tasks/major_upgrade_compute.sh new file mode 100644 index 00000000..2f19d573 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_compute.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# This delivers the compute upgrade script to be invoked as part of the tripleo +# major upgrade workflow. +# +set -eu + +UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh + +cat > $UPGRADE_SCRIPT << ENDOFCAT +### DO NOT MODIFY THIS FILE +### This file is automatically delivered to the compute nodes as part of the +### tripleo upgrades workflow + +# pin nova to kilo (messaging +-1) for the nova-compute service + +crudini --set /etc/nova/nova.conf upgrade_levels compute $upgrade_level_nova_compute +yum -y update + +ENDOFCAT + +# ensure the permissions are OK +chmod 0755 $UPGRADE_SCRIPT + diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh new file mode 100755 index 00000000..5604bb0d --- /dev/null +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +set -eu + +cluster_sync_timeout=600 + +if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then + echo_error "ERROR: upgrade cannot start with some cluster nodes being offline" + exit 1 +fi + +if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then + pcs resource disable httpd + check_resource httpd stopped 1800 + if pcs status | grep openstack-keystone; then + pcs resource disable openstack-keystone + check_resource openstack-keystone stopped 1800 + fi + pcs resource disable redis + check_resource redis stopped 600 + pcs resource disable mongod + check_resource mongod stopped 600 + pcs resource disable rabbitmq + check_resource rabbitmq stopped 600 + pcs resource disable memcached + check_resource memcached stopped 600 + pcs resource disable galera + check_resource galera stopped 600 + pcs cluster stop 
--all +fi + +# Swift isn't controlled by pacemaker +for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \ +openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \ +openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do + systemctl stop $S +done + +tstart=$(date +%s) +while systemctl is-active pacemaker; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > cluster_sync_timeout )) ; then + echo_error "ERROR: cluster shutdown timed out" + exit 1 + fi +done + +yum update -y + +# Pin messages sent to compute nodes to kilo, these will be upgraded later +crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute" +# https://bugzilla.redhat.com/show_bug.cgi?id=1284047 +# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435 +crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit +# https://bugzilla.redhat.com/show_bug.cgi?id=1284058 +# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists +crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server" diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh new file mode 100755 index 00000000..0b92a3bb --- /dev/null +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +set -eu + +cluster_form_timeout=600 +cluster_settle_timeout=600 +galera_sync_timeout=600 + +if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then + pcs cluster start --all + + tstart=$(date +%s) + while pcs status 2>&1 | grep -E '(cluster is not currently 
running)|(OFFLINE:)'; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > cluster_form_timeout )) ; then + echo_error "ERROR: timed out forming the cluster" + exit 1 + fi + done + + if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then + echo_error "ERROR: timed out waiting for cluster to finish transition" + exit 1 + fi + + pcs resource enable galera + check_resource galera started 600 + pcs resource enable mongod + check_resource mongod started 600 + + tstart=$(date +%s) + while ! clustercheck; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > galera_sync_timeout )) ; then + echo_error "ERROR galera sync timed out" + exit 1 + fi + done + + # Run all the db syncs + # TODO: check if this can be triggered in puppet and removed from here + ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf + cinder-manage db sync + glance-manage --config-file=/etc/glance/glance-registry.conf db_sync + heat-manage --config-file /etc/heat/heat.conf db_sync + keystone-manage db_sync + neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head + nova-manage db sync + + pcs resource enable memcached + check_resource memcached started 600 + pcs resource enable rabbitmq + check_resource rabbitmq started 600 + pcs resource enable redis + check_resource redis started 600 + if pcs status | grep openstack-keystone; then + pcs resource enable openstack-keystone + check_resource openstack-keystone started 1800 + fi + pcs resource enable httpd + check_resource httpd started 1800 +fi + +# Swift isn't controlled by pacemaker +for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \ +openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \ +openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object 
openstack-swift-proxy; do + systemctl start $S +done diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml new file mode 100644 index 00000000..5a11bae9 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml @@ -0,0 +1,94 @@ +heat_template_version: 2014-10-16 +description: 'Upgrade for Pacemaker deployments' + +parameters: + controller_servers: + type: json + compute_servers: + type: json + blockstorage_servers: + type: json + objectstorage_servers: + type: json + cephstorage_servers: + type: json + input_values: + type: json + description: input values for the software deployments + + UpgradeLevelNovaCompute: + type: string + description: Nova Compute upgrade level + default: '' + +resources: + # TODO(jistr): for Mitaka->Newton upgrades and further we can use + # map_merge with input_values instead of feeding params into scripts + # via str_replace on bash snippets + + ControllerPacemakerUpgradeConfig_Step1: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - str_replace: + template: | + #!/bin/bash + upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE' + params: + UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute} + - get_file: pacemaker_common_functions.sh + - get_file: major_upgrade_controller_pacemaker_1.sh + + ControllerPacemakerUpgradeDeployment_Step1: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: controller_servers} + config: {get_resource: ControllerPacemakerUpgradeConfig_Step1} + input_values: {get_param: input_values} + + ControllerPacemakerUpgradeConfig_Step2: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - get_file: pacemaker_common_functions.sh + - get_file: major_upgrade_controller_pacemaker_2.sh + + ControllerPacemakerUpgradeDeployment_Step2: + type: OS::Heat::SoftwareDeploymentGroup + depends_on: 
ControllerPacemakerUpgradeDeployment_Step1 + properties: + servers: {get_param: controller_servers} + config: {get_resource: ControllerPacemakerUpgradeConfig_Step2} + input_values: {get_param: input_values} + + ComputeDeliverUpgradeConfig_Step3: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - str_replace: + template: | + #!/bin/bash + upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE' + params: + UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute} + - get_file: pacemaker_common_functions.sh + - get_file: major_upgrade_compute.sh + + ComputeDeliverUpgradeConfigDeployment_Step3: + type: OS::Heat::SoftwareDeploymentGroup + depends_on: ControllerPacemakerUpgradeDeployment_Step2 + properties: + servers: {get_param: compute_servers} + config: {get_resource: ComputeDeliverUpgradeConfig_Step3} + input_values: {get_param: input_values} + + diff --git a/extraconfig/tasks/noop.yaml b/extraconfig/tasks/noop.yaml index 0cff7469..dbb863be 100644 --- a/extraconfig/tasks/noop.yaml +++ b/extraconfig/tasks/noop.yaml @@ -4,6 +4,22 @@ description: 'No-op task' parameters: servers: type: json + default: [] + controller_servers: + type: json + default: [] + compute_servers: + type: json + default: [] + blockstorage_servers: + type: json + default: [] + objectstorage_servers: + type: json + default: [] + cephstorage_servers: + type: json + default: [] input_values: type: json default: {} diff --git a/extraconfig/tasks/pacemaker_common_functions.sh b/extraconfig/tasks/pacemaker_common_functions.sh new file mode 100755 index 00000000..32d06c4a --- /dev/null +++ b/extraconfig/tasks/pacemaker_common_functions.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +set -eu + +function check_resource { + + if [ "$#" -ne 3 ]; then + echo_error "ERROR: check_resource function expects 3 parameters, $# given" + exit 1 + fi + + service=$1 + state=$2 + timeout=$3 + + if [ "$state" = "stopped" ]; then + match_for_incomplete='Started' + else # started + 
match_for_incomplete='Stopped' + fi + + if timeout -k 10 $timeout crm_resource --wait; then + node_states=$(pcs status --full | grep "$service" | grep -v Clone) + if echo "$node_states" | grep -q "$match_for_incomplete"; then + echo_error "ERROR: cluster finished transition but $service was not in $state state, exiting." + exit 1 + else + echo "$service has $state" + fi + else + echo_error "ERROR: cluster remained unstable for more than $timeout seconds, exiting." + exit 1 + fi + +} + +function echo_error { + echo "$@" | tee /dev/fd/2 # print the message on stdout and copy it to stderr (fd 2) +} diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh index 12201097..b2bdc55a 100755 --- a/extraconfig/tasks/pacemaker_resource_restart.sh +++ b/extraconfig/tasks/pacemaker_resource_restart.sh @@ -3,38 +3,6 @@ set -eux pacemaker_status=$(systemctl is-active pacemaker) -check_interval=3 - -function check_resource { - - service=$1 - state=$2 - timeout=$3 - tstart=$(date +%s) - tend=$(( $tstart + $timeout )) - - if [ "$state" = "stopped" ]; then - match_for_incomplete='Started' - else # started - match_for_incomplete='Stopped' - fi - - while (( $(date +%s) < $tend )); do - node_states=$(pcs status --full | grep "$service" | grep -v Clone) - if echo "$node_states" | grep -q "$match_for_incomplete"; then - echo "$service not yet $state, sleeping $check_interval seconds." - sleep $check_interval - else - echo "$service has $state" - timeout -k 10 $timeout crm_resource --wait - return - fi - done - - echo "$service never $state after $timeout seconds" | tee /dev/fd/2 - exit 1 - -} # Run if pacemaker is running, we're the bootstrap node, # and we're updating the deployment (not creating). 
diff --git a/extraconfig/tasks/post_puppet_pacemaker.yaml b/extraconfig/tasks/post_puppet_pacemaker.yaml index 7de41d94..fbed9ce5 100644 --- a/extraconfig/tasks/post_puppet_pacemaker.yaml +++ b/extraconfig/tasks/post_puppet_pacemaker.yaml @@ -33,7 +33,11 @@ resources: type: OS::Heat::SoftwareConfig properties: group: script - config: {get_file: pacemaker_resource_restart.sh} + config: + list_join: + - '' + - - get_file: pacemaker_common_functions.sh + - get_file: pacemaker_resource_restart.sh ControllerPostPuppetRestartDeployment: type: OS::Heat::SoftwareDeployments diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh index 39179024..59e4be45 100755 --- a/extraconfig/tasks/yum_update.sh +++ b/extraconfig/tasks/yum_update.sh @@ -128,6 +128,9 @@ openstack-nova-scheduler" # mongod start timeout is higher, setting only stop timeout pcs -f $pacemaker_dumpfile resource update mongod op start timeout=370s op stop timeout=200s + echo "Making sure rabbitmq has the notify=true meta parameter" + pcs -f $pacemaker_dumpfile resource update rabbitmq meta notify=true + echo "Applying new Pacemaker config" if ! 
pcs cluster cib-push $pacemaker_dumpfile; then echo "ERROR failed to apply new pacemaker config" @@ -151,14 +154,14 @@ openstack-nova-scheduler" kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || : else echo "Upgrading openstack-puppet-modules" - yum -y update openstack-puppet-modules + yum -q -y update openstack-puppet-modules echo "Upgrading other packages is handled by config management tooling" echo -n "true" > $heat_outputs_path.update_managed_packages exit 0 fi command=${command:-update} -full_command="yum -y $command $command_arguments" +full_command="yum -q -y $command $command_arguments" echo "Running: $full_command" result=$($full_command) diff --git a/extraconfig/tasks/yum_update_noop.yaml b/extraconfig/tasks/yum_update_noop.yaml new file mode 100644 index 00000000..b759d9c5 --- /dev/null +++ b/extraconfig/tasks/yum_update_noop.yaml @@ -0,0 +1,29 @@ +heat_template_version: 2014-10-16 +description: 'No-op yum update task' + +resources: + + config: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: | + #!/bin/bash + echo -n "false" > $heat_outputs_path.update_managed_packages + inputs: + - name: update_identifier + description: yum will only run for previously unused values of update_identifier + default: '' + - name: command + description: yum sub-command to run, defaults to "update" + default: update + - name: command_arguments + description: yum command arguments, defaults to "" + default: '' + outputs: + - name: update_managed_packages + description: boolean value indicating whether to upgrade managed packages + +outputs: + OS::stack_id: + value: {get_resource: config} |