diff options
Diffstat (limited to 'extraconfig/tasks')
-rw-r--r-- | extraconfig/tasks/major_upgrade_block_storage.sh | 8 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_ceph_storage.sh | 35 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_compute.sh | 26 | ||||
-rwxr-xr-x | extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh | 59 | ||||
-rwxr-xr-x | extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh | 70 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_object_storage.sh | 39 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_pacemaker.yaml | 85 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_pacemaker_init.yaml | 131 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_pacemaker_migrations.sh | 75 | ||||
-rw-r--r-- | extraconfig/tasks/noop.yaml | 10 | ||||
-rwxr-xr-x | extraconfig/tasks/pacemaker_common_functions.sh | 62 | ||||
-rwxr-xr-x | extraconfig/tasks/pacemaker_maintenance_mode.sh | 19 | ||||
-rwxr-xr-x | extraconfig/tasks/pacemaker_resource_restart.sh | 42 | ||||
-rw-r--r-- | extraconfig/tasks/post_puppet_pacemaker.yaml | 6 | ||||
-rw-r--r-- | extraconfig/tasks/pre_puppet_pacemaker.yaml | 9 | ||||
-rwxr-xr-x | extraconfig/tasks/yum_update.sh | 113 | ||||
-rw-r--r-- | extraconfig/tasks/yum_update_noop.yaml | 29 |
17 files changed, 657 insertions, 161 deletions
diff --git a/extraconfig/tasks/major_upgrade_block_storage.sh b/extraconfig/tasks/major_upgrade_block_storage.sh new file mode 100644 index 00000000..07666245 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_block_storage.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# +# This runs an upgrade of Cinder Block Storage nodes. +# +set -eu + +yum -y install python-zaqarclient # needed for os-collect-config +yum -y -q update diff --git a/extraconfig/tasks/major_upgrade_ceph_storage.sh b/extraconfig/tasks/major_upgrade_ceph_storage.sh new file mode 100644 index 00000000..de42b16d --- /dev/null +++ b/extraconfig/tasks/major_upgrade_ceph_storage.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# +# This delivers the ceph-storage upgrade script to be invoked as part of the tripleo +# major upgrade workflow. +# +set -eu + +UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh + +cat > $UPGRADE_SCRIPT << ENDOFCAT +### DO NOT MODIFY THIS FILE +### This file is automatically delivered to the ceph-storage nodes as part of the +### tripleo upgrades workflow + + +function systemctl_ceph { + action=\$1 + systemctl \$action ceph +} + +# "so that mirrors aren't rebalanced as if the OSD died" - gfidente +ceph osd set noout + +systemctl_ceph stop +yum -y install python-zaqarclient # needed for os-collect-config +yum -y update +systemctl_ceph start + +ceph osd unset noout + +ENDOFCAT + +# ensure the permissions are OK +chmod 0755 $UPGRADE_SCRIPT + diff --git a/extraconfig/tasks/major_upgrade_compute.sh b/extraconfig/tasks/major_upgrade_compute.sh new file mode 100644 index 00000000..78628c8c --- /dev/null +++ b/extraconfig/tasks/major_upgrade_compute.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# +# This delivers the compute upgrade script to be invoked as part of the tripleo +# major upgrade workflow. +# +set -eu + +UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh + +cat > $UPGRADE_SCRIPT << ENDOFCAT +### DO NOT MODIFY THIS FILE +### This file is automatically delivered to the compute nodes as part of the +### tripleo upgrades workflow + +# pin nova to kilo (messaging +-1) for the nova-compute service + +crudini --set /etc/nova/nova.conf upgrade_levels compute $upgrade_level_nova_compute + +yum -y install python-zaqarclient # needed for os-collect-config +yum -y update + +ENDOFCAT + +# ensure the permissions are OK +chmod 0755 $UPGRADE_SCRIPT + diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh new file mode 100755 index 00000000..f5399222 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +set -eu + +cluster_sync_timeout=600 + +if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then + echo_error "ERROR: upgrade cannot start with some cluster nodes being offline" + exit 1 +fi + +if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then + pcs resource disable httpd + check_resource httpd stopped 1800 + pcs resource disable openstack-core + check_resource openstack-core stopped 1800 + pcs resource disable redis + check_resource redis stopped 600 + pcs resource disable mongod + check_resource mongod stopped 600 + pcs resource disable rabbitmq + check_resource rabbitmq stopped 600 + pcs resource disable memcached + check_resource memcached stopped 600 + pcs resource disable galera + check_resource galera stopped 600 + # Disable all VIPs before stopping the cluster, so that pcs doesn't use one as a source address: + # https://bugzilla.redhat.com/show_bug.cgi?id=1330688 + for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Started | awk '{ print $1 }'); do + pcs resource disable $vip + check_resource $vip stopped 60 + done + pcs cluster stop --all +fi + +# Swift isn't controled by pacemaker +systemctl_swift stop + +tstart=$(date +%s) +while systemctl is-active pacemaker; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > cluster_sync_timeout )) ; then + echo_error "ERROR: cluster shutdown timed out" + exit 1 + fi +done + +yum -y install python-zaqarclient # needed for os-collect-config +yum -y -q update + +# Pin messages sent to compute nodes to kilo, these will be upgraded later +crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute" +# https://bugzilla.redhat.com/show_bug.cgi?id=1284047 +# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435 +crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit +# https://bugzilla.redhat.com/show_bug.cgi?id=1284058 +# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists +crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server" diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh new file mode 100755 index 00000000..643ae57f --- /dev/null +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +set -eu + +cluster_form_timeout=600 +cluster_settle_timeout=600 +galera_sync_timeout=600 + +if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then + pcs cluster start --all + + tstart=$(date +%s) + while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > cluster_form_timeout )) ; then + echo_error "ERROR: timed out forming the cluster" + exit 1 + fi + done + + if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then + echo_error "ERROR: timed out waiting for cluster to finish transition" + exit 1 + fi + + for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Stopped | awk '{ print $1 }'); do + pcs resource enable $vip + check_resource $vip started 60 + done + + pcs resource enable galera + check_resource galera started 600 + pcs resource enable mongod + check_resource mongod started 600 + + tstart=$(date +%s) + while ! clustercheck; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > galera_sync_timeout )) ; then + echo_error "ERROR galera sync timed out" + exit 1 + fi + done + + # Run all the db syncs + # TODO: check if this can be triggered in puppet and removed from here + ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf + cinder-manage db sync + glance-manage --config-file=/etc/glance/glance-registry.conf db_sync + heat-manage --config-file /etc/heat/heat.conf db_sync + keystone-manage db_sync + neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head + nova-manage db sync + + pcs resource enable memcached + check_resource memcached started 600 + pcs resource enable rabbitmq + check_resource rabbitmq started 600 + pcs resource enable redis + check_resource redis started 600 + pcs resource enable openstack-core + check_resource openstack-core started 1800 + pcs resource enable httpd + check_resource httpd started 1800 +fi + +# Swift isn't controled by heat +systemctl_swift start diff --git a/extraconfig/tasks/major_upgrade_object_storage.sh b/extraconfig/tasks/major_upgrade_object_storage.sh new file mode 100644 index 00000000..931f4f42 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_object_storage.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# +# This delivers the swift-storage upgrade script to be invoked as part of the tripleo +# major upgrade workflow. +# +set -eu + +UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh + +cat > $UPGRADE_SCRIPT << ENDOFCAT +### DO NOT MODIFY THIS FILE +### This file is automatically delivered to the swift-storage nodes as part of the +### tripleo upgrades workflow + + +function systemctl_swift { + action=\$1 + for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \ + openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \ + openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object; do + systemctl \$action \$S + done +} + + +systemctl_swift stop + +yum -y install python-zaqarclient # needed for os-collect-config +yum -y update + +systemctl_swift start + + + +ENDOFCAT + +# ensure the permissions are OK +chmod 0755 $UPGRADE_SCRIPT + diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml new file mode 100644 index 00000000..4af3186c --- /dev/null +++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml @@ -0,0 +1,85 @@ +heat_template_version: 2014-10-16 +description: 'Upgrade for Pacemaker deployments' + +parameters: + controller_servers: + type: json + compute_servers: + type: json + blockstorage_servers: + type: json + objectstorage_servers: + type: json + cephstorage_servers: + type: json + input_values: + type: json + description: input values for the software deployments + + UpgradeLevelNovaCompute: + type: string + description: Nova Compute upgrade level + default: '' + +resources: + # TODO(jistr): for Mitaka->Newton upgrades and further we can use + # map_merge with input_values instead of feeding params into scripts + # via str_replace on bash snippets + + ControllerPacemakerUpgradeConfig_Step1: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - str_replace: + template: | + #!/bin/bash + upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE' + params: + UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute} + - get_file: pacemaker_common_functions.sh + - get_file: major_upgrade_pacemaker_migrations.sh + - get_file: major_upgrade_controller_pacemaker_1.sh + + ControllerPacemakerUpgradeDeployment_Step1: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: controller_servers} + config: {get_resource: ControllerPacemakerUpgradeConfig_Step1} + input_values: {get_param: input_values} + + BlockStorageUpgradeConfig: + type: OS::Heat::SoftwareConfig + depends_on: ControllerPacemakerUpgradeDeployment_Step1 + properties: + group: script + config: {get_file: major_upgrade_block_storage.sh} + + BlockStorageUpgradeDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: blockstorage_servers} + config: {get_resource: BlockStorageUpgradeConfig} + input_values: {get_param: input_values} + + ControllerPacemakerUpgradeConfig_Step2: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - get_file: pacemaker_common_functions.sh + - get_file: major_upgrade_pacemaker_migrations.sh + - get_file: major_upgrade_controller_pacemaker_2.sh + + ControllerPacemakerUpgradeDeployment_Step2: + type: OS::Heat::SoftwareDeploymentGroup + depends_on: BlockStorageUpgradeDeployment + properties: + servers: {get_param: controller_servers} + config: {get_resource: ControllerPacemakerUpgradeConfig_Step2} + input_values: {get_param: input_values} + diff --git a/extraconfig/tasks/major_upgrade_pacemaker_init.yaml b/extraconfig/tasks/major_upgrade_pacemaker_init.yaml new file mode 100644 index 00000000..623549a0 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_pacemaker_init.yaml @@ -0,0 +1,131 @@ +heat_template_version: 2014-10-16 +description: 'Upgrade for Pacemaker deployments' + +parameters: + + controller_servers: + type: json + compute_servers: + type: json + blockstorage_servers: + type: json + objectstorage_servers: + type: json + cephstorage_servers: + type: json + input_values: + type: json + description: input values for the software deployments + + UpgradeInitCommand: + type: string + description: | + Command or script snippet to run on all overcloud nodes to + initialize the upgrade process. E.g. a repository switch. + default: '' + UpgradeLevelNovaCompute: + type: string + description: Nova Compute upgrade level + default: '' + +resources: + + # For the UpgradeInit also rename /etc/resolv.conf.save for +bug/1567004 + + UpgradeInitConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - "#!/bin/bash\n\n" + - "if [[ -f /etc/resolv.conf.save ]] ; then rm /etc/resolv.conf.save; fi\n\n" + - get_param: UpgradeInitCommand + + UpgradeInitControllerDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: controller_servers} + config: {get_resource: UpgradeInitConfig} + input_values: {get_param: input_values} + + UpgradeInitComputeDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: compute_servers} + config: {get_resource: UpgradeInitConfig} + input_values: {get_param: input_values} + + UpgradeInitBlockStorageDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: blockstorage_servers} + config: {get_resource: UpgradeInitConfig} + input_values: {get_param: input_values} + + UpgradeInitObjectStorageDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: objectstorage_servers} + config: {get_resource: UpgradeInitConfig} + input_values: {get_param: input_values} + + UpgradeInitCephStorageDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: cephstorage_servers} + config: {get_resource: UpgradeInitConfig} + input_values: {get_param: input_values} + + # TODO(jistr): for Mitaka->Newton upgrades and further we can use + # map_merge with input_values instead of feeding params into scripts + # via str_replace on bash snippets + + ComputeDeliverUpgradeScriptConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - str_replace: + template: | + #!/bin/bash + upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE' + params: + UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute} + - get_file: major_upgrade_compute.sh + + ComputeDeliverUpgradeScriptDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: compute_servers} + config: {get_resource: ComputeDeliverUpgradeScriptConfig} + input_values: {get_param: input_values} + + ObjectStorageDeliverUpgradeScriptConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: {get_file: major_upgrade_object_storage.sh} + + ObjectStorageDeliverUpgradeScriptDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: objectstorage_servers} + config: {get_resource: ObjectStorageDeliverUpgradeScriptConfig} + input_values: {get_param: input_values} + + CephStorageDeliverUpgradeScriptConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: {get_file: major_upgrade_ceph_storage.sh} + + CephStorageDeliverUpgradeScriptDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: cephstorage_servers} + config: {get_resource: CephStorageDeliverUpgradeScriptConfig} + input_values: {get_param: input_values} diff --git a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh new file mode 100644 index 00000000..b63198db --- /dev/null +++ b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +# Special pieces of upgrade migration logic go into this +# file. E.g. Pacemaker cluster transitions for existing deployments, +# matching changes to overcloud_controller_pacemaker.pp (Puppet +# handles deployment, this file handles migrations). +# +# This file shouldn't execute any action on its own, all logic should +# be wrapped into bash functions. Upgrade scripts will source this +# file and call the functions defined in this file where appropriate. +# +# The migration functions should be idempotent. If the migration has +# been already applied, it should be possible to call the function +# again without damaging the deployment or failing the upgrade. + +function add_missing_openstack_core_constraints { + # The CIBs are saved under /root as they might contain sensitive data + CIB="/root/migration.cib" + CIB_BACKUP="/root/backup.cib" + CIB_PUSH_NEEDED=n + + rm -f "$CIB" "$CIB_BACKUP" || /bin/true + pcs cluster cib "$CIB" + cp "$CIB" "$CIB_BACKUP" + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-sahara-api-clone then start openstack-sahara-engine-clone'; then + pcs -f "$CIB" constraint order start openstack-sahara-api-clone then start openstack-sahara-engine-clone + CIB_PUSH_NEEDED=y + fi + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-ceilometer-notification-clone'; then + pcs -f "$CIB" constraint order start openstack-core-clone then start openstack-ceilometer-notification-clone + CIB_PUSH_NEEDED=y + fi + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone'; then + pcs -f "$CIB" constraint order start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone + CIB_PUSH_NEEDED=y + fi + + if pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone'; then + CID=$(pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone' | sed -e 's/.*id\://g' -e 's/)//g') + pcs -f "$CIB" constraint remove $CID + CIB_PUSH_NEEDED=y + fi + + if [ "$CIB_PUSH_NEEDED" = 'y' ]; then + pcs cluster cib-push "$CIB" + fi +} + +function remove_ceilometer_alarm { + if pcs status | grep openstack-ceilometer-alarm; then + # Disable pacemaker resources for ceilometer-alarms + pcs resource disable openstack-ceilometer-alarm-evaluator + check_resource openstack-ceilometer-alarm-evaluator stopped 600 + pcs resource delete openstack-ceilometer-alarm-evaluator + pcs resource disable openstack-ceilometer-alarm-notifier + check_resource openstack-ceilometer-alarm-notifier stopped 600 + pcs resource delete openstack-ceilometer-alarm-notifier + + # remove constraints + pcs constraint remove ceilometer-delay-then-ceilometer-alarm-evaluator-constraint + pcs constraint remove ceilometer-alarm-evaluator-with-ceilometer-delay-colocation + pcs constraint remove ceilometer-alarm-evaluator-then-ceilometer-alarm-notifier-constraint + pcs constraint remove ceilometer-alarm-notifier-with-ceilometer-alarm-evaluator-colocation + pcs constraint remove ceilometer-alarm-notifier-then-ceilometer-notification-constraint + pcs constraint remove ceilometer-notification-with-ceilometer-alarm-notifier-colocation + + fi + + # uninstall openstack-ceilometer-alarm package + yum -y remove openstack-ceilometer-alarm + +} diff --git a/extraconfig/tasks/noop.yaml b/extraconfig/tasks/noop.yaml deleted file mode 100644 index 0cff7469..00000000 --- a/extraconfig/tasks/noop.yaml +++ /dev/null @@ -1,10 +0,0 @@ -heat_template_version: 2014-10-16 -description: 'No-op task' - -parameters: - servers: - type: json - input_values: - type: json - default: {} - description: input values for the software deployments diff --git a/extraconfig/tasks/pacemaker_common_functions.sh b/extraconfig/tasks/pacemaker_common_functions.sh new file mode 100755 index 00000000..7d794c97 --- /dev/null +++ b/extraconfig/tasks/pacemaker_common_functions.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +set -eu + +function check_resource { + + if [ "$#" -ne 3 ]; then + echo_error "ERROR: check_resource function expects 3 parameters, $# given" + exit 1 + fi + + service=$1 + state=$2 + timeout=$3 + + if [ "$state" = "stopped" ]; then + match_for_incomplete='Started' + else # started + match_for_incomplete='Stopped' + fi + + nodes_local=$(pcs status | grep ^Online | sed 's/.*\[ \(.*\) \]/\1/g' | sed 's/ /\|/g') + if timeout -k 10 $timeout crm_resource --wait; then + node_states=$(pcs status --full | grep "$service" | grep -v Clone | { egrep "$nodes_local" || true; } ) + if echo "$node_states" | grep -q "$match_for_incomplete"; then + echo_error "ERROR: cluster finished transition but $service was not in $state state, exiting." + exit 1 + else + echo "$service has $state" + fi + else + echo_error "ERROR: cluster remained unstable for more than $timeout seconds, exiting." + exit 1 + fi + +} + +function echo_error { + echo "$@" | tee /dev/fd2 +} + +function systemctl_swift { + services=( openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \ + openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \ + openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy ) + action=$1 + case $action in + stop) + services=$(systemctl | grep swift | grep running | awk '{print $1}') + ;; + start) + enable_swift_storage=$(hiera -c /etc/puppet/hiera.yaml 'enable_swift_storage') + if [[ $enable_swift_storage != "true" ]]; then + services=( openstack-swift-proxy ) + fi + ;; + *) services=() ;; # for safetly, should never happen + esac + for S in ${services[@]}; do + systemctl $action $S + done +} diff --git a/extraconfig/tasks/pacemaker_maintenance_mode.sh b/extraconfig/tasks/pacemaker_maintenance_mode.sh new file mode 100755 index 00000000..ddc84ad2 --- /dev/null +++ b/extraconfig/tasks/pacemaker_maintenance_mode.sh @@ -0,0 +1,19 @@ +#!/bin/bash +set -x + +# On initial deployment, the pacemaker service is disabled and is-active exits +# 3 in that case, so allow this to fail gracefully. +pacemaker_status=$(systemctl is-active pacemaker || :) + +if [ "$pacemaker_status" = "active" ]; then + pcs property set maintenance-mode=true +fi + +# We need to reload haproxy in case the certificate changed because +# puppet doesn't know the contents of the cert file. We shouldn't +# reload it if it wasn't already active (such as if using external +# loadbalancer or on initial deployment). +haproxy_status=$(systemctl is-active haproxy || :) +if [ "$haproxy_status" = "active" ]; then + systemctl reload haproxy +fi diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh index 12201097..b2e5be16 100755 --- a/extraconfig/tasks/pacemaker_resource_restart.sh +++ b/extraconfig/tasks/pacemaker_resource_restart.sh @@ -3,44 +3,12 @@ set -eux pacemaker_status=$(systemctl is-active pacemaker) -check_interval=3 - -function check_resource { - - service=$1 - state=$2 - timeout=$3 - tstart=$(date +%s) - tend=$(( $tstart + $timeout )) - - if [ "$state" = "stopped" ]; then - match_for_incomplete='Started' - else # started - match_for_incomplete='Stopped' - fi - - while (( $(date +%s) < $tend )); do - node_states=$(pcs status --full | grep "$service" | grep -v Clone) - if echo "$node_states" | grep -q "$match_for_incomplete"; then - echo "$service not yet $state, sleeping $check_interval seconds." - sleep $check_interval - else - echo "$service has $state" - timeout -k 10 $timeout crm_resource --wait - return - fi - done - - echo "$service never $state after $timeout seconds" | tee /dev/fd/2 - exit 1 - -} # Run if pacemaker is running, we're the bootstrap node, # and we're updating the deployment (not creating). if [ "$pacemaker_status" = "active" -a \ "$(hiera bootstrap_nodeid)" = "$(facter hostname)" -a \ - "$(hiera update_identifier)" != "nil" ]; then + "$(hiera stack_action)" = "UPDATE" ]; then #ensure neutron constraints like #https://review.openstack.org/#/c/245093/ @@ -50,8 +18,8 @@ if [ "$pacemaker_status" = "active" -a \ pcs resource disable httpd check_resource httpd stopped 300 - pcs resource disable openstack-keystone - check_resource openstack-keystone stopped 1800 + pcs resource disable openstack-core + check_resource openstack-core stopped 1800 if pcs status | grep haproxy-clone; then pcs resource restart haproxy-clone @@ -62,8 +30,8 @@ if [ "$pacemaker_status" = "active" -a \ pcs resource restart memcached-clone pcs resource restart galera-master - pcs resource enable openstack-keystone - check_resource openstack-keystone started 1800 + pcs resource enable openstack-core + check_resource openstack-core started 1800 pcs resource enable httpd check_resource httpd started 800 diff --git a/extraconfig/tasks/post_puppet_pacemaker.yaml b/extraconfig/tasks/post_puppet_pacemaker.yaml index 7de41d94..fbed9ce5 100644 --- a/extraconfig/tasks/post_puppet_pacemaker.yaml +++ b/extraconfig/tasks/post_puppet_pacemaker.yaml @@ -33,7 +33,11 @@ resources: type: OS::Heat::SoftwareConfig properties: group: script - config: {get_file: pacemaker_resource_restart.sh} + config: + list_join: + - '' + - - get_file: pacemaker_common_functions.sh + - get_file: pacemaker_resource_restart.sh ControllerPostPuppetRestartDeployment: type: OS::Heat::SoftwareDeployments diff --git a/extraconfig/tasks/pre_puppet_pacemaker.yaml b/extraconfig/tasks/pre_puppet_pacemaker.yaml index 2cfe92a7..82546588 100644 --- a/extraconfig/tasks/pre_puppet_pacemaker.yaml +++ b/extraconfig/tasks/pre_puppet_pacemaker.yaml @@ -14,13 +14,8 @@ resources: type: OS::Heat::SoftwareConfig properties: group: script - config: | - #!/bin/bash - pacemaker_status=$(systemctl is-active pacemaker) - - if [ "$pacemaker_status" = "active" ]; then - pcs property set maintenance-mode=true - fi + config: + get_file: pacemaker_maintenance_mode.sh ControllerPrePuppetMaintenanceModeDeployment: type: OS::Heat::SoftwareDeployments diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh index c6313d9d..b045e5ea 100755 --- a/extraconfig/tasks/yum_update.sh +++ b/extraconfig/tasks/yum_update.sh @@ -23,7 +23,7 @@ update_identifier=${update_identifier//[^a-zA-Z0-9-_]/} # seconds to wait for this node to rejoin the cluster after update cluster_start_timeout=600 -galera_sync_timeout=360 +galera_sync_timeout=1800 cluster_settle_timeout=1800 timestamp_file="$timestamp_dir/$update_identifier" @@ -43,97 +43,8 @@ if [[ "$list_updates" == "" ]]; then fi pacemaker_status=$(systemctl is-active pacemaker) -pacemaker_dumpfile=$(mktemp) if [[ "$pacemaker_status" == "active" ]] ; then -SERVICES="memcached -httpd -neutron-dhcp-agent -neutron-l3-agent -neutron-metadata-agent -neutron-openvswitch-agent -neutron-server -openstack-ceilometer-alarm-evaluator -openstack-ceilometer-alarm-notifier -openstack-ceilometer-api -openstack-ceilometer-central -openstack-ceilometer-collector -openstack-ceilometer-notification -openstack-cinder-api -openstack-cinder-scheduler -openstack-cinder-volume -openstack-glance-api -openstack-glance-registry -openstack-heat-api -openstack-heat-api-cfn -openstack-heat-api-cloudwatch -openstack-heat-engine -openstack-keystone -openstack-nova-api -openstack-nova-conductor -openstack-nova-consoleauth -openstack-nova-novncproxy -openstack-nova-scheduler" - - echo "Dumping Pacemaker config" - pcs cluster cib $pacemaker_dumpfile - - echo "Checking for missing constraints" - - if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then - pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone - fi - - if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone - fi - - if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone - fi - - if pcs resource | grep "haproxy-clone"; then - SERVICES="$SERVICES haproxy" - if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone - fi - fi - - if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone - fi - - if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then - pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false - fi - - # ensure neutron constraints https://review.openstack.org/#/c/229466 - # remove ovs-cleanup after server and add openvswitch-agent instead - if pcs constraint order show | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then - pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory - fi - if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then - pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone - fi - - - if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then - pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY - fi - - echo "Setting resource start/stop timeouts" - for service in $SERVICES; do - pcs -f $pacemaker_dumpfile resource update $service op start timeout=200s op stop timeout=200s - done - # mongod start timeout is higher, setting only stop timeout - pcs -f $pacemaker_dumpfile resource update mongod op start timeout=370s op stop timeout=200s - - echo "Applying new Pacemaker config" - if ! pcs cluster cib-push $pacemaker_dumpfile; then - echo "ERROR failed to apply new pacemaker config" - exit 1 - fi - echo "Pacemaker running, stopping cluster node and doing full package update" node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*") if [[ "$node_count" == "1" ]] ; then @@ -142,23 +53,16 @@ openstack-nova-scheduler" else pcs cluster stop fi - - # clean leftover keepalived and radvd instances from neutron - # (can be removed when we remove neutron-netns-cleanup from cluster services) - # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init - killall neutron-keepalived-state-change 2>/dev/null || : - kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || : - kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || : else - echo "Excluding upgrading packages that are handled by config management tooling" - command_arguments="$command_arguments --skip-broken" - for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do - command_arguments="$command_arguments --exclude $exclude" - done + echo "Upgrading openstack-puppet-modules" + yum -q -y update openstack-puppet-modules + echo "Upgrading other packages is handled by config management tooling" + echo -n "true" > $heat_outputs_path.update_managed_packages + exit 0 fi command=${command:-update} -full_command="yum -y $command $command_arguments" +full_command="yum -q -y $command $command_arguments" echo "Running: $full_command" result=$($full_command) @@ -199,9 +103,6 @@ if [[ "$pacemaker_status" == "active" ]] ; then fi pcs status - -else - echo -n "true" > $heat_outputs_path.update_managed_packages fi echo "Finished yum_update.sh on server $deploy_server_id at `date`" diff --git a/extraconfig/tasks/yum_update_noop.yaml b/extraconfig/tasks/yum_update_noop.yaml new file mode 100644 index 00000000..b759d9c5 --- /dev/null +++ b/extraconfig/tasks/yum_update_noop.yaml @@ -0,0 +1,29 @@ +heat_template_version: 2014-10-16 +description: 'No-op yum update task' + +resources: + + config: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: | + #!/bin/bash + echo -n "false" > $heat_outputs_path.update_managed_packages + inputs: + - name: update_identifier + description: yum will only run for previously unused values of update_identifier + default: '' + - name: command + description: yum sub-command to run, defaults to "update" + default: update + - name: command_arguments + description: yum command arguments, defaults to "" + default: '' + outputs: + - name: update_managed_packages + description: boolean value indicating whether to upgrade managed packages + +outputs: + OS::stack_id: + value: {get_resource: config} |