aboutsummaryrefslogtreecommitdiffstats
path: root/extraconfig/tasks
diff options
context:
space:
mode:
Diffstat (limited to 'extraconfig/tasks')
-rw-r--r--extraconfig/tasks/major_upgrade_block_storage.sh8
-rw-r--r--extraconfig/tasks/major_upgrade_ceph_storage.sh35
-rw-r--r--extraconfig/tasks/major_upgrade_compute.sh26
-rwxr-xr-xextraconfig/tasks/major_upgrade_controller_pacemaker_1.sh59
-rwxr-xr-xextraconfig/tasks/major_upgrade_controller_pacemaker_2.sh70
-rw-r--r--extraconfig/tasks/major_upgrade_object_storage.sh39
-rw-r--r--extraconfig/tasks/major_upgrade_pacemaker.yaml85
-rw-r--r--extraconfig/tasks/major_upgrade_pacemaker_init.yaml131
-rw-r--r--extraconfig/tasks/major_upgrade_pacemaker_migrations.sh75
-rw-r--r--extraconfig/tasks/noop.yaml10
-rwxr-xr-xextraconfig/tasks/pacemaker_common_functions.sh62
-rwxr-xr-xextraconfig/tasks/pacemaker_maintenance_mode.sh19
-rwxr-xr-xextraconfig/tasks/pacemaker_resource_restart.sh42
-rw-r--r--extraconfig/tasks/post_puppet_pacemaker.yaml6
-rw-r--r--extraconfig/tasks/pre_puppet_pacemaker.yaml9
-rwxr-xr-xextraconfig/tasks/yum_update.sh113
-rw-r--r--extraconfig/tasks/yum_update_noop.yaml29
17 files changed, 657 insertions, 161 deletions
diff --git a/extraconfig/tasks/major_upgrade_block_storage.sh b/extraconfig/tasks/major_upgrade_block_storage.sh
new file mode 100644
index 00000000..07666245
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_block_storage.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+#
+# This runs an upgrade of Cinder Block Storage nodes.
+#
+set -eu
+
+yum -y install python-zaqarclient # needed for os-collect-config
+yum -y -q update
diff --git a/extraconfig/tasks/major_upgrade_ceph_storage.sh b/extraconfig/tasks/major_upgrade_ceph_storage.sh
new file mode 100644
index 00000000..de42b16d
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_ceph_storage.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# This delivers the ceph-storage upgrade script to be invoked as part of the tripleo
+# major upgrade workflow.
+#
+set -eu
+
+UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh
+
+cat > $UPGRADE_SCRIPT << ENDOFCAT
+### DO NOT MODIFY THIS FILE
+### This file is automatically delivered to the ceph-storage nodes as part of the
+### tripleo upgrades workflow
+
+
+function systemctl_ceph {
+ action=\$1
+ systemctl \$action ceph
+}
+
+# "so that mirrors aren't rebalanced as if the OSD died" - gfidente
+ceph osd set noout
+
+systemctl_ceph stop
+yum -y install python-zaqarclient # needed for os-collect-config
+yum -y update
+systemctl_ceph start
+
+ceph osd unset noout
+
+ENDOFCAT
+
+# ensure the permissions are OK
+chmod 0755 $UPGRADE_SCRIPT
+
diff --git a/extraconfig/tasks/major_upgrade_compute.sh b/extraconfig/tasks/major_upgrade_compute.sh
new file mode 100644
index 00000000..78628c8c
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_compute.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# This delivers the compute upgrade script to be invoked as part of the tripleo
+# major upgrade workflow.
+#
+# Nothing is upgraded by this script itself: it only writes the file named by
+# UPGRADE_SCRIPT and makes it executable.
+#
+# Required variable: upgrade_level_nova_compute. It is set by the prologue
+# that major_upgrade_pacemaker_init.yaml prepends to this script; because of
+# set -u the script aborts if it is missing.
+#
+set -eu
+
+# Path of the generated per-node upgrade script.
+UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh
+
+# The heredoc delimiter is unquoted on purpose: upgrade_level_nova_compute is
+# expanded now, so its value is baked into the generated file. The double
+# quotes around it are written literally and protect the value from word
+# splitting when the generated script runs, matching the quoting used by the
+# controller upgrade script.
+cat > "$UPGRADE_SCRIPT" << ENDOFCAT
+### DO NOT MODIFY THIS FILE
+### This file is automatically delivered to the compute nodes as part of the
+### tripleo upgrades workflow
+
+# pin nova to kilo (messaging +-1) for the nova-compute service
+
+crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute"
+
+yum -y install python-zaqarclient # needed for os-collect-config
+yum -y update
+
+ENDOFCAT
+
+# ensure the permissions are OK
+chmod 0755 "$UPGRADE_SCRIPT"
+
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
new file mode 100755
index 00000000..f5399222
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# First step of the controller upgrade: stop the cluster resources and the
+# cluster itself, stop swift, update all packages and adjust a few settings.
+#
+# echo_error, check_resource and systemctl_swift are not defined in this file.
+# major_upgrade_pacemaker.yaml joins pacemaker_common_functions.sh in front of
+# this script, together with a prologue that sets upgrade_level_nova_compute.
+
+set -eu
+
+# Seconds to wait for the local pacemaker service to become inactive.
+cluster_sync_timeout=600
+
+# Refuse to start if pcs reports the cluster as not running or any node OFFLINE.
+if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then
+ echo_error "ERROR: upgrade cannot start with some cluster nodes being offline"
+ exit 1
+fi
+
+# Only the bootstrap node disables the resources and stops the whole cluster.
+if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+ pcs resource disable httpd
+ check_resource httpd stopped 1800
+ pcs resource disable openstack-core
+ check_resource openstack-core stopped 1800
+ pcs resource disable redis
+ check_resource redis stopped 600
+ pcs resource disable mongod
+ check_resource mongod stopped 600
+ pcs resource disable rabbitmq
+ check_resource rabbitmq stopped 600
+ pcs resource disable memcached
+ check_resource memcached stopped 600
+ pcs resource disable galera
+ check_resource galera stopped 600
+ # Disable all VIPs before stopping the cluster, so that pcs doesn't use one as a source address:
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1330688
+ for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Started | awk '{ print $1 }'); do
+ pcs resource disable $vip
+ check_resource $vip stopped 60
+ done
+ pcs cluster stop --all
+fi
+
+# Swift isn't controlled by pacemaker
+systemctl_swift stop
+
+# Poll every 5 seconds until the local pacemaker service is no longer active,
+# giving up after cluster_sync_timeout seconds.
+tstart=$(date +%s)
+while systemctl is-active pacemaker; do
+ sleep 5
+ tnow=$(date +%s)
+ if (( tnow-tstart > cluster_sync_timeout )) ; then
+ echo_error "ERROR: cluster shutdown timed out"
+ exit 1
+ fi
+done
+
+yum -y install python-zaqarclient # needed for os-collect-config
+yum -y -q update
+
+# Pin messages sent to compute nodes to kilo, these will be upgraded later
+crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute"
+# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
+# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
+crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
+# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
+# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
+crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
new file mode 100755
index 00000000..643ae57f
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Second step of the controller upgrade: bring the cluster back up, run the
+# database syncs and re-enable the cluster resources, then start swift.
+#
+# echo_error, check_resource and systemctl_swift are not defined in this file.
+# major_upgrade_pacemaker.yaml joins pacemaker_common_functions.sh in front of
+# this script.
+
+set -eu
+
+# Seconds to wait until pcs status no longer reports a stopped cluster or
+# OFFLINE nodes.
+cluster_form_timeout=600
+# Seconds allowed for crm_resource --wait to return.
+cluster_settle_timeout=600
+# Seconds to wait for clustercheck to succeed.
+galera_sync_timeout=600
+
+# Only the bootstrap node starts the cluster, syncs the databases and
+# re-enables the resources.
+if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+ pcs cluster start --all
+
+ tstart=$(date +%s)
+ while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
+ sleep 5
+ tnow=$(date +%s)
+ if (( tnow-tstart > cluster_form_timeout )) ; then
+ echo_error "ERROR: timed out forming the cluster"
+ exit 1
+ fi
+ done
+
+ if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
+ echo_error "ERROR: timed out waiting for cluster to finish transition"
+ exit 1
+ fi
+
+ # Re-enable every IPaddr2 resource that is currently reported as Stopped.
+ for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Stopped | awk '{ print $1 }'); do
+ pcs resource enable $vip
+ check_resource $vip started 60
+ done
+
+ pcs resource enable galera
+ check_resource galera started 600
+ pcs resource enable mongod
+ check_resource mongod started 600
+
+ # Poll clustercheck every 5 seconds until it succeeds, giving up after
+ # galera_sync_timeout seconds.
+ tstart=$(date +%s)
+ while ! clustercheck; do
+ sleep 5
+ tnow=$(date +%s)
+ if (( tnow-tstart > galera_sync_timeout )) ; then
+ echo_error "ERROR galera sync timed out"
+ exit 1
+ fi
+ done
+
+ # Run all the db syncs
+ # TODO: check if this can be triggered in puppet and removed from here
+ ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
+ cinder-manage db sync
+ glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
+ heat-manage --config-file /etc/heat/heat.conf db_sync
+ keystone-manage db_sync
+ neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
+ nova-manage db sync
+
+ pcs resource enable memcached
+ check_resource memcached started 600
+ pcs resource enable rabbitmq
+ check_resource rabbitmq started 600
+ pcs resource enable redis
+ check_resource redis started 600
+ pcs resource enable openstack-core
+ check_resource openstack-core started 1800
+ pcs resource enable httpd
+ check_resource httpd started 1800
+fi
+
+# Swift isn't controlled by pacemaker, so it is started through systemctl
+systemctl_swift start
diff --git a/extraconfig/tasks/major_upgrade_object_storage.sh b/extraconfig/tasks/major_upgrade_object_storage.sh
new file mode 100644
index 00000000..931f4f42
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_object_storage.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# This delivers the swift-storage upgrade script to be invoked as part of the tripleo
+# major upgrade workflow.
+#
+set -eu
+
+UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh
+
+cat > $UPGRADE_SCRIPT << ENDOFCAT
+### DO NOT MODIFY THIS FILE
+### This file is automatically delivered to the swift-storage nodes as part of the
+### tripleo upgrades workflow
+
+
+function systemctl_swift {
+ action=\$1
+ for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
+ openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
+ openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object; do
+ systemctl \$action \$S
+ done
+}
+
+
+systemctl_swift stop
+
+yum -y install python-zaqarclient # needed for os-collect-config
+yum -y update
+
+systemctl_swift start
+
+
+
+ENDOFCAT
+
+# ensure the permissions are OK
+chmod 0755 $UPGRADE_SCRIPT
+
diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml
new file mode 100644
index 00000000..4af3186c
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml
@@ -0,0 +1,85 @@
+heat_template_version: 2014-10-16
+description: 'Upgrade for Pacemaker deployments'
+
+parameters:
+ controller_servers:
+ type: json
+ compute_servers:
+ type: json
+ blockstorage_servers:
+ type: json
+ objectstorage_servers:
+ type: json
+ cephstorage_servers:
+ type: json
+ input_values:
+ type: json
+ description: input values for the software deployments
+
+ UpgradeLevelNovaCompute:
+ type: string
+ description: Nova Compute upgrade level
+ default: ''
+
+resources:
+ # TODO(jistr): for Mitaka->Newton upgrades and further we can use
+ # map_merge with input_values instead of feeding params into scripts
+ # via str_replace on bash snippets
+
+ ControllerPacemakerUpgradeConfig_Step1:
+ type: OS::Heat::SoftwareConfig
+ properties:
+ group: script
+ config:
+ list_join:
+ - ''
+ - - str_replace:
+ template: |
+ #!/bin/bash
+ upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE'
+ params:
+ UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute}
+ - get_file: pacemaker_common_functions.sh
+ - get_file: major_upgrade_pacemaker_migrations.sh
+ - get_file: major_upgrade_controller_pacemaker_1.sh
+
+ ControllerPacemakerUpgradeDeployment_Step1:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: controller_servers}
+ config: {get_resource: ControllerPacemakerUpgradeConfig_Step1}
+ input_values: {get_param: input_values}
+
+ BlockStorageUpgradeConfig:
+ type: OS::Heat::SoftwareConfig
+ depends_on: ControllerPacemakerUpgradeDeployment_Step1
+ properties:
+ group: script
+ config: {get_file: major_upgrade_block_storage.sh}
+
+ BlockStorageUpgradeDeployment:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: blockstorage_servers}
+ config: {get_resource: BlockStorageUpgradeConfig}
+ input_values: {get_param: input_values}
+
+ ControllerPacemakerUpgradeConfig_Step2:
+ type: OS::Heat::SoftwareConfig
+ properties:
+ group: script
+ config:
+ list_join:
+ - ''
+ - - get_file: pacemaker_common_functions.sh
+ - get_file: major_upgrade_pacemaker_migrations.sh
+ - get_file: major_upgrade_controller_pacemaker_2.sh
+
+ ControllerPacemakerUpgradeDeployment_Step2:
+ type: OS::Heat::SoftwareDeploymentGroup
+ depends_on: BlockStorageUpgradeDeployment
+ properties:
+ servers: {get_param: controller_servers}
+ config: {get_resource: ControllerPacemakerUpgradeConfig_Step2}
+ input_values: {get_param: input_values}
+
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_init.yaml b/extraconfig/tasks/major_upgrade_pacemaker_init.yaml
new file mode 100644
index 00000000..623549a0
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_pacemaker_init.yaml
@@ -0,0 +1,131 @@
+heat_template_version: 2014-10-16
+description: 'Upgrade for Pacemaker deployments'
+
+parameters:
+
+ controller_servers:
+ type: json
+ compute_servers:
+ type: json
+ blockstorage_servers:
+ type: json
+ objectstorage_servers:
+ type: json
+ cephstorage_servers:
+ type: json
+ input_values:
+ type: json
+ description: input values for the software deployments
+
+ UpgradeInitCommand:
+ type: string
+ description: |
+ Command or script snippet to run on all overcloud nodes to
+ initialize the upgrade process. E.g. a repository switch.
+ default: ''
+ UpgradeLevelNovaCompute:
+ type: string
+ description: Nova Compute upgrade level
+ default: ''
+
+resources:
+
+ # The UpgradeInit script also removes /etc/resolv.conf.save (see +bug/1567004)
+
+ UpgradeInitConfig:
+ type: OS::Heat::SoftwareConfig
+ properties:
+ group: script
+ config:
+ list_join:
+ - ''
+ - - "#!/bin/bash\n\n"
+ - "if [[ -f /etc/resolv.conf.save ]] ; then rm /etc/resolv.conf.save; fi\n\n"
+ - get_param: UpgradeInitCommand
+
+ UpgradeInitControllerDeployment:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: controller_servers}
+ config: {get_resource: UpgradeInitConfig}
+ input_values: {get_param: input_values}
+
+ UpgradeInitComputeDeployment:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: compute_servers}
+ config: {get_resource: UpgradeInitConfig}
+ input_values: {get_param: input_values}
+
+ UpgradeInitBlockStorageDeployment:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: blockstorage_servers}
+ config: {get_resource: UpgradeInitConfig}
+ input_values: {get_param: input_values}
+
+ UpgradeInitObjectStorageDeployment:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: objectstorage_servers}
+ config: {get_resource: UpgradeInitConfig}
+ input_values: {get_param: input_values}
+
+ UpgradeInitCephStorageDeployment:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: cephstorage_servers}
+ config: {get_resource: UpgradeInitConfig}
+ input_values: {get_param: input_values}
+
+ # TODO(jistr): for Mitaka->Newton upgrades and further we can use
+ # map_merge with input_values instead of feeding params into scripts
+ # via str_replace on bash snippets
+
+ ComputeDeliverUpgradeScriptConfig:
+ type: OS::Heat::SoftwareConfig
+ properties:
+ group: script
+ config:
+ list_join:
+ - ''
+ - - str_replace:
+ template: |
+ #!/bin/bash
+ upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE'
+ params:
+ UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute}
+ - get_file: major_upgrade_compute.sh
+
+ ComputeDeliverUpgradeScriptDeployment:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: compute_servers}
+ config: {get_resource: ComputeDeliverUpgradeScriptConfig}
+ input_values: {get_param: input_values}
+
+ ObjectStorageDeliverUpgradeScriptConfig:
+ type: OS::Heat::SoftwareConfig
+ properties:
+ group: script
+ config: {get_file: major_upgrade_object_storage.sh}
+
+ ObjectStorageDeliverUpgradeScriptDeployment:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: objectstorage_servers}
+ config: {get_resource: ObjectStorageDeliverUpgradeScriptConfig}
+ input_values: {get_param: input_values}
+
+ CephStorageDeliverUpgradeScriptConfig:
+ type: OS::Heat::SoftwareConfig
+ properties:
+ group: script
+ config: {get_file: major_upgrade_ceph_storage.sh}
+
+ CephStorageDeliverUpgradeScriptDeployment:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: cephstorage_servers}
+ config: {get_resource: CephStorageDeliverUpgradeScriptConfig}
+ input_values: {get_param: input_values}
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
new file mode 100644
index 00000000..b63198db
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# Special pieces of upgrade migration logic go into this
+# file. E.g. Pacemaker cluster transitions for existing deployments,
+# matching changes to overcloud_controller_pacemaker.pp (Puppet
+# handles deployment, this file handles migrations).
+#
+# This file shouldn't execute any action on its own, all logic should
+# be wrapped into bash functions. Upgrade scripts will source this
+# file and call the functions defined in this file where appropriate.
+#
+# The migration functions should be idempotent. If the migration has
+# been already applied, it should be possible to call the function
+# again without damaging the deployment or failing the upgrade.
+
+function add_missing_openstack_core_constraints {
+ # The CIBs are saved under /root as they might contain sensitive data
+ CIB="/root/migration.cib"
+ CIB_BACKUP="/root/backup.cib"
+ CIB_PUSH_NEEDED=n
+
+ rm -f "$CIB" "$CIB_BACKUP" || /bin/true
+ pcs cluster cib "$CIB"
+ cp "$CIB" "$CIB_BACKUP"
+
+ if ! pcs -f "$CIB" constraint --full | grep 'start openstack-sahara-api-clone then start openstack-sahara-engine-clone'; then
+ pcs -f "$CIB" constraint order start openstack-sahara-api-clone then start openstack-sahara-engine-clone
+ CIB_PUSH_NEEDED=y
+ fi
+
+ if ! pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-ceilometer-notification-clone'; then
+ pcs -f "$CIB" constraint order start openstack-core-clone then start openstack-ceilometer-notification-clone
+ CIB_PUSH_NEEDED=y
+ fi
+
+ if ! pcs -f "$CIB" constraint --full | grep 'start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone'; then
+ pcs -f "$CIB" constraint order start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone
+ CIB_PUSH_NEEDED=y
+ fi
+
+ if pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone'; then
+ CID=$(pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone' | sed -e 's/.*id\://g' -e 's/)//g')
+ pcs -f "$CIB" constraint remove $CID
+ CIB_PUSH_NEEDED=y
+ fi
+
+ if [ "$CIB_PUSH_NEEDED" = 'y' ]; then
+ pcs cluster cib-push "$CIB"
+ fi
+}
+
+# Remove the ceilometer-alarm services from the cluster and from the node.
+#
+# When pcs status mentions openstack-ceilometer-alarm, the evaluator and the
+# notifier resources are disabled, waited for and deleted, and the six
+# constraints that refer to them are removed. The openstack-ceilometer-alarm
+# package is uninstalled in every case.
+#
+# Relies on check_resource, which is defined outside this file.
+function remove_ceilometer_alarm {
+    local role constraint_id
+
+    if pcs status | grep openstack-ceilometer-alarm; then
+        # Disable pacemaker resources for ceilometer-alarms
+        for role in evaluator notifier; do
+            pcs resource disable "openstack-ceilometer-alarm-$role"
+            check_resource "openstack-ceilometer-alarm-$role" stopped 600
+            pcs resource delete "openstack-ceilometer-alarm-$role"
+        done
+
+        # remove constraints
+        for constraint_id in \
+            ceilometer-delay-then-ceilometer-alarm-evaluator-constraint \
+            ceilometer-alarm-evaluator-with-ceilometer-delay-colocation \
+            ceilometer-alarm-evaluator-then-ceilometer-alarm-notifier-constraint \
+            ceilometer-alarm-notifier-with-ceilometer-alarm-evaluator-colocation \
+            ceilometer-alarm-notifier-then-ceilometer-notification-constraint \
+            ceilometer-notification-with-ceilometer-alarm-notifier-colocation; do
+            pcs constraint remove "$constraint_id"
+        done
+    fi
+
+    # uninstall openstack-ceilometer-alarm package
+    yum -y remove openstack-ceilometer-alarm
+}
diff --git a/extraconfig/tasks/noop.yaml b/extraconfig/tasks/noop.yaml
deleted file mode 100644
index 0cff7469..00000000
--- a/extraconfig/tasks/noop.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-heat_template_version: 2014-10-16
-description: 'No-op task'
-
-parameters:
- servers:
- type: json
- input_values:
- type: json
- default: {}
- description: input values for the software deployments
diff --git a/extraconfig/tasks/pacemaker_common_functions.sh b/extraconfig/tasks/pacemaker_common_functions.sh
new file mode 100755
index 00000000..7d794c97
--- /dev/null
+++ b/extraconfig/tasks/pacemaker_common_functions.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+set -eu
+
+# Wait for the cluster to settle, then verify the state of one resource.
+#
+# Arguments:
+#   $1 - resource name, used as a grep pattern on "pcs status --full"
+#   $2 - expected state: "stopped", anything else is treated as started
+#   $3 - seconds allowed for "crm_resource --wait" to return
+# Globals written: service, state, timeout, match_for_incomplete,
+#                  nodes_local, node_states
+# Exits the whole script with status 1 on a wrong argument count, when the
+# cluster does not settle in time, or when the resource is still in the
+# opposite state on one of the online nodes.
+function check_resource {
+
+    if [ "$#" -ne 3 ]; then
+        echo_error "ERROR: check_resource function expects 3 parameters, $# given"
+        exit 1
+    fi
+
+    service=$1
+    state=$2
+    timeout=$3
+
+    # The word whose presence means the transition is not complete.
+    case "$state" in
+        stopped) match_for_incomplete='Started' ;;
+        *)       match_for_incomplete='Stopped' ;;
+    esac
+
+    # Online node names joined with \| so they can be used as one pattern.
+    nodes_local=$(pcs status | grep ^Online | sed 's/.*\[ \(.*\) \]/\1/g' | sed 's/ /\|/g')
+
+    if ! timeout -k 10 $timeout crm_resource --wait; then
+        echo_error "ERROR: cluster remained unstable for more than $timeout seconds, exiting."
+        exit 1
+    fi
+
+    # Lines for this resource on online nodes only; no match is not an error.
+    node_states=$(pcs status --full | grep "$service" | grep -v Clone | { egrep "$nodes_local" || true; } )
+    if echo "$node_states" | grep -q "$match_for_incomplete"; then
+        echo_error "ERROR: cluster finished transition but $service was not in $state state, exiting."
+        exit 1
+    fi
+
+    echo "$service has $state"
+
+}
+
+# Print an error message on stdout and duplicate it onto stderr.
+#
+# Arguments: the words of the message, printed by echo as one line.
+# tee copies its input to stdout and to file descriptor 2 via /dev/fd/2.
+function echo_error {
+    echo "$@" | tee /dev/fd/2
+}
+
+# Start or stop the swift services through systemctl.
+#
+# Arguments:
+#   $1 - "stop" or "start"; any other value results in no action
+# stop:  acts on the units that systemctl lists as running and whose line
+#        contains "swift"
+# start: acts on the full default list, or only on openstack-swift-proxy
+#        when hiera enable_swift_storage is not "true"
+function systemctl_swift {
+    local -a services=( openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
+                        openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
+                        openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy )
+    local action=$1
+    local enable_swift_storage
+    local S
+    case $action in
+        stop)
+            # Replace the whole array. A plain services=$(...) assignment only
+            # overwrites element 0 and leaves the rest of the default list in
+            # place, so units that are not running would be stopped as well.
+            mapfile -t services < <(systemctl | grep swift | grep running | awk '{print $1}')
+            ;;
+        start)
+            enable_swift_storage=$(hiera -c /etc/puppet/hiera.yaml 'enable_swift_storage')
+            if [[ $enable_swift_storage != "true" ]]; then
+                services=( openstack-swift-proxy )
+            fi
+            ;;
+        *)  services=() ;; # for safety, should never happen
+    esac
+    # The ${services[@]+...} form expands to nothing for an empty array, which
+    # keeps set -u from aborting on older bash releases.
+    for S in ${services[@]+"${services[@]}"}; do
+        systemctl "$action" "$S"
+    done
+}
diff --git a/extraconfig/tasks/pacemaker_maintenance_mode.sh b/extraconfig/tasks/pacemaker_maintenance_mode.sh
new file mode 100755
index 00000000..ddc84ad2
--- /dev/null
+++ b/extraconfig/tasks/pacemaker_maintenance_mode.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+set -x
+
+# On initial deployment, the pacemaker service is disabled and is-active exits
+# 3 in that case, so allow this to fail gracefully.
+pacemaker_status=$(systemctl is-active pacemaker || :)
+
+if [ "$pacemaker_status" = "active" ]; then
+ pcs property set maintenance-mode=true
+fi
+
+# We need to reload haproxy in case the certificate changed because
+# puppet doesn't know the contents of the cert file. We shouldn't
+# reload it if it wasn't already active (such as if using external
+# loadbalancer or on initial deployment).
+haproxy_status=$(systemctl is-active haproxy || :)
+if [ "$haproxy_status" = "active" ]; then
+ systemctl reload haproxy
+fi
diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh
index 12201097..b2e5be16 100755
--- a/extraconfig/tasks/pacemaker_resource_restart.sh
+++ b/extraconfig/tasks/pacemaker_resource_restart.sh
@@ -3,44 +3,12 @@
set -eux
pacemaker_status=$(systemctl is-active pacemaker)
-check_interval=3
-
-function check_resource {
-
- service=$1
- state=$2
- timeout=$3
- tstart=$(date +%s)
- tend=$(( $tstart + $timeout ))
-
- if [ "$state" = "stopped" ]; then
- match_for_incomplete='Started'
- else # started
- match_for_incomplete='Stopped'
- fi
-
- while (( $(date +%s) < $tend )); do
- node_states=$(pcs status --full | grep "$service" | grep -v Clone)
- if echo "$node_states" | grep -q "$match_for_incomplete"; then
- echo "$service not yet $state, sleeping $check_interval seconds."
- sleep $check_interval
- else
- echo "$service has $state"
- timeout -k 10 $timeout crm_resource --wait
- return
- fi
- done
-
- echo "$service never $state after $timeout seconds" | tee /dev/fd/2
- exit 1
-
-}
# Run if pacemaker is running, we're the bootstrap node,
# and we're updating the deployment (not creating).
if [ "$pacemaker_status" = "active" -a \
"$(hiera bootstrap_nodeid)" = "$(facter hostname)" -a \
- "$(hiera update_identifier)" != "nil" ]; then
+ "$(hiera stack_action)" = "UPDATE" ]; then
#ensure neutron constraints like
#https://review.openstack.org/#/c/245093/
@@ -50,8 +18,8 @@ if [ "$pacemaker_status" = "active" -a \
pcs resource disable httpd
check_resource httpd stopped 300
- pcs resource disable openstack-keystone
- check_resource openstack-keystone stopped 1800
+ pcs resource disable openstack-core
+ check_resource openstack-core stopped 1800
if pcs status | grep haproxy-clone; then
pcs resource restart haproxy-clone
@@ -62,8 +30,8 @@ if [ "$pacemaker_status" = "active" -a \
pcs resource restart memcached-clone
pcs resource restart galera-master
- pcs resource enable openstack-keystone
- check_resource openstack-keystone started 1800
+ pcs resource enable openstack-core
+ check_resource openstack-core started 1800
pcs resource enable httpd
check_resource httpd started 800
diff --git a/extraconfig/tasks/post_puppet_pacemaker.yaml b/extraconfig/tasks/post_puppet_pacemaker.yaml
index 7de41d94..fbed9ce5 100644
--- a/extraconfig/tasks/post_puppet_pacemaker.yaml
+++ b/extraconfig/tasks/post_puppet_pacemaker.yaml
@@ -33,7 +33,11 @@ resources:
type: OS::Heat::SoftwareConfig
properties:
group: script
- config: {get_file: pacemaker_resource_restart.sh}
+ config:
+ list_join:
+ - ''
+ - - get_file: pacemaker_common_functions.sh
+ - get_file: pacemaker_resource_restart.sh
ControllerPostPuppetRestartDeployment:
type: OS::Heat::SoftwareDeployments
diff --git a/extraconfig/tasks/pre_puppet_pacemaker.yaml b/extraconfig/tasks/pre_puppet_pacemaker.yaml
index 2cfe92a7..82546588 100644
--- a/extraconfig/tasks/pre_puppet_pacemaker.yaml
+++ b/extraconfig/tasks/pre_puppet_pacemaker.yaml
@@ -14,13 +14,8 @@ resources:
type: OS::Heat::SoftwareConfig
properties:
group: script
- config: |
- #!/bin/bash
- pacemaker_status=$(systemctl is-active pacemaker)
-
- if [ "$pacemaker_status" = "active" ]; then
- pcs property set maintenance-mode=true
- fi
+ config:
+ get_file: pacemaker_maintenance_mode.sh
ControllerPrePuppetMaintenanceModeDeployment:
type: OS::Heat::SoftwareDeployments
diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh
index c6313d9d..b045e5ea 100755
--- a/extraconfig/tasks/yum_update.sh
+++ b/extraconfig/tasks/yum_update.sh
@@ -23,7 +23,7 @@ update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}
# seconds to wait for this node to rejoin the cluster after update
cluster_start_timeout=600
-galera_sync_timeout=360
+galera_sync_timeout=1800
cluster_settle_timeout=1800
timestamp_file="$timestamp_dir/$update_identifier"
@@ -43,97 +43,8 @@ if [[ "$list_updates" == "" ]]; then
fi
pacemaker_status=$(systemctl is-active pacemaker)
-pacemaker_dumpfile=$(mktemp)
if [[ "$pacemaker_status" == "active" ]] ; then
-SERVICES="memcached
-httpd
-neutron-dhcp-agent
-neutron-l3-agent
-neutron-metadata-agent
-neutron-openvswitch-agent
-neutron-server
-openstack-ceilometer-alarm-evaluator
-openstack-ceilometer-alarm-notifier
-openstack-ceilometer-api
-openstack-ceilometer-central
-openstack-ceilometer-collector
-openstack-ceilometer-notification
-openstack-cinder-api
-openstack-cinder-scheduler
-openstack-cinder-volume
-openstack-glance-api
-openstack-glance-registry
-openstack-heat-api
-openstack-heat-api-cfn
-openstack-heat-api-cloudwatch
-openstack-heat-engine
-openstack-keystone
-openstack-nova-api
-openstack-nova-conductor
-openstack-nova-consoleauth
-openstack-nova-novncproxy
-openstack-nova-scheduler"
-
- echo "Dumping Pacemaker config"
- pcs cluster cib $pacemaker_dumpfile
-
- echo "Checking for missing constraints"
-
- if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then
- pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone
- fi
-
- if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then
- pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone
- fi
-
- if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then
- pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone
- fi
-
- if pcs resource | grep "haproxy-clone"; then
- SERVICES="$SERVICES haproxy"
- if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then
- pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone
- fi
- fi
-
- if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then
- pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone
- fi
-
- if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then
- pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false
- fi
-
- # ensure neutron constraints https://review.openstack.org/#/c/229466
- # remove ovs-cleanup after server and add openvswitch-agent instead
- if pcs constraint order show | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then
- pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory
- fi
- if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then
- pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone
- fi
-
-
- if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then
- pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY
- fi
-
- echo "Setting resource start/stop timeouts"
- for service in $SERVICES; do
- pcs -f $pacemaker_dumpfile resource update $service op start timeout=200s op stop timeout=200s
- done
- # mongod start timeout is higher, setting only stop timeout
- pcs -f $pacemaker_dumpfile resource update mongod op start timeout=370s op stop timeout=200s
-
- echo "Applying new Pacemaker config"
- if ! pcs cluster cib-push $pacemaker_dumpfile; then
- echo "ERROR failed to apply new pacemaker config"
- exit 1
- fi
-
echo "Pacemaker running, stopping cluster node and doing full package update"
node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
if [[ "$node_count" == "1" ]] ; then
@@ -142,23 +53,16 @@ openstack-nova-scheduler"
else
pcs cluster stop
fi
-
- # clean leftover keepalived and radvd instances from neutron
- # (can be removed when we remove neutron-netns-cleanup from cluster services)
- # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init
- killall neutron-keepalived-state-change 2>/dev/null || :
- kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || :
- kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || :
else
- echo "Excluding upgrading packages that are handled by config management tooling"
- command_arguments="$command_arguments --skip-broken"
- for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do
- command_arguments="$command_arguments --exclude $exclude"
- done
+ echo "Upgrading openstack-puppet-modules"
+ yum -q -y update openstack-puppet-modules
+ echo "Upgrading other packages is handled by config management tooling"
+ echo -n "true" > $heat_outputs_path.update_managed_packages
+ exit 0
fi
command=${command:-update}
-full_command="yum -y $command $command_arguments"
+full_command="yum -q -y $command $command_arguments"
echo "Running: $full_command"
result=$($full_command)
@@ -199,9 +103,6 @@ if [[ "$pacemaker_status" == "active" ]] ; then
fi
pcs status
-
-else
- echo -n "true" > $heat_outputs_path.update_managed_packages
fi
echo "Finished yum_update.sh on server $deploy_server_id at `date`"
diff --git a/extraconfig/tasks/yum_update_noop.yaml b/extraconfig/tasks/yum_update_noop.yaml
new file mode 100644
index 00000000..b759d9c5
--- /dev/null
+++ b/extraconfig/tasks/yum_update_noop.yaml
@@ -0,0 +1,29 @@
+heat_template_version: 2014-10-16
+description: 'No-op yum update task'
+
+resources:
+
+ config:
+ type: OS::Heat::SoftwareConfig
+ properties:
+ group: script
+ config: |
+ #!/bin/bash
+ echo -n "false" > $heat_outputs_path.update_managed_packages
+ inputs:
+ - name: update_identifier
+ description: yum will only run for previously unused values of update_identifier
+ default: ''
+ - name: command
+ description: yum sub-command to run, defaults to "update"
+ default: update
+ - name: command_arguments
+ description: yum command arguments, defaults to ""
+ default: ''
+ outputs:
+ - name: update_managed_packages
+ description: boolean value indicating whether to upgrade managed packages
+
+outputs:
+ OS::stack_id:
+ value: {get_resource: config}