diff options
Diffstat (limited to 'extraconfig/tasks')
-rwxr-xr-x | extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh | 12 | ||||
-rwxr-xr-x | extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh | 11 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_pacemaker_init.yaml | 3 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_pacemaker_migrations.sh | 61 | ||||
-rw-r--r-- | extraconfig/tasks/noop.yaml | 26 | ||||
-rwxr-xr-x | extraconfig/tasks/pacemaker_common_functions.sh | 3 | ||||
-rwxr-xr-x | extraconfig/tasks/pacemaker_maintenance_mode.sh | 19 | ||||
-rwxr-xr-x | extraconfig/tasks/pacemaker_resource_restart.sh | 10 | ||||
-rw-r--r-- | extraconfig/tasks/pre_puppet_pacemaker.yaml | 9 | ||||
-rwxr-xr-x | extraconfig/tasks/yum_update.sh | 101 |
10 files changed, 108 insertions, 147 deletions
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh index bf2ee330..f5399222 100755 --- a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh @@ -12,10 +12,8 @@ fi if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then pcs resource disable httpd check_resource httpd stopped 1800 - if pcs status | grep openstack-keystone; then - pcs resource disable openstack-keystone - check_resource openstack-keystone stopped 1800 - fi + pcs resource disable openstack-core + check_resource openstack-core stopped 1800 pcs resource disable redis check_resource redis stopped 600 pcs resource disable mongod @@ -26,6 +24,12 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname) check_resource memcached stopped 600 pcs resource disable galera check_resource galera stopped 600 + # Disable all VIPs before stopping the cluster, so that pcs doesn't use one as a source address: + # https://bugzilla.redhat.com/show_bug.cgi?id=1330688 + for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Started | awk '{ print $1 }'); do + pcs resource disable $vip + check_resource $vip stopped 60 + done pcs cluster stop --all fi diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh index 10bea573..643ae57f 100755 --- a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh @@ -24,6 +24,11 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname) exit 1 fi + for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Stopped | awk '{ print $1 }'); do + pcs resource enable $vip + check_resource $vip started 60 + done + pcs resource enable galera check_resource galera started 600 pcs resource enable mongod @@ -55,10 +60,8 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname) check_resource rabbitmq started 600 pcs resource enable redis check_resource redis started 600 - if pcs status | grep openstack-keystone; then - pcs resource enable openstack-keystone - check_resource openstack-keystone started 1800 - fi + pcs resource enable openstack-core + check_resource openstack-core started 1800 pcs resource enable httpd check_resource httpd started 1800 fi diff --git a/extraconfig/tasks/major_upgrade_pacemaker_init.yaml b/extraconfig/tasks/major_upgrade_pacemaker_init.yaml index f662bc3d..623549a0 100644 --- a/extraconfig/tasks/major_upgrade_pacemaker_init.yaml +++ b/extraconfig/tasks/major_upgrade_pacemaker_init.yaml @@ -30,6 +30,8 @@ parameters: resources: + # For the UpgradeInit also rename /etc/resolv.conf.save for +bug/1567004 + UpgradeInitConfig: type: OS::Heat::SoftwareConfig properties: @@ -38,6 +40,7 @@ resources: list_join: - '' - - "#!/bin/bash\n\n" + - "if [[ -f /etc/resolv.conf.save ]] ; then rm /etc/resolv.conf.save; fi\n\n" - get_param: UpgradeInitCommand UpgradeInitControllerDeployment: diff --git a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh index 7fd26945..b63198db 100644 --- a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh +++ b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh @@ -12,3 +12,64 @@ # The migration functions should be idempotent. If the migration has # been already applied, it should be possible to call the function # again without damaging the deployment or failing the upgrade. + +function add_missing_openstack_core_constraints { + # The CIBs are saved under /root as they might contain sensitive data + CIB="/root/migration.cib" + CIB_BACKUP="/root/backup.cib" + CIB_PUSH_NEEDED=n + + rm -f "$CIB" "$CIB_BACKUP" || /bin/true + pcs cluster cib "$CIB" + cp "$CIB" "$CIB_BACKUP" + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-sahara-api-clone then start openstack-sahara-engine-clone'; then + pcs -f "$CIB" constraint order start openstack-sahara-api-clone then start openstack-sahara-engine-clone + CIB_PUSH_NEEDED=y + fi + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-ceilometer-notification-clone'; then + pcs -f "$CIB" constraint order start openstack-core-clone then start openstack-ceilometer-notification-clone + CIB_PUSH_NEEDED=y + fi + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone'; then + pcs -f "$CIB" constraint order start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone + CIB_PUSH_NEEDED=y + fi + + if pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone'; then + CID=$(pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone' | sed -e 's/.*id\://g' -e 's/)//g') + pcs -f "$CIB" constraint remove $CID + CIB_PUSH_NEEDED=y + fi + + if [ "$CIB_PUSH_NEEDED" = 'y' ]; then + pcs cluster cib-push "$CIB" + fi +} + +function remove_ceilometer_alarm { + if pcs status | grep openstack-ceilometer-alarm; then + # Disable pacemaker resources for ceilometer-alarms + pcs resource disable openstack-ceilometer-alarm-evaluator + check_resource openstack-ceilometer-alarm-evaluator stopped 600 + pcs resource delete openstack-ceilometer-alarm-evaluator + pcs resource disable openstack-ceilometer-alarm-notifier + check_resource openstack-ceilometer-alarm-notifier stopped 600 + pcs resource delete openstack-ceilometer-alarm-notifier + + # remove constraints + pcs constraint remove ceilometer-delay-then-ceilometer-alarm-evaluator-constraint + pcs constraint remove ceilometer-alarm-evaluator-with-ceilometer-delay-colocation + pcs constraint remove ceilometer-alarm-evaluator-then-ceilometer-alarm-notifier-constraint + pcs constraint remove ceilometer-alarm-notifier-with-ceilometer-alarm-evaluator-colocation + pcs constraint remove ceilometer-alarm-notifier-then-ceilometer-notification-constraint + pcs constraint remove ceilometer-notification-with-ceilometer-alarm-notifier-colocation + + fi + + # uninstall openstack-ceilometer-alarm package + yum -y remove openstack-ceilometer-alarm + +} diff --git a/extraconfig/tasks/noop.yaml b/extraconfig/tasks/noop.yaml deleted file mode 100644 index dbb863be..00000000 --- a/extraconfig/tasks/noop.yaml +++ /dev/null @@ -1,26 +0,0 @@ -heat_template_version: 2014-10-16 -description: 'No-op task' - -parameters: - servers: - type: json - default: [] - controller_servers: - type: json - default: [] - compute_servers: - type: json - default: [] - blockstorage_servers: - type: json - default: [] - objectstorage_servers: - type: json - default: [] - cephstorage_servers: - type: json - default: [] - input_values: - type: json - default: {} - description: input values for the software deployments diff --git a/extraconfig/tasks/pacemaker_common_functions.sh b/extraconfig/tasks/pacemaker_common_functions.sh index 0808763e..7d794c97 100755 --- a/extraconfig/tasks/pacemaker_common_functions.sh +++ b/extraconfig/tasks/pacemaker_common_functions.sh @@ -19,8 +19,9 @@ function check_resource { match_for_incomplete='Stopped' fi + nodes_local=$(pcs status | grep ^Online | sed 's/.*\[ \(.*\) \]/\1/g' | sed 's/ /\|/g') if timeout -k 10 $timeout crm_resource --wait; then - node_states=$(pcs status --full | grep "$service" | grep -v Clone) + node_states=$(pcs status --full | grep "$service" | grep -v Clone | { egrep "$nodes_local" || true; } ) if echo "$node_states" | grep -q "$match_for_incomplete"; then echo_error "ERROR: cluster finished transition but $service was not in $state state, exiting." exit 1 diff --git a/extraconfig/tasks/pacemaker_maintenance_mode.sh b/extraconfig/tasks/pacemaker_maintenance_mode.sh new file mode 100755 index 00000000..ddc84ad2 --- /dev/null +++ b/extraconfig/tasks/pacemaker_maintenance_mode.sh @@ -0,0 +1,19 @@ +#!/bin/bash +set -x + +# On initial deployment, the pacemaker service is disabled and is-active exits +# 3 in that case, so allow this to fail gracefully. +pacemaker_status=$(systemctl is-active pacemaker || :) + +if [ "$pacemaker_status" = "active" ]; then + pcs property set maintenance-mode=true +fi + +# We need to reload haproxy in case the certificate changed because +# puppet doesn't know the contents of the cert file. We shouldn't +# reload it if it wasn't already active (such as if using external +# loadbalancer or on initial deployment). +haproxy_status=$(systemctl is-active haproxy || :) +if [ "$haproxy_status" = "active" ]; then + systemctl reload haproxy +fi diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh index b2bdc55a..b2e5be16 100755 --- a/extraconfig/tasks/pacemaker_resource_restart.sh +++ b/extraconfig/tasks/pacemaker_resource_restart.sh @@ -8,7 +8,7 @@ pacemaker_status=$(systemctl is-active pacemaker) # and we're updating the deployment (not creating). if [ "$pacemaker_status" = "active" -a \ "$(hiera bootstrap_nodeid)" = "$(facter hostname)" -a \ - "$(hiera update_identifier)" != "nil" ]; then + "$(hiera stack_action)" = "UPDATE" ]; then #ensure neutron constraints like #https://review.openstack.org/#/c/245093/ @@ -18,8 +18,8 @@ if [ "$pacemaker_status" = "active" -a \ pcs resource disable httpd check_resource httpd stopped 300 - pcs resource disable openstack-keystone - check_resource openstack-keystone stopped 1800 + pcs resource disable openstack-core + check_resource openstack-core stopped 1800 if pcs status | grep haproxy-clone; then pcs resource restart haproxy-clone @@ -30,8 +30,8 @@ if [ "$pacemaker_status" = "active" -a \ pcs resource restart memcached-clone pcs resource restart galera-master - pcs resource enable openstack-keystone - check_resource openstack-keystone started 1800 + pcs resource enable openstack-core + check_resource openstack-core started 1800 pcs resource enable httpd check_resource httpd started 800 diff --git a/extraconfig/tasks/pre_puppet_pacemaker.yaml b/extraconfig/tasks/pre_puppet_pacemaker.yaml index 2cfe92a7..82546588 100644 --- a/extraconfig/tasks/pre_puppet_pacemaker.yaml +++ b/extraconfig/tasks/pre_puppet_pacemaker.yaml @@ -14,13 +14,8 @@ resources: type: OS::Heat::SoftwareConfig properties: group: script - config: | - #!/bin/bash - pacemaker_status=$(systemctl is-active pacemaker) - - if [ "$pacemaker_status" = "active" ]; then - pcs property set maintenance-mode=true - fi + config: + get_file: pacemaker_maintenance_mode.sh ControllerPrePuppetMaintenanceModeDeployment: type: OS::Heat::SoftwareDeployments diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh index 59e4be45..b045e5ea 100755 --- a/extraconfig/tasks/yum_update.sh +++ b/extraconfig/tasks/yum_update.sh @@ -23,7 +23,7 @@ update_identifier=${update_identifier//[^a-zA-Z0-9-_]/} # seconds to wait for this node to rejoin the cluster after update cluster_start_timeout=600 -galera_sync_timeout=360 +galera_sync_timeout=1800 cluster_settle_timeout=1800 timestamp_file="$timestamp_dir/$update_identifier" @@ -43,100 +43,8 @@ if [[ "$list_updates" == "" ]]; then fi pacemaker_status=$(systemctl is-active pacemaker) -pacemaker_dumpfile=$(mktemp) if [[ "$pacemaker_status" == "active" ]] ; then -SERVICES="memcached -httpd -neutron-dhcp-agent -neutron-l3-agent -neutron-metadata-agent -neutron-openvswitch-agent -neutron-server -openstack-ceilometer-alarm-evaluator -openstack-ceilometer-alarm-notifier -openstack-ceilometer-api -openstack-ceilometer-central -openstack-ceilometer-collector -openstack-ceilometer-notification -openstack-cinder-api -openstack-cinder-scheduler -openstack-cinder-volume -openstack-glance-api -openstack-glance-registry -openstack-heat-api -openstack-heat-api-cfn -openstack-heat-api-cloudwatch -openstack-heat-engine -openstack-keystone -openstack-nova-api -openstack-nova-conductor -openstack-nova-consoleauth -openstack-nova-novncproxy -openstack-nova-scheduler" - - echo "Dumping Pacemaker config" - pcs cluster cib $pacemaker_dumpfile - - echo "Checking for missing constraints" - - if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then - pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone - fi - - if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone - fi - - if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone - fi - - if pcs resource | grep "haproxy-clone"; then - SERVICES="$SERVICES haproxy" - if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone - fi - fi - - if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone - fi - - if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then - pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false - fi - - # ensure neutron constraints https://review.openstack.org/#/c/229466 - # remove ovs-cleanup after server and add openvswitch-agent instead - if pcs constraint order show | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then - pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory - fi - if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then - pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone - fi - - - if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then - pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY - fi - - echo "Setting resource start/stop timeouts" - for service in $SERVICES; do - pcs -f $pacemaker_dumpfile resource update $service op start timeout=200s op stop timeout=200s - done - # mongod start timeout is higher, setting only stop timeout - pcs -f $pacemaker_dumpfile resource update mongod op start timeout=370s op stop timeout=200s - - echo "Making sure rabbitmq has the notify=true meta parameter" - pcs -f $pacemaker_dumpfile resource update rabbitmq meta notify=true - - echo "Applying new Pacemaker config" - if ! pcs cluster cib-push $pacemaker_dumpfile; then - echo "ERROR failed to apply new pacemaker config" - exit 1 - fi - echo "Pacemaker running, stopping cluster node and doing full package update" node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*") if [[ "$node_count" == "1" ]] ; then @@ -145,13 +53,6 @@ openstack-nova-scheduler" else pcs cluster stop fi - - # clean leftover keepalived and radvd instances from neutron - # (can be removed when we remove neutron-netns-cleanup from cluster services) - # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init - killall neutron-keepalived-state-change 2>/dev/null || : - kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || : - kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || : else echo "Upgrading openstack-puppet-modules" yum -q -y update openstack-puppet-modules |