5 files changed, 57 insertions, 117 deletions
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
index bf2ee330..f5399222 100755
--- a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
@@ -12,10 +12,8 @@ fi
 if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
     pcs resource disable httpd
     check_resource httpd stopped 1800
-    if pcs status | grep openstack-keystone; then
-        pcs resource disable openstack-keystone
-        check_resource openstack-keystone stopped 1800
-    fi
+    pcs resource disable openstack-core
+    check_resource openstack-core stopped 1800
     pcs resource disable redis
     check_resource redis stopped 600
     pcs resource disable mongod
@@ -26,6 +24,12 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)
     check_resource memcached stopped 600
     pcs resource disable galera
     check_resource galera stopped 600
+    # Disable all VIPs before stopping the cluster, so that pcs doesn't use one as a source address:
+    #   https://bugzilla.redhat.com/show_bug.cgi?id=1330688
+    for vip in $(pcs resource show | awk '/ocf::heartbeat:IPaddr2/ && /Started/ { print $1 }'); do
+        pcs resource disable "$vip"
+        check_resource "$vip" stopped 60
+    done
     pcs cluster stop --all
 fi
 
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
index 10bea573..643ae57f 100755
--- a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
@@ -24,6 +24,11 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)
         exit 1
     fi
 
+    # Re-enable every IPaddr2 resource that pcs reports as Stopped before the services below.
+    for vip in $(pcs resource show | awk '/ocf::heartbeat:IPaddr2/ && /Stopped/ { print $1 }'); do
+        pcs resource enable "$vip"
+        check_resource "$vip" started 60
+    done
     pcs resource enable galera
     check_resource galera started 600
     pcs resource enable mongod
@@ -55,10 +60,8 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)
     check_resource rabbitmq started 600
     pcs resource enable redis
     check_resource redis started 600
-    if pcs status | grep openstack-keystone; then
-        pcs resource enable openstack-keystone
-        check_resource openstack-keystone started 1800
-    fi
+    pcs resource enable openstack-core
+    check_resource openstack-core started 1800
     pcs resource enable httpd
     check_resource httpd started 1800
 fi
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
index 1f420b32..b63198db 100644
--- a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
+++ b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
@@ -13,6 +13,42 @@
 # been already applied, it should be possible to call the function
 # again without damaging the deployment or failing the upgrade.
 
+function add_missing_openstack_core_constraints {
+    # Edit a dumped copy of the CIB (add three ordering constraints when absent, remove one when present); push only on change.
+    # The CIBs are saved under /root as they might contain sensitive data
+    local CIB="/root/migration.cib" CIB_BACKUP="/root/backup.cib"
+    local CIB_PUSH_NEEDED=n CID
+    rm -f "$CIB" "$CIB_BACKUP" || /bin/true
+    pcs cluster cib "$CIB"
+    cp "$CIB" "$CIB_BACKUP"  # untouched copy of the dump; not read again in this function
+
+    if ! pcs -f "$CIB" constraint --full | grep 'start openstack-sahara-api-clone then start openstack-sahara-engine-clone'; then
+        pcs -f "$CIB" constraint order start openstack-sahara-api-clone then start openstack-sahara-engine-clone
+        CIB_PUSH_NEEDED=y
+    fi
+
+    if ! pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-ceilometer-notification-clone'; then
+        pcs -f "$CIB" constraint order start openstack-core-clone then start openstack-ceilometer-notification-clone
+        CIB_PUSH_NEEDED=y
+    fi
+
+    if ! pcs -f "$CIB" constraint --full | grep 'start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone'; then
+        pcs -f "$CIB" constraint order start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone
+        CIB_PUSH_NEEDED=y
+    fi
+
+    # Drop the core -> heat-api ordering if present; $CID stays unquoted so several ids split into separate arguments.
+    CID=$(pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone' | sed -e 's/.*id\://g' -e 's/)//g') || /bin/true
+    if [ -n "$CID" ]; then
+        pcs -f "$CIB" constraint remove $CID
+        CIB_PUSH_NEEDED=y
+    fi
+
+    if [ "$CIB_PUSH_NEEDED" = 'y' ]; then
+        pcs cluster cib-push "$CIB"
+    fi
+}
+
 function remove_ceilometer_alarm {
     if pcs status | grep openstack-ceilometer-alarm; then
         # Disable pacemaker resources for ceilometer-alarms
diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh
index b2bdc55a..b2e5be16 100755
--- a/extraconfig/tasks/pacemaker_resource_restart.sh
+++ b/extraconfig/tasks/pacemaker_resource_restart.sh
@@ -8,7 +8,7 @@ pacemaker_status=$(systemctl is-active pacemaker)
 # and we're updating the deployment (not creating).
 if [ "$pacemaker_status" = "active" -a \
      "$(hiera bootstrap_nodeid)" = "$(facter hostname)" -a \
-     "$(hiera update_identifier)" != "nil" ]; then
+     "$(hiera stack_action)" = "UPDATE" ]; then
 
     #ensure neutron constraints like
     #https://review.openstack.org/#/c/245093/
@@ -18,8 +18,8 @@ if [ "$pacemaker_status" = "active" -a \
 
     pcs resource disable httpd
     check_resource httpd stopped 300
-    pcs resource disable openstack-keystone
-    check_resource openstack-keystone stopped 1800
+    pcs resource disable openstack-core
+    check_resource openstack-core stopped 1800
 
     if pcs status | grep haproxy-clone; then
         pcs resource restart haproxy-clone
@@ -30,8 +30,8 @@ if [ "$pacemaker_status" = "active" -a \
     pcs resource restart memcached-clone
     pcs resource restart galera-master
 
-    pcs resource enable openstack-keystone
-    check_resource openstack-keystone started 1800
+    pcs resource enable openstack-core
+    check_resource openstack-core started 1800
     pcs resource enable httpd
     check_resource httpd started 800
 
diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh
index 66efc5c5..b045e5ea 100755
--- a/extraconfig/tasks/yum_update.sh
+++ b/extraconfig/tasks/yum_update.sh
@@ -23,7 +23,7 @@ update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}
 
 # seconds to wait for this node to rejoin the cluster after update
 cluster_start_timeout=600
-galera_sync_timeout=360
+galera_sync_timeout=1800
 cluster_settle_timeout=1800
 
 timestamp_file="$timestamp_dir/$update_identifier"
@@ -43,104 +43,8 @@ if [[ "$list_updates" == "" ]]; then
 fi
 
 pacemaker_status=$(systemctl is-active pacemaker)
-pacemaker_dumpfile=$(mktemp)
 
 if [[ "$pacemaker_status" == "active" ]] ; then
-SERVICES="memcached
-httpd
-neutron-dhcp-agent
-neutron-l3-agent
-neutron-metadata-agent
-neutron-openvswitch-agent
-neutron-server
-openstack-ceilometer-api
-openstack-ceilometer-central
-openstack-ceilometer-collector
-openstack-ceilometer-notification
-openstack-aodh-evaluator
-openstack-aodh-notifier
-openstack-aodh-listener
-openstack-cinder-api
-openstack-cinder-scheduler
-openstack-cinder-volume
-openstack-glance-api
-openstack-glance-registry
-openstack-heat-api
-openstack-heat-api-cfn
-openstack-heat-api-cloudwatch
-openstack-heat-engine
-openstack-keystone
-openstack-nova-api
-openstack-nova-conductor
-openstack-nova-consoleauth
-openstack-nova-novncproxy
-openstack-nova-scheduler"
-
-    echo "Dumping Pacemaker config"
-    pcs cluster cib $pacemaker_dumpfile
-
-    echo "Checking for missing constraints"
-
-    if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone
-    fi
-
-    if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone
-    fi
-
-    if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone
-    fi
-
-    if pcs resource | grep "haproxy-clone"; then
-        SERVICES="$SERVICES haproxy"
-        if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then
-            pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone
-        fi
-    fi
-
-    if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone
-    fi
-
-    if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false
-    fi
-
-    if ! pcs constraint order show | grep "promote redis-master then start openstack-aodh-evaluator-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-aodh-evaluator-clone require-all=false
-    fi
-    # ensure neutron constraints https://review.openstack.org/#/c/229466
-    # remove ovs-cleanup after server and add openvswitch-agent instead
-    if  pcs constraint order show  | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then
-        pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory
-    fi
-    if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone
-    fi
-
-
-    if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then
-        pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY
-    fi
-
-    echo "Setting resource start/stop timeouts"
-    for service in $SERVICES; do
-        pcs -f $pacemaker_dumpfile resource update $service op start timeout=200s op stop timeout=200s
-    done
-    # mongod start timeout is higher, setting only stop timeout
-    pcs -f $pacemaker_dumpfile resource update mongod op start timeout=370s op  stop timeout=200s
-
-    echo "Making sure rabbitmq has the notify=true meta parameter"
-    pcs -f $pacemaker_dumpfile resource update rabbitmq meta notify=true
-
-    echo "Applying new Pacemaker config"
-    if ! pcs cluster cib-push $pacemaker_dumpfile; then
-        echo "ERROR failed to apply new pacemaker config"
-        exit 1
-    fi
-
     echo "Pacemaker running, stopping cluster node and doing full package update"
     node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
     if [[ "$node_count" == "1" ]] ; then
@@ -149,13 +53,6 @@ openstack-nova-scheduler"
     else
         pcs cluster stop
     fi
-
-    # clean leftover keepalived and radvd instances from neutron
-    # (can be removed when we remove neutron-netns-cleanup from cluster services)
-    # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init
-    killall neutron-keepalived-state-change 2>/dev/null || :
-    kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || :
-    kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || :
 else
     echo "Upgrading openstack-puppet-modules"
     yum -q -y update openstack-puppet-modules