diff options
Diffstat (limited to 'extraconfig')
-rwxr-xr-x | extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh | 12 | ||||
-rwxr-xr-x | extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh | 11 | ||||
-rw-r--r-- | extraconfig/tasks/major_upgrade_pacemaker_migrations.sh | 36 | ||||
-rwxr-xr-x | extraconfig/tasks/pacemaker_resource_restart.sh | 10 | ||||
-rwxr-xr-x | extraconfig/tasks/yum_update.sh | 105 |
5 files changed, 57 insertions, 117 deletions
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh index bf2ee330..f5399222 100755 --- a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh @@ -12,10 +12,8 @@ fi if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then pcs resource disable httpd check_resource httpd stopped 1800 - if pcs status | grep openstack-keystone; then - pcs resource disable openstack-keystone - check_resource openstack-keystone stopped 1800 - fi + pcs resource disable openstack-core + check_resource openstack-core stopped 1800 pcs resource disable redis check_resource redis stopped 600 pcs resource disable mongod @@ -26,6 +24,12 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname) check_resource memcached stopped 600 pcs resource disable galera check_resource galera stopped 600 + # Disable all VIPs before stopping the cluster, so that pcs doesn't use one as a source address: + # https://bugzilla.redhat.com/show_bug.cgi?id=1330688 + for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Started | awk '{ print $1 }'); do + pcs resource disable $vip + check_resource $vip stopped 60 + done pcs cluster stop --all fi diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh index 10bea573..643ae57f 100755 --- a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh @@ -24,6 +24,11 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname) exit 1 fi + for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Stopped | awk '{ print $1 }'); do + pcs resource enable $vip + check_resource $vip started 60 + done + pcs resource enable galera check_resource galera started 600 pcs resource enable mongod @@ -55,10 +60,8 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname) check_resource rabbitmq started 600 pcs resource enable redis check_resource redis started 600 - if pcs status | grep openstack-keystone; then - pcs resource enable openstack-keystone - check_resource openstack-keystone started 1800 - fi + pcs resource enable openstack-core + check_resource openstack-core started 1800 pcs resource enable httpd check_resource httpd started 1800 fi diff --git a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh index 1f420b32..b63198db 100644 --- a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh +++ b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh @@ -13,6 +13,42 @@ # been already applied, it should be possible to call the function # again without damaging the deployment or failing the upgrade. +function add_missing_openstack_core_constraints { + # The CIBs are saved under /root as they might contain sensitive data + CIB="/root/migration.cib" + CIB_BACKUP="/root/backup.cib" + CIB_PUSH_NEEDED=n + + rm -f "$CIB" "$CIB_BACKUP" || /bin/true + pcs cluster cib "$CIB" + cp "$CIB" "$CIB_BACKUP" + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-sahara-api-clone then start openstack-sahara-engine-clone'; then + pcs -f "$CIB" constraint order start openstack-sahara-api-clone then start openstack-sahara-engine-clone + CIB_PUSH_NEEDED=y + fi + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-ceilometer-notification-clone'; then + pcs -f "$CIB" constraint order start openstack-core-clone then start openstack-ceilometer-notification-clone + CIB_PUSH_NEEDED=y + fi + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone'; then + pcs -f "$CIB" constraint order start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone + CIB_PUSH_NEEDED=y + fi + + if pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone'; then + CID=$(pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone' | sed -e 's/.*id\://g' -e 's/)//g') + pcs -f "$CIB" constraint remove $CID + CIB_PUSH_NEEDED=y + fi + + if [ "$CIB_PUSH_NEEDED" = 'y' ]; then + pcs cluster cib-push "$CIB" + fi +} + function remove_ceilometer_alarm { if pcs status | grep openstack-ceilometer-alarm; then # Disable pacemaker resources for ceilometer-alarms diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh index b2bdc55a..b2e5be16 100755 --- a/extraconfig/tasks/pacemaker_resource_restart.sh +++ b/extraconfig/tasks/pacemaker_resource_restart.sh @@ -8,7 +8,7 @@ pacemaker_status=$(systemctl is-active pacemaker) # and we're updating the deployment (not creating). if [ "$pacemaker_status" = "active" -a \ "$(hiera bootstrap_nodeid)" = "$(facter hostname)" -a \ - "$(hiera update_identifier)" != "nil" ]; then + "$(hiera stack_action)" = "UPDATE" ]; then #ensure neutron constraints like #https://review.openstack.org/#/c/245093/ @@ -18,8 +18,8 @@ if [ "$pacemaker_status" = "active" -a \ pcs resource disable httpd check_resource httpd stopped 300 - pcs resource disable openstack-keystone - check_resource openstack-keystone stopped 1800 + pcs resource disable openstack-core + check_resource openstack-core stopped 1800 if pcs status | grep haproxy-clone; then pcs resource restart haproxy-clone @@ -30,8 +30,8 @@ if [ "$pacemaker_status" = "active" -a \ pcs resource restart memcached-clone pcs resource restart galera-master - pcs resource enable openstack-keystone - check_resource openstack-keystone started 1800 + pcs resource enable openstack-core + check_resource openstack-core started 1800 pcs resource enable httpd check_resource httpd started 800 diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh index 66efc5c5..b045e5ea 100755 --- a/extraconfig/tasks/yum_update.sh +++ b/extraconfig/tasks/yum_update.sh @@ -23,7 +23,7 @@ update_identifier=${update_identifier//[^a-zA-Z0-9-_]/} # seconds to wait for this node to rejoin the cluster after update cluster_start_timeout=600 -galera_sync_timeout=360 +galera_sync_timeout=1800 cluster_settle_timeout=1800 timestamp_file="$timestamp_dir/$update_identifier" @@ -43,104 +43,8 @@ if [[ "$list_updates" == "" ]]; then fi pacemaker_status=$(systemctl is-active pacemaker) -pacemaker_dumpfile=$(mktemp) if [[ "$pacemaker_status" == "active" ]] ; then -SERVICES="memcached -httpd -neutron-dhcp-agent -neutron-l3-agent -neutron-metadata-agent -neutron-openvswitch-agent -neutron-server -openstack-ceilometer-api -openstack-ceilometer-central -openstack-ceilometer-collector -openstack-ceilometer-notification -openstack-aodh-evaluator -openstack-aodh-notifier -openstack-aodh-listener -openstack-cinder-api -openstack-cinder-scheduler -openstack-cinder-volume -openstack-glance-api -openstack-glance-registry -openstack-heat-api -openstack-heat-api-cfn -openstack-heat-api-cloudwatch -openstack-heat-engine -openstack-keystone -openstack-nova-api -openstack-nova-conductor -openstack-nova-consoleauth -openstack-nova-novncproxy -openstack-nova-scheduler" - - echo "Dumping Pacemaker config" - pcs cluster cib $pacemaker_dumpfile - - echo "Checking for missing constraints" - - if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then - pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone - fi - - if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone - fi - - if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone - fi - - if pcs resource | grep "haproxy-clone"; then - SERVICES="$SERVICES haproxy" - if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone - fi - fi - - if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then - pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone - fi - - if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then - pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false - fi - - if ! pcs constraint order show | grep "promote redis-master then start openstack-aodh-evaluator-clone"; then - pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-aodh-evaluator-clone require-all=false - fi - # ensure neutron constraints https://review.openstack.org/#/c/229466 - # remove ovs-cleanup after server and add openvswitch-agent instead - if pcs constraint order show | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then - pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory - fi - if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then - pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone - fi - - - if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then - pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY - fi - - echo "Setting resource start/stop timeouts" - for service in $SERVICES; do - pcs -f $pacemaker_dumpfile resource update $service op start timeout=200s op stop timeout=200s - done - # mongod start timeout is higher, setting only stop timeout - pcs -f $pacemaker_dumpfile resource update mongod op start timeout=370s op stop timeout=200s - - echo "Making sure rabbitmq has the notify=true meta parameter" - pcs -f $pacemaker_dumpfile resource update rabbitmq meta notify=true - - echo "Applying new Pacemaker config" - if ! pcs cluster cib-push $pacemaker_dumpfile; then - echo "ERROR failed to apply new pacemaker config" - exit 1 - fi - echo "Pacemaker running, stopping cluster node and doing full package update" node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*") if [[ "$node_count" == "1" ]] ; then @@ -149,13 +53,6 @@ openstack-nova-scheduler" else pcs cluster stop fi - - # clean leftover keepalived and radvd instances from neutron - # (can be removed when we remove neutron-netns-cleanup from cluster services) - # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init - killall neutron-keepalived-state-change 2>/dev/null || : - kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || : - kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || : else echo "Upgrading openstack-puppet-modules" yum -q -y update openstack-puppet-modules |