diff options
Diffstat (limited to 'extraconfig/tasks')
-rwxr-xr-x | extraconfig/tasks/yum_update.sh | 113 |
1 files changed, 111 insertions, 2 deletions
diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh index eaeb7ef0..e3e9545d 100755 --- a/extraconfig/tasks/yum_update.sh +++ b/extraconfig/tasks/yum_update.sh @@ -22,7 +22,8 @@ mkdir -p $timestamp_dir update_identifier=${update_identifier//[^a-zA-Z0-9-_]/} # seconds to wait for this node to rejoin the cluster after update -cluster_start_timeout=360 +cluster_start_timeout=600 +galera_sync_timeout=360 timestamp_file="$timestamp_dir/$update_identifier" if [[ -a "$timestamp_file" ]]; then @@ -41,10 +42,107 @@ if [[ "$list_updates" == "" ]]; then fi pacemaker_status=$(systemctl is-active pacemaker) +pacemaker_dumpfile=$(mktemp) if [[ "$pacemaker_status" == "active" ]] ; then + echo "Dumping Pacemaker config" + pcs cluster cib $pacemaker_dumpfile + + echo "Checking for missing constraints" + + if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then + pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone + fi + + if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then + pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone + fi + + if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then + pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone + fi + + if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then + pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone + fi + + if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then + pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone + fi + + if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then + pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false + fi + + # ensure neutron constraints https://review.openstack.org/#/c/229466 + # remove ovs-cleanup after server and add openvswitch-agent instead + if pcs constraint order show | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then + pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory + fi + if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then + pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone + fi + + + if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then + pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY + fi + + echo "Setting resource start/stop timeouts" + SERVICES=" +haproxy +memcached +httpd +neutron-dhcp-agent +neutron-l3-agent +neutron-metadata-agent +neutron-openvswitch-agent +neutron-server +openstack-ceilometer-alarm-evaluator +openstack-ceilometer-alarm-notifier +openstack-ceilometer-api +openstack-ceilometer-central +openstack-ceilometer-collector +openstack-ceilometer-notification +openstack-cinder-api +openstack-cinder-scheduler +openstack-cinder-volume +openstack-glance-api +openstack-glance-registry +openstack-heat-api +openstack-heat-api-cfn +openstack-heat-api-cloudwatch +openstack-heat-engine +openstack-keystone +openstack-nova-api +openstack-nova-conductor +openstack-nova-consoleauth +openstack-nova-novncproxy +openstack-nova-scheduler" + for service in $SERVICES; do + pcs -f $pacemaker_dumpfile resource update $service op start timeout=100s op stop timeout=100s + done + # mongod start timeout is higher, setting only stop timeout + pcs resource update mongod op stop timeout=100s + + echo "Applying new Pacemaker config" + pcs cluster cib-push $pacemaker_dumpfile + echo "Pacemaker running, stopping cluster node and doing full package update" - pcs cluster stop + node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*") + if [[ "$node_count" == "1" ]] ; then + echo "Active node count is 1, stopping node with --force" + pcs cluster stop --force + else + pcs cluster stop + fi + + # clean leftover keepalived and radvd instances from neutron + # (can be removed when we remove neutron-netns-cleanup from cluster services) + # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init + killall neutron-keepalived-state-change 2>/dev/null || : + kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || : + kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || : else echo "Excluding upgrading packages that are handled by config management tooling" command_arguments="$command_arguments --skip-broken" @@ -77,6 +175,17 @@ if [[ "$pacemaker_status" == "active" ]] ; then exit 1 fi done + + tstart=$(date +%s) + while ! clustercheck; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > galera_sync_timeout )) ; then + echo "ERROR galera sync timed out" + exit 1 + fi + done + pcs status else |