blob: 6ab2501c11990cf61670f10d7b25d97df6251d4b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
|
#!/bin/bash
# A heat-config-script which runs yum update during a stack-update.
# Inputs:
# deploy_action - yum will only be run if this is UPDATE
# update_identifier - yum will only run for previously unused values of update_identifier
# command - yum sub-command to run, defaults to "update"
# command_arguments - yum command arguments, defaults to ""
echo "Started yum_update.sh on server $deploy_server_id at `date`"
echo -n "false" > $heat_outputs_path.update_managed_packages
if [[ -z "$update_identifier" ]]; then
echo "Not running due to unset update_identifier"
exit 0
fi
timestamp_dir=/var/lib/overcloud-yum-update
mkdir -p $timestamp_dir
# sanitise to remove unusual characters
update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}
# seconds to wait for this node to rejoin the cluster after update
cluster_start_timeout=600
galera_sync_timeout=360
timestamp_file="$timestamp_dir/$update_identifier"
if [[ -a "$timestamp_file" ]]; then
echo "Not running for already-run timestamp \"$update_identifier\""
exit 0
fi
touch "$timestamp_file"
command_arguments=${command_arguments:-}
list_updates=$(yum list updates)
if [[ "$list_updates" == "" ]]; then
echo "No packages require updating"
exit 0
fi
pacemaker_status=$(systemctl is-active pacemaker)
pacemaker_dumpfile=$(mktemp)
if [[ "$pacemaker_status" == "active" ]] ; then
echo "Dumping Pacemaker config"
pcs cluster cib $pacemaker_dumpfile
echo "Checking for missing constraints"
if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then
pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone
fi
if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then
pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone
fi
if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then
pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone
fi
if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then
pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone
fi
if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then
pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone
fi
if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then
pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false
fi
if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then
pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY
fi
echo "Setting resource start/stop timeouts"
SERVICES="
haproxy
memcached
httpd
neutron-dhcp-agent
neutron-l3-agent
neutron-metadata-agent
neutron-openvswitch-agent
neutron-server
openstack-ceilometer-alarm-evaluator
openstack-ceilometer-alarm-notifier
openstack-ceilometer-api
openstack-ceilometer-central
openstack-ceilometer-collector
openstack-ceilometer-notification
openstack-cinder-api
openstack-cinder-scheduler
openstack-cinder-volume
openstack-glance-api
openstack-glance-registry
openstack-heat-api
openstack-heat-api-cfn
openstack-heat-api-cloudwatch
openstack-heat-engine
openstack-keystone
openstack-nova-api
openstack-nova-conductor
openstack-nova-consoleauth
openstack-nova-novncproxy
openstack-nova-scheduler"
for service in $SERVICES; do
pcs -f $pacemaker_dumpfile resource update $service op start timeout=100s op stop timeout=100s
done
# mongod start timeout is higher, setting only stop timeout
pcs resource update mongod op stop timeout=100s
echo "Applying new Pacemaker config"
pcs cluster cib-push $pacemaker_dumpfile
echo "Pacemaker running, stopping cluster node and doing full package update"
node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
if [[ "$node_count" == "1" ]] ; then
echo "Active node count is 1, stopping node with --force"
pcs cluster stop --force
else
pcs cluster stop
fi
else
echo "Excluding upgrading packages that are handled by config management tooling"
command_arguments="$command_arguments --skip-broken"
for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do
command_arguments="$command_arguments --exclude $exclude"
done
fi
command=${command:-update}
full_command="yum -y $command $command_arguments"
echo "Running: $full_command"
result=$($full_command)
return_code=$?
echo "$result"
echo "yum return code: $return_code"
if [[ "$pacemaker_status" == "active" ]] ; then
echo "Starting cluster node"
pcs cluster start
hostname=$(hostname -s)
tstart=$(date +%s)
while [[ "$(pcs status | grep "^Online" | grep -F -o $hostname)" == "" ]]; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_start_timeout )) ; then
echo "ERROR $hostname failed to join cluster in $cluster_start_timeout seconds"
pcs status
exit 1
fi
done
tstart=$(date +%s)
while ! clustercheck; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > galera_sync_timeout )) ; then
echo "ERROR galera sync timed out"
exit 1
fi
done
pcs status
else
echo -n "true" > $heat_outputs_path.update_managed_packages
fi
echo "Finished yum_update.sh on server $deploy_server_id at `date`"
exit $return_code
|