blob: fa523e8353927289aff48c796d995a3207989c4f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
|
#!/bin/bash
# A heat-config-script which runs yum update during a stack-update.
# Inputs:
# deploy_action - yum will only be run if this is UPDATE
#                 (NOTE(review): this script never reads deploy_action itself;
#                 presumably the caller enforces it - confirm)
# update_identifier - yum will only run for previously unused values of update_identifier
# command - yum sub-command to run, defaults to "update"
# command_arguments - yum command arguments, defaults to ""
# deploy_server_id - only used in the start/finish log lines
# heat_outputs_path - prefix of the file that receives the
#                     update_managed_packages output ("true" or "false")
echo "Started yum_update.sh on server $deploy_server_id at $(date)"

# Start with "false"; the script only overwrites this with "true" at the very
# end, and only when pacemaker is not active on this node.
# The target is quoted so a path containing whitespace or glob characters is
# used verbatim instead of failing with "ambiguous redirect".
echo -n "false" > "${heat_outputs_path}.update_managed_packages"

# Nothing to do unless the caller supplied an identifier for this update run.
if [[ -z "$update_identifier" ]]; then
    echo "Not running due to unset update_identifier"
    exit 0
fi
# Directory holding one marker file per update_identifier that has already
# been processed on this node.
timestamp_dir=/var/lib/overcloud-yum-update
mkdir -p "$timestamp_dir"

# sanitise to remove unusual characters
update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}

# If nothing survives sanitising, the marker path below would be the marker
# directory itself. That exists once mkdir succeeds, so the script would stop
# with a misleading "already-run timestamp" message. Report the real reason.
if [[ -z "$update_identifier" ]]; then
    echo "Not running due to update_identifier being empty after sanitising"
    exit 0
fi

# seconds to wait for this node to rejoin the cluster after update
cluster_start_timeout=360
# seconds to wait for clustercheck to succeed after the node has rejoined
galera_sync_timeout=360

# The marker is created before the update runs, so a given identifier is
# attempted at most once even if a later step fails.
timestamp_file="$timestamp_dir/$update_identifier"
if [[ -e "$timestamp_file" ]]; then
    echo "Not running for already-run timestamp \"$update_identifier\""
    exit 0
fi
touch "$timestamp_file"
# Make sure command_arguments is defined (empty by default) before it is
# appended to further down.
command_arguments=${command_arguments:-}

# Decide whether there is anything to do from the exit status of
# `yum check-update` rather than from whether some output happens to be empty:
#   100 - updates are available
#   0   - no updates are available
#   any other value - the check itself failed
# This stops a failing yum that prints nothing on stdout from being reported
# as "No packages require updating".
check_update=$(yum check-update 2>&1)
check_update_rc=$?
case "$check_update_rc" in
    100)
        # Updates are pending; carry on with the update below.
        ;;
    0)
        echo "No packages require updating"
        exit 0
        ;;
    *)
        echo "Failed to check for package updates"
        echo "$check_update"
        exit 1
        ;;
esac
# Ask systemd whether the pacemaker unit is active. The answer selects the
# preparation path here and is checked again after yum has run.
pacemaker_status=$(systemctl is-active pacemaker)

# Create an ordering constraint unless `pcs constraint order show` lists it.
#   $1    - text to look for in the existing constraints
#   $2... - arguments passed to `pcs constraint order` to create it
# Matching lines are printed by grep, so existing constraints show up in the log.
ensure_order_constraint() {
    local wanted=$1
    shift
    if ! pcs constraint order show | grep "$wanted"; then
        pcs constraint order "$@"
    fi
}

# Set a 100s timeout on each listed operation of one pacemaker resource.
#   $1    - resource name
#   $2... - operation names, applied in the order given
set_op_timeouts() {
    local resource=$1 operation
    shift
    for operation in "$@"; do
        pcs resource update "$resource" op "$operation" timeout=100s
    done
}

if [[ "$pacemaker_status" != "active" ]]; then
    # No active pacemaker: leave packages owned by config management alone.
    echo "Excluding upgrading packages that are handled by config management tooling"
    command_arguments+=" --skip-broken"
    # Word-splitting of the file contents is intended: one package per word.
    for managed_package in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do
        command_arguments+=" --exclude $managed_package"
    done
else
    echo "Checking for and adding missing constraints"
    ensure_order_constraint \
        "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone" \
        start openstack-nova-novncproxy-clone then openstack-nova-api-clone
    ensure_order_constraint \
        "start rabbitmq-clone then start openstack-keystone-clone" \
        start rabbitmq-clone then openstack-keystone-clone
    ensure_order_constraint \
        "promote galera-master then start openstack-keystone-clone" \
        promote galera-master then openstack-keystone-clone
    ensure_order_constraint \
        "start haproxy-clone then start openstack-keystone-clone" \
        start haproxy-clone then openstack-keystone-clone
    ensure_order_constraint \
        "start memcached-clone then start openstack-keystone-clone" \
        start memcached-clone then openstack-keystone-clone
    ensure_order_constraint \
        "promote redis-master then start openstack-ceilometer-central-clone" \
        promote redis-master then start openstack-ceilometer-central-clone require-all=false

    # Set the resource-stickiness default only when it is not already INFINITY.
    pcs resource defaults | grep "resource-stickiness: INFINITY" \
        || pcs resource defaults resource-stickiness=INFINITY

    echo "Setting resource start/stop timeouts"
    # timeouts for non-openstack services and special cases
    set_op_timeouts haproxy start stop
    # mongod start timeout is also higher, setting only stop timeout
    set_op_timeouts mongod stop
    # rabbit start timeout is already 100s
    set_op_timeouts rabbitmq stop
    set_op_timeouts memcached start stop
    set_op_timeouts httpd start stop
    # neutron-netns-cleanup stop timeout is 300s, setting only start timeout
    set_op_timeouts neutron-netns-cleanup start
    # neutron-ovs-cleanup stop timeout is 300s, setting only start timeout
    set_op_timeouts neutron-ovs-cleanup start

    # timeouts for openstack services: both start and stop, in this order
    openstack_resources=(
        neutron-dhcp-agent
        neutron-l3-agent
        neutron-metadata-agent
        neutron-openvswitch-agent
        neutron-server
        openstack-ceilometer-alarm-evaluator
        openstack-ceilometer-alarm-notifier
        openstack-ceilometer-api
        openstack-ceilometer-central
        openstack-ceilometer-collector
        openstack-ceilometer-notification
        openstack-cinder-api
        openstack-cinder-scheduler
        openstack-cinder-volume
        openstack-glance-api
        openstack-glance-registry
        openstack-heat-api
        openstack-heat-api-cfn
        openstack-heat-api-cloudwatch
        openstack-heat-engine
        openstack-keystone
        openstack-nova-api
        openstack-nova-conductor
        openstack-nova-consoleauth
        openstack-nova-novncproxy
        openstack-nova-scheduler
    )
    for openstack_resource in "${openstack_resources[@]}"; do
        set_op_timeouts "$openstack_resource" start stop
    done

    echo "Pacemaker running, stopping cluster node and doing full package update"
    # Pull the number="N" attribute out of the <nodes_configured .../> element.
    node_count=$(pcs status xml \
        | grep -o "<nodes_configured.*/>" \
        | grep -o 'number="[0-9]*"' \
        | grep -o "[0-9]*")
    # A single-node cluster is stopped with --force.
    cluster_stop_args=()
    if [[ "$node_count" == "1" ]]; then
        echo "Active node count is 1, stopping node with --force"
        cluster_stop_args+=(--force)
    fi
    pcs cluster stop "${cluster_stop_args[@]}"
fi
# Fall back to a plain "update" when the caller did not pick a sub-command.
: "${command:=update}"

full_command="yum -y ${command} ${command_arguments}"
echo "Running: ${full_command}"

# The expansion is left unquoted on purpose: the string has to be split into
# the yum executable and its individual arguments.
result=$(${full_command})
# Remember yum's status now; it becomes the script's exit code at the end.
return_code=$?

echo "${result}"
echo "yum return code: ${return_code}"
# After yum has run: bring a pacemaker-managed node back into the cluster and
# wait for it to be healthy, or, without pacemaker, record that managed
# packages were handled by this run. The cluster is restarted regardless of
# yum's return code, which is reported through the final exit status.
if [[ "$pacemaker_status" == "active" ]]; then
    echo "Starting cluster node"
    pcs cluster start

    # Wait until this node's short hostname shows up on the "Online" line of
    # `pcs status`. The name is quoted and preceded by "--" so grep always
    # receives it as a single fixed-string pattern.
    node_name=$(hostname -s)
    wait_started=$(date +%s)
    while [[ -z "$(pcs status | grep "^Online" | grep -F -o -- "$node_name")" ]]; do
        sleep 5
        if (( $(date +%s) - wait_started > cluster_start_timeout )); then
            echo "ERROR $node_name failed to join cluster in $cluster_start_timeout seconds"
            pcs status
            exit 1
        fi
    done

    # Then wait until clustercheck succeeds.
    wait_started=$(date +%s)
    while ! clustercheck; do
        sleep 5
        if (( $(date +%s) - wait_started > galera_sync_timeout )); then
            echo "ERROR galera sync timed out"
            exit 1
        fi
    done

    pcs status
else
    # Quoted so a path with whitespace or glob characters is used verbatim.
    echo -n "true" > "${heat_outputs_path}.update_managed_packages"
fi

echo "Finished yum_update.sh on server $deploy_server_id at $(date)"

# Propagate yum's own result as the result of this script.
exit $return_code
|