1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
#!/bin/bash
set -eu
cluster_sync_timeout=1800
check_cluster
check_pcsd
check_clean_cluster
check_python_rpm
check_galera_root_password
check_disk_for_mysql_dump
# We want to disable fencing during the cluster --stop as it might fence
# nodes where a service fails to stop, which could be fatal during an upgrade
# procedure. So we remember the stonith state. If it was enabled we reenable it
# at the end of this script
STONITH_STATE=$(pcs property show stonith-enabled | grep "stonith-enabled" | awk '{ print $2 }')
pcs property set stonith-enabled=false
# Migrate to HA NG
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
migrate_full_to_ng_ha
fi
# After migrating the cluster to HA-NG the services not under pacemaker's control
# are still up and running. We need to stop them explicitely otherwise during the yum
# upgrade the rpm %post sections will try to do a systemctl try-restart <service>, which
# is going to take a long time because rabbit is down. By having the service stopped
# systemctl try-restart is a noop
for $service in $(services_to_migrate); do
manage_systemd_service stop "${service%%-clone}"
check_resource_systemd "${service%%-clone}" stopped 600
done
# In case the mysql package is updated, the database on disk must be
# upgraded as well. This typically needs to happen during major
# version upgrades (e.g. 5.5 -> 5.6, 5.5 -> 10.1...)
#
# Because in-place upgrades are not supported across 2+ major versions
# (e.g. 5.5 -> 10.1), we rely on logical upgrades via dump/restore cycle
# https://bugzilla.redhat.com/show_bug.cgi?id=1341968
#
# The default is to determine automatically if upgrade is needed based
# on mysql package versionning, but this can be overriden manually
# to support specific upgrade scenario
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
mysqldump $backup_flags > "$MYSQL_BACKUP_DIR/openstack_database.sql"
cp -rdp /etc/my.cnf* "$MYSQL_BACKUP_DIR"
fi
pcs resource disable redis
check_resource redis stopped 600
pcs resource disable rabbitmq
check_resource rabbitmq stopped 600
pcs resource disable galera
check_resource galera stopped 600
# Disable all VIPs before stopping the cluster, so that pcs doesn't use one as a source address:
# https://bugzilla.redhat.com/show_bug.cgi?id=1330688
for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Started | awk '{ print $1 }'); do
pcs resource disable $vip
check_resource $vip stopped 60
done
pcs cluster stop --all
fi
# Swift isn't controled by pacemaker
systemctl_swift stop
tstart=$(date +%s)
while systemctl is-active pacemaker; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_sync_timeout )) ; then
echo_error "ERROR: cluster shutdown timed out"
exit 1
fi
done
# The reason we do an sql dump *and* we move the old dir out of
# the way is because it gives us an extra level of safety in case
# something goes wrong during the upgrade. Once the restore is
# successful we go ahead and remove it. If the directory exists
# we bail out as it means the upgrade process had issues in the last
# run.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
if [ -d $MYSQL_TEMP_UPGRADE_BACKUP_DIR ]; then
echo_error "ERROR: mysql backup dir already exist"
exit 1
fi
mv /var/lib/mysql $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
yum -y install python-zaqarclient # needed for os-collect-config
yum -y -q update
# We need to ensure at least those two configuration settings, otherwise
# mariadb 10.1+ won't activate galera replication.
# wsrep_cluster_address must only be set though, its value does not
# matter because it's overriden by the galera resource agent.
cat >> /etc/my.cnf.d/galera.cnf <<EOF
[mysqld]
wsrep_on = ON
wsrep_cluster_address = gcomm://localhost
EOF
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
# Scripts run via heat have no HOME variable set and this confuses
# mysqladmin
export HOME=/root
mkdir /var/lib/mysql || /bin/true
chown mysql:mysql /var/lib/mysql
chmod 0755 /var/lib/mysql
restorecon -R /var/lib/mysql/
mysql_install_db --datadir=/var/lib/mysql --user=mysql
chown -R mysql:mysql /var/lib/mysql/
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
mysqld_safe --wsrep-new-cluster &
# We have a populated /root/.my.cnf with root/password here so
# we need to temporarily rename it because the newly created
# db is empty and no root password is set
mv /root/.my.cnf /root/.my.cnf.temporary
timeout 60 sh -c 'while ! mysql -e "" &> /dev/null; do sleep 1; done'
mysql -u root < "$MYSQL_BACKUP_DIR/openstack_database.sql"
mv /root/.my.cnf.temporary /root/.my.cnf
mysqladmin -u root shutdown
# The import was successful so we may remove the folder
rm -r "$MYSQL_BACKUP_DIR"
fi
fi
# If we reached here without error we can safely blow away the origin
# mysql dir from every controller
# TODO: What if the upgrade fails on the bootstrap node, but not on
# this controller. Data may be lost.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
rm -r $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
# Let's reset the stonith back to true if it was true, before starting the cluster
if [ $STONITH_STATE == "true" ]; then
pcs -f /var/lib/pacemaker/cib/cib.xml property set stonith-enabled=true
fi
# Pin messages sent to compute nodes to kilo, these will be upgraded later
crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute"
crudini --set /etc/sahara/sahara.conf DEFAULT plugins ambari,cdh,mapr,vanilla,spark,storm
|