about summary refs log tree commit diff stats
path: root/extraconfig
diff options
context:
space:
mode:
Diffstat (limited to 'extraconfig')
-rwxr-xr-x extraconfig/tasks/major_upgrade_ceph_mon.sh 63
-rw-r--r-- extraconfig/tasks/major_upgrade_ceph_storage.sh 77
-rwxr-xr-x extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh 33
-rwxr-xr-x extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh 1
-rw-r--r-- extraconfig/tasks/major_upgrade_pacemaker.yaml 18
-rw-r--r-- extraconfig/tasks/major_upgrade_pacemaker_migrations.sh 61
-rwxr-xr-x extraconfig/tasks/pacemaker_resource_restart.sh 45
7 files changed, 187 insertions, 111 deletions
diff --git a/extraconfig/tasks/major_upgrade_ceph_mon.sh b/extraconfig/tasks/major_upgrade_ceph_mon.sh
new file mode 100755
index 00000000..38befbbf
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_ceph_mon.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+set -eu
+set -o pipefail
+
+echo INFO: starting $(basename "$0")
+
+# Exit if not running
+if ! pidof ceph-mon; then
+ echo INFO: ceph-mon is not running, skipping
+ exit 0
+fi
+
+# Exit if not Hammer
+INSTALLED_VERSION=$(ceph --version | awk '{print $3}')
+if ! [[ "$INSTALLED_VERSION" =~ ^0\.94.* ]]; then
+ echo INFO: version of Ceph installed is not 0.94, skipping
+ exit 0
+fi
+
+MON_PID=$(pidof ceph-mon)
+MON_ID=$(hostname -s)
+
+# Stop daemon using Hammer sysvinit script
+service ceph stop mon.${MON_ID}
+
+# Ensure it's stopped
+timeout 60 bash -c "while kill -0 ${MON_PID} 2> /dev/null; do
+ sleep 2;
+done"
+
+# Update to Jewel
+yum -y -q update ceph-mon
+
+# Restart/Exit if not on Jewel, only in that case we need the changes
+UPDATED_VERSION=$(ceph --version | awk '{print $3}')
+if [[ "$UPDATED_VERSION" =~ ^0\.94.* ]]; then
+ echo WARNING: Ceph was not upgraded, restarting daemons
+ service ceph start mon.${MON_ID}
+elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then
+ echo INFO: Ceph was upgraded to Jewel
+
+ # RPM could own some of these but we can't take risks on the pre-existing files
+ for d in /var/lib/ceph/mon /var/log/ceph /var/run/ceph /etc/ceph; do
+ chown -R ceph:ceph $d
+ done
+
+ # Replay udev events with newer rules
+ udevadm trigger
+
+ # Enable systemd unit
+ systemctl enable ceph-mon.target
+ systemctl enable ceph-mon@${MON_ID}
+ systemctl start ceph-mon@${MON_ID}
+
+ # Wait for daemon to be back in the quorum
+ timeout 300 bash -c "until (ceph quorum_status | jq .quorum_names | grep -sq ${MON_ID}); do
+ echo Waiting for mon.${MON_ID} to re-join quorum;
+ sleep 10;
+ done"
+else
+ echo ERROR: Ceph was upgraded to an unknown release, daemon is stopped, need manual intervention
+ exit 1
+fi
diff --git a/extraconfig/tasks/major_upgrade_ceph_storage.sh b/extraconfig/tasks/major_upgrade_ceph_storage.sh
index de42b16d..03a1c1c2 100644
--- a/extraconfig/tasks/major_upgrade_ceph_storage.sh
+++ b/extraconfig/tasks/major_upgrade_ceph_storage.sh
@@ -4,32 +4,89 @@
# major upgrade workflow.
#
set -eu
+set -o pipefail
UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh
-cat > $UPGRADE_SCRIPT << ENDOFCAT
+cat > $UPGRADE_SCRIPT << 'ENDOFCAT'
+#!/bin/bash
### DO NOT MODIFY THIS FILE
### This file is automatically delivered to the ceph-storage nodes as part of the
### tripleo upgrades workflow
+set -eu
+
+echo INFO: starting $(basename "$0")
+# Exit if not running
+if ! pidof ceph-osd; then
+ echo INFO: ceph-osd is not running, skipping
+ exit 0
+fi
-function systemctl_ceph {
- action=\$1
- systemctl \$action ceph
-}
+# Exit if not Hammer
+INSTALLED_VERSION=$(ceph --version | awk '{print $3}')
+if ! [[ "$INSTALLED_VERSION" =~ ^0\.94.* ]]; then
+ echo INFO: version of Ceph installed is not 0.94, skipping
+ exit 0
+fi
-# "so that mirrors aren't rebalanced as if the OSD died" - gfidente
+OSD_PIDS=$(pidof ceph-osd)
+OSD_IDS=$(ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }')
+
+# "so that mirrors aren't rebalanced as if the OSD died" - gfidente / leseb
ceph osd set noout
+ceph osd set norebalance
+ceph osd set nodeep-scrub
+ceph osd set noscrub
+
+# Stop daemon using Hammer sysvinit script
+for OSD_ID in $OSD_IDS; do
+ service ceph stop osd.${OSD_ID}
+done
+
+# kill -0 with several PIDs returns non-0 only when it fails for all of them, i.e. once every OSD has exited
+timeout 60 bash -c "while kill -0 ${OSD_PIDS} 2> /dev/null; do
+ sleep 2;
+done"
-systemctl_ceph stop
+# Update (Ceph to Jewel)
yum -y install python-zaqarclient # needed for os-collect-config
yum -y update
-systemctl_ceph start
-ceph osd unset noout
+# Restart/Exit if not on Jewel, only in that case we need the changes
+UPDATED_VERSION=$(ceph --version | awk '{print $3}')
+if [[ "$UPDATED_VERSION" =~ ^0\.94.* ]]; then
+ echo WARNING: Ceph was not upgraded, restarting daemon
+ for OSD_ID in $OSD_IDS; do
+ service ceph start osd.${OSD_ID}
+ done
+elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then
+ # RPM could own some of these but we can't take risks on the pre-existing files
+ for d in /var/lib/ceph/osd /var/log/ceph /var/run/ceph /etc/ceph; do
+ chown -R ceph:ceph $d
+ done
+
+ # Replay udev events with newer rules
+ udevadm trigger && udevadm settle
+
+ # Enable systemd unit
+ systemctl enable ceph-osd.target
+ for OSD_ID in $OSD_IDS; do
+ systemctl enable ceph-osd@${OSD_ID}
+ systemctl start ceph-osd@${OSD_ID}
+ done
+ echo INFO: Ceph was upgraded to Jewel
+else
+ echo ERROR: Ceph was upgraded to an unknown release, daemon is stopped, need manual intervention
+ exit 1
+fi
+
+ceph osd unset noout
+ceph osd unset norebalance
+ceph osd unset nodeep-scrub
+ceph osd unset noscrub
ENDOFCAT
# ensure the permissions are OK
chmod 0755 $UPGRADE_SCRIPT
-
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
index 36d85444..0b702630 100755
--- a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
@@ -18,7 +18,7 @@ STONITH_STATE=$(pcs property show stonith-enabled | grep "stonith-enabled" | awk
pcs property set stonith-enabled=false
# If for some reason rpm-python are missing we want to error out early enough
-if [ ! rpm -q rpm-python &> /dev/null ]; then
+if ! rpm -q rpm-python &> /dev/null; then
echo_error "ERROR: upgrade cannot start without rpm-python installed"
exit 1
fi
@@ -50,7 +50,7 @@ if [ "$mariadb_do_major_upgrade" = "auto" ]; then
DO_MYSQL_UPGRADE=0
fi
echo "mysql upgrade required: $DO_MYSQL_UPGRADE"
-elif [ "$mariadb_do_major_upgrade" = 0 ]; then
+elif [ "$mariadb_do_major_upgrade" = "no" ]; then
DO_MYSQL_UPGRADE=0
else
DO_MYSQL_UPGRADE=1
@@ -155,17 +155,19 @@ wsrep_on = ON
wsrep_cluster_address = gcomm://localhost
EOF
-if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
- if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
- # Scripts run via heat have no HOME variable set and this confuses
- # mysqladmin
- export HOME=/root
- mkdir /var/lib/mysql || /bin/true
- chown mysql:mysql /var/lib/mysql
- chmod 0755 /var/lib/mysql
- restorecon -R /var/lib/mysql/
- mysql_install_db --datadir=/var/lib/mysql --user=mysql
- chown -R mysql:mysql /var/lib/mysql/
+if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
+ # Scripts run via heat have no HOME variable set and this confuses
+ # mysqladmin
+ export HOME=/root
+
+ mkdir /var/lib/mysql || /bin/true
+ chown mysql:mysql /var/lib/mysql
+ chmod 0755 /var/lib/mysql
+ restorecon -R /var/lib/mysql/
+ mysql_install_db --datadir=/var/lib/mysql --user=mysql
+ chown -R mysql:mysql /var/lib/mysql/
+
+ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
mysqld_safe --wsrep-new-cluster &
# We have a populated /root/.my.cnf with root/password here so
# we need to temporarily rename it because the newly created
@@ -182,6 +184,9 @@ fi
# If we reached here without error we can safely blow away the origin
# mysql dir from every controller
+
+# TODO: What if the upgrade fails on the bootstrap node but not on
+# this controller? Data may be lost.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
rm -r $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
@@ -199,3 +204,5 @@ crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
+# LP: 1615035, required only for M/N upgrade.
+crudini --set /etc/nova/nova.conf DEFAULT scheduler_host_manager host_manager
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
index 643ae57f..bc708cce 100755
--- a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
@@ -53,6 +53,7 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)
keystone-manage db_sync
neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
nova-manage db sync
+ nova-manage api_db sync
pcs resource enable memcached
check_resource memcached started 600
diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml
index c70a954f..c2e14880 100644
--- a/extraconfig/tasks/major_upgrade_pacemaker.yaml
+++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml
@@ -32,6 +32,23 @@ resources:
# map_merge with input_values instead of feeding params into scripts
# via str_replace on bash snippets
+ CephMonUpgradeConfig:
+ type: OS::Heat::SoftwareConfig
+ properties:
+ group: script
+ config: {get_file: major_upgrade_ceph_mon.sh}
+
+ CephMonUpgradeDeployment:
+ type: OS::Heat::SoftwareDeploymentGroup
+ properties:
+ servers: {get_param: controller_servers}
+ config: {get_resource: CephMonUpgradeConfig}
+ input_values: {get_param: input_values}
+ batch_create:
+ max_batch_size: 1
+ rolling_update:
+ max_batch_size: 1
+
ControllerPacemakerUpgradeConfig_Step1:
type: OS::Heat::SoftwareConfig
properties:
@@ -57,6 +74,7 @@ resources:
ControllerPacemakerUpgradeDeployment_Step1:
type: OS::Heat::SoftwareDeploymentGroup
+ depends_on: CephMonUpgradeDeployment
properties:
servers: {get_param: controller_servers}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step1}
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
index c36786a9..7ed7012d 100644
--- a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
+++ b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
@@ -56,64 +56,3 @@ function is_mysql_upgrade_needed {
fi
echo "1"
}
-
-function add_missing_openstack_core_constraints {
- # The CIBs are saved under /root as they might contain sensitive data
- CIB="/root/migration.cib"
- CIB_BACKUP="/root/backup.cib"
- CIB_PUSH_NEEDED=n
-
- rm -f "$CIB" "$CIB_BACKUP" || /bin/true
- pcs cluster cib "$CIB"
- cp "$CIB" "$CIB_BACKUP"
-
- if ! pcs -f "$CIB" constraint --full | grep 'start openstack-sahara-api-clone then start openstack-sahara-engine-clone'; then
- pcs -f "$CIB" constraint order start openstack-sahara-api-clone then start openstack-sahara-engine-clone
- CIB_PUSH_NEEDED=y
- fi
-
- if ! pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-ceilometer-notification-clone'; then
- pcs -f "$CIB" constraint order start openstack-core-clone then start openstack-ceilometer-notification-clone
- CIB_PUSH_NEEDED=y
- fi
-
- if ! pcs -f "$CIB" constraint --full | grep 'start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone'; then
- pcs -f "$CIB" constraint order start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone
- CIB_PUSH_NEEDED=y
- fi
-
- if pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone'; then
- CID=$(pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone' | sed -e 's/.*id\://g' -e 's/)//g')
- pcs -f "$CIB" constraint remove $CID
- CIB_PUSH_NEEDED=y
- fi
-
- if [ "$CIB_PUSH_NEEDED" = 'y' ]; then
- pcs cluster cib-push "$CIB"
- fi
-}
-
-function remove_ceilometer_alarm {
- if pcs status | grep openstack-ceilometer-alarm; then
- # Disable pacemaker resources for ceilometer-alarms
- pcs resource disable openstack-ceilometer-alarm-evaluator
- check_resource openstack-ceilometer-alarm-evaluator stopped 600
- pcs resource delete openstack-ceilometer-alarm-evaluator
- pcs resource disable openstack-ceilometer-alarm-notifier
- check_resource openstack-ceilometer-alarm-notifier stopped 600
- pcs resource delete openstack-ceilometer-alarm-notifier
-
- # remove constraints
- pcs constraint remove ceilometer-delay-then-ceilometer-alarm-evaluator-constraint
- pcs constraint remove ceilometer-alarm-evaluator-with-ceilometer-delay-colocation
- pcs constraint remove ceilometer-alarm-evaluator-then-ceilometer-alarm-notifier-constraint
- pcs constraint remove ceilometer-alarm-notifier-with-ceilometer-alarm-evaluator-colocation
- pcs constraint remove ceilometer-alarm-notifier-then-ceilometer-notification-constraint
- pcs constraint remove ceilometer-notification-with-ceilometer-alarm-notifier-colocation
-
- fi
-
- # uninstall openstack-ceilometer-alarm package
- yum -y remove openstack-ceilometer-alarm
-
-}
diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh
index b2e5be16..fd1fd0dc 100755
--- a/extraconfig/tasks/pacemaker_resource_restart.sh
+++ b/extraconfig/tasks/pacemaker_resource_restart.sh
@@ -7,32 +7,23 @@ pacemaker_status=$(systemctl is-active pacemaker)
# Run if pacemaker is running, we're the bootstrap node,
# and we're updating the deployment (not creating).
if [ "$pacemaker_status" = "active" -a \
- "$(hiera bootstrap_nodeid)" = "$(facter hostname)" -a \
- "$(hiera stack_action)" = "UPDATE" ]; then
-
- #ensure neutron constraints like
- #https://review.openstack.org/#/c/245093/
- if pcs constraint order show | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then
- pcs constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory
- fi
-
- pcs resource disable httpd
- check_resource httpd stopped 300
- pcs resource disable openstack-core
- check_resource openstack-core stopped 1800
-
- if pcs status | grep haproxy-clone; then
- pcs resource restart haproxy-clone
- fi
- pcs resource restart redis-master
- pcs resource restart mongod-clone
- pcs resource restart rabbitmq-clone
- pcs resource restart memcached-clone
- pcs resource restart galera-master
-
- pcs resource enable openstack-core
- check_resource openstack-core started 1800
- pcs resource enable httpd
- check_resource httpd started 800
+ "$(hiera bootstrap_nodeid)" = "$(facter hostname)" ]; then
+
+ TIMEOUT=600
+ SERVICES_TO_RESTART="$(ls /var/lib/tripleo/pacemaker-restarts)"
+ PCS_STATUS_OUTPUT="$(pcs status)"
+
+ for service in $SERVICES_TO_RESTART; do
+ if ! echo "$PCS_STATUS_OUTPUT" | grep $service; then
+ echo "Service $service not found as a pacemaker resource, cannot restart it."
+ exit 1
+ fi
+ done
+
+ for service in $SERVICES_TO_RESTART; do
+ echo "Restarting $service..."
+ pcs resource restart --wait=$TIMEOUT $service
+ rm -f /var/lib/tripleo/pacemaker-restarts/$service
+ done
fi