9 files changed, 57 insertions, 419 deletions
diff --git a/extraconfig/tasks/major_upgrade_block_storage.sh b/extraconfig/tasks/major_upgrade_block_storage.sh
deleted file mode 100644
index 64c4457e..00000000
--- a/extraconfig/tasks/major_upgrade_block_storage.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-#
-# This runs an upgrade of Cinder Block Storage nodes.
-#
-set -eu
-
-# Special-case OVS for https://bugs.launchpad.net/tripleo/+bug/1635205
-special_case_ovs_upgrade_if_needed
-
-yum -y install python-zaqarclient  # needed for os-collect-config
-yum -y -q update
diff --git a/extraconfig/tasks/major_upgrade_ceph_mon.sh b/extraconfig/tasks/major_upgrade_ceph_mon.sh
deleted file mode 100755
index e0d160f1..00000000
--- a/extraconfig/tasks/major_upgrade_ceph_mon.sh
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/bin/bash
-set -eu
-set -o pipefail
-
-echo INFO: starting $(basename "$0")
-
-# Exit if not running
-if ! pidof ceph-mon &> /dev/null; then
-    echo INFO: ceph-mon is not running, skipping
-    exit 0
-fi
-
-# Exit if not Hammer
-INSTALLED_VERSION=$(ceph --version | awk '{print $3}')
-if ! [[ "$INSTALLED_VERSION" =~ ^0\.94.* ]]; then
-    echo INFO: version of Ceph installed is not 0.94, skipping
-    exit 0
-fi
-
-CEPH_STATUS=$(ceph health | awk '{print $1}')
-if [ ${CEPH_STATUS} = HEALTH_ERR ]; then
-    echo ERROR: Ceph cluster status is HEALTH_ERR, cannot be upgraded
-    exit 1
-fi
-
-# Useful when upgrading with OSDs num < replica size
-if [[ ${ignore_ceph_upgrade_warnings:-False} != [Tt]rue ]]; then
-    timeout 300 bash -c "while [ ${CEPH_STATUS} != HEALTH_OK ]; do
-      echo WARNING: Waiting for Ceph cluster status to go HEALTH_OK;
-      sleep 30;
-      CEPH_STATUS=$(ceph health | awk '{print $1}')
-    done"
-fi
-
-MON_PID=$(pidof ceph-mon)
-MON_ID=$(hostname -s)
-
-# Stop daemon using Hammer sysvinit script
-service ceph stop mon.${MON_ID}
-
-# Ensure it's stopped
-timeout 60 bash -c "while kill -0 ${MON_PID} 2> /dev/null; do
-  sleep 2;
-done"
-
-# Update to Jewel
-yum -y -q update ceph-mon ceph
-
-# Restart/Exit if not on Jewel, only in that case we need the changes
-UPDATED_VERSION=$(ceph --version | awk '{print $3}')
-if [[ "$UPDATED_VERSION" =~ ^0\.94.* ]]; then
-    echo WARNING: Ceph was not upgraded, restarting daemons
-    service ceph start mon.${MON_ID}
-elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then
-    # RPM could own some of these but we can't take risks on the pre-existing files
-    for d in /var/lib/ceph/mon /var/log/ceph /var/run/ceph /etc/ceph; do
-        chown -L -R ceph:ceph $d || echo WARNING: chown of $d failed
-    done
-
-    # Replay udev events with newer rules
-    udevadm trigger
-
-    # Enable systemd unit
-    systemctl enable ceph-mon.target
-    systemctl enable ceph-mon@${MON_ID}
-    systemctl start ceph-mon@${MON_ID}
-
-    # Wait for daemon to be back in the quorum
-    timeout 300 bash -c "until (ceph quorum_status | jq .quorum_names | grep -sq ${MON_ID}); do
-      echo WARNING: Waiting for mon.${MON_ID} to re-join quorum;
-      sleep 10;
-    done"
-
-    # if tunables become legacy, cluster status will be HEALTH_WARN causing
-    # upgrade to fail on following node
-    ceph osd crush tunables default
-
-    echo INFO: Ceph was upgraded to Jewel
-else
-    echo ERROR: Ceph was upgraded to an unknown release, daemon is stopped, need manual intervention
-    exit 1
-fi
diff --git a/extraconfig/tasks/major_upgrade_ceph_storage.sh b/extraconfig/tasks/major_upgrade_ceph_storage.sh
deleted file mode 100644
index a745e723..00000000
--- a/extraconfig/tasks/major_upgrade_ceph_storage.sh
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/bin/bash
-#
-# This delivers the ceph-storage upgrade script to be invoked as part of the tripleo
-# major upgrade workflow.
-#
-set -eu
-set -o pipefail
-
-UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh
-
-declare -f special_case_ovs_upgrade_if_needed > $UPGRADE_SCRIPT
-# use >> here so we don't lose the declaration we added above
-cat >> $UPGRADE_SCRIPT << 'ENDOFCAT'
-#!/bin/bash
-### DO NOT MODIFY THIS FILE
-### This file is automatically delivered to the ceph-storage nodes as part of the
-### tripleo upgrades workflow
-set -eu
-
-echo INFO: starting $(basename "$0")
-
-# Exit if not running
-if ! pidof ceph-osd &> /dev/null; then
-    echo INFO: ceph-osd is not running, skipping
-    exit 0
-fi
-
-# Exit if not Hammer
-INSTALLED_VERSION=$(ceph --version | awk '{print $3}')
-if ! [[ "$INSTALLED_VERSION" =~ ^0\.94.* ]]; then
-    echo INFO: version of Ceph installed is not 0.94, skipping
-    exit 0
-fi
-
-OSD_PIDS=$(pidof ceph-osd)
-OSD_IDS=$(ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }')
-
-# "so that mirrors aren't rebalanced as if the OSD died" - gfidente / leseb
-ceph osd set noout
-ceph osd set norebalance
-ceph osd set nodeep-scrub
-ceph osd set noscrub
-
-# Stop daemon using Hammer sysvinit script
-for OSD_ID in $OSD_IDS; do
-    service ceph stop osd.${OSD_ID}
-done
-
-# Nice guy will return non-0 only when all failed
-timeout 60 bash -c "while kill -0 ${OSD_PIDS} 2> /dev/null; do
-  sleep 2;
-done"
-
-special_case_ovs_upgrade_if_needed
-
-# Update (Ceph to Jewel)
-yum -y install python-zaqarclient  # needed for os-collect-config
-yum -y update
-
-# Restart/Exit if not on Jewel, only in that case we need the changes
-UPDATED_VERSION=$(ceph --version | awk '{print $3}')
-if [[ "$UPDATED_VERSION" =~ ^0\.94.* ]]; then
-    echo WARNING: Ceph was not upgraded, restarting daemon
-    for OSD_ID in $OSD_IDS; do
-        service ceph start osd.${OSD_ID}
-    done
-elif [[ "$UPDATED_VERSION" =~ ^10\.2.* ]]; then
-    # RPM could own some of these but we can't take risks on the pre-existing files
-    for d in /var/lib/ceph/osd /var/log/ceph /var/run/ceph /etc/ceph; do
-        chown -L -R ceph:ceph $d || echo WARNING: chown of $d failed
-    done
-
-    # Replay udev events with newer rules
-    udevadm trigger && udevadm settle
-
-    # If on ext4, we need to enforce lower values for name and namespace len
-    # or ceph-osd will refuse to start, see: http://tracker.ceph.com/issues/16187
-    for OSD_ID in $OSD_IDS; do
-      OSD_FS=$(df -l --output=fstype /var/lib/ceph/osd/ceph-${OSD_ID} | tail -n +2)
-      if [ ${OSD_FS} = ext4 ]; then
-        crudini --set /etc/ceph/ceph.conf global osd_max_object_name_len 256
-        crudini --set /etc/ceph/ceph.conf global osd_max_object_namespace_len 64
-      fi
-    done
-
-    # Enable systemd unit
-    systemctl enable ceph-osd.target
-    for OSD_ID in $OSD_IDS; do
-        systemctl enable ceph-osd@${OSD_ID}
-        systemctl start ceph-osd@${OSD_ID}
-    done
-
-    echo INFO: Ceph was upgraded to Jewel
-else
-    echo ERROR: Ceph was upgraded to an unknown release, daemon is stopped, need manual intervention
-    exit 1
-fi
-
-ceph osd unset noout
-ceph osd unset norebalance
-ceph osd unset nodeep-scrub
-ceph osd unset noscrub
-ENDOFCAT
-
-# ensure the permissions are OK
-chmod 0755 $UPGRADE_SCRIPT
diff --git a/extraconfig/tasks/major_upgrade_compute.sh b/extraconfig/tasks/major_upgrade_compute.sh
deleted file mode 100644
index 7a3e1073..00000000
--- a/extraconfig/tasks/major_upgrade_compute.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-#
-# This delivers the compute upgrade script to be invoked as part of the tripleo
-# major upgrade workflow.
-#
-set -eu
-
-UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh
-
-cat > $UPGRADE_SCRIPT << ENDOFCAT
-### DO NOT MODIFY THIS FILE
-### This file is automatically delivered to the compute nodes as part of the
-### tripleo upgrades workflow
-
-set -eu
-
-# pin nova to kilo (messaging +-1) for the nova-compute service
-
-crudini  --set /etc/nova/nova.conf upgrade_levels compute $upgrade_level_nova_compute
-
-$(declare -f special_case_ovs_upgrade_if_needed)
-special_case_ovs_upgrade_if_needed
-
-yum -y install python-zaqarclient  # needed for os-collect-config
-yum -y update
-
-# Due to bug#1640177 we need to restart compute agent
-echo "Restarting openstack ceilometer agent compute"
-systemctl restart openstack-ceilometer-compute
-
-ENDOFCAT
-
-# ensure the permissions are OK
-chmod 0755 $UPGRADE_SCRIPT
-
diff --git a/extraconfig/tasks/major_upgrade_object_storage.sh b/extraconfig/tasks/major_upgrade_object_storage.sh
deleted file mode 100644
index d9d1b4d5..00000000
--- a/extraconfig/tasks/major_upgrade_object_storage.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-#
-# This delivers the swift-storage upgrade script to be invoked as part of the tripleo
-# major upgrade workflow.
-#
-set -eu
-
-UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh
-
-cat > $UPGRADE_SCRIPT << ENDOFCAT
-### DO NOT MODIFY THIS FILE
-### This file is automatically delivered to the swift-storage nodes as part of the
-### tripleo upgrades workflow
-
-set -eu
-
-function systemctl_swift {
-    action=\$1
-    for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
-             openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
-             openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object; do
-                systemctl \$action \$S
-    done
-}
-
-$(declare -f special_case_ovs_upgrade_if_needed)
-special_case_ovs_upgrade_if_needed
-
-systemctl_swift stop
-
-yum -y install python-zaqarclient  # needed for os-collect-config
-yum -y update
-
-systemctl_swift start
-
-
-
-ENDOFCAT
-
-# ensure the permissions are OK
-chmod 0755 $UPGRADE_SCRIPT
-
diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml
index b63aafbd..8c91027d 100644
--- a/extraconfig/tasks/major_upgrade_pacemaker.yaml
+++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml
@@ -33,33 +33,6 @@ resources:
   # map_merge with input_values instead of feeding params into scripts
   # via str_replace on bash snippets
 
-  CephMonUpgradeConfig:
-    type: OS::Heat::SoftwareConfig
-    properties:
-      group: script
-      config:
-        list_join:
-        - ''
-        - - str_replace:
-              template: |
-                #!/bin/bash
-                ignore_ceph_upgrade_warnings='IGNORE_CEPH_UPGRADE_WARNINGS'
-              params:
-                IGNORE_CEPH_UPGRADE_WARNINGS: {get_param: IgnoreCephUpgradeWarnings}
-          - get_file: major_upgrade_ceph_mon.sh
-
-  CephMonUpgradeDeployment:
-    type: OS::Heat::SoftwareDeploymentGroup
-    properties:
-      servers: {get_param: [servers, Controller]}
-      config: {get_resource: CephMonUpgradeConfig}
-      input_values: {get_param: input_values}
-    update_policy:
-      batch_create:
-        max_batch_size: 1
-      rolling_update:
-        max_batch_size: 1
-
   ControllerPacemakerUpgradeConfig_Step1:
     type: OS::Heat::SoftwareConfig
     properties:
@@ -86,30 +59,11 @@ resources:
 
   ControllerPacemakerUpgradeDeployment_Step1:
     type: OS::Heat::SoftwareDeploymentGroup
-    depends_on: CephMonUpgradeDeployment
     properties:
       servers:  {get_param: [servers, Controller]}
       config: {get_resource: ControllerPacemakerUpgradeConfig_Step1}
       input_values: {get_param: input_values}
 
-  BlockStorageUpgradeConfig:
-    type: OS::Heat::SoftwareConfig
-    depends_on: ControllerPacemakerUpgradeDeployment_Step1
-    properties:
-      group: script
-      config:
-        list_join:
-        - ''
-        - - get_file: pacemaker_common_functions.sh
-          - get_file: major_upgrade_block_storage.sh
-
-  BlockStorageUpgradeDeployment:
-    type: OS::Heat::SoftwareDeploymentGroup
-    properties:
-      servers:  {get_param: [servers, BlockStorage]}
-      config: {get_resource: BlockStorageUpgradeConfig}
-      input_values: {get_param: input_values}
-
   ControllerPacemakerUpgradeConfig_Step2:
     type: OS::Heat::SoftwareConfig
     properties:
@@ -136,7 +90,7 @@ resources:
 
   ControllerPacemakerUpgradeDeployment_Step2:
     type: OS::Heat::SoftwareDeploymentGroup
-    depends_on: BlockStorageUpgradeDeployment
+    depends_on: ControllerPacemakerUpgradeDeployment_Step1
     properties:
       servers:  {get_param: [servers, Controller]}
       config: {get_resource: ControllerPacemakerUpgradeConfig_Step2}
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_init.j2.yaml b/extraconfig/tasks/major_upgrade_pacemaker_init.j2.yaml
deleted file mode 100644
index c308720b..00000000
--- a/extraconfig/tasks/major_upgrade_pacemaker_init.j2.yaml
+++ /dev/null
@@ -1,96 +0,0 @@
-heat_template_version: ocata
-description: 'Upgrade for Pacemaker deployments'
-
-parameters:
-
-  servers:
-    type: json
-  input_values:
-    type: json
-    description: input values for the software deployments
-
-  UpgradeInitCommand:
-    type: string
-    description: |
-      Command or script snippet to run on all overcloud nodes to
-      initialize the upgrade process. E.g. a repository switch.
-    default: ''
-  UpgradeLevelNovaCompute:
-    type: string
-    description: Nova Compute upgrade level
-    default: ''
-
-resources:
-
-  # For the UpgradeInit also rename /etc/resolv.conf.save for +bug/1567004
-
-  UpgradeInitConfig:
-    type: OS::Heat::SoftwareConfig
-    properties:
-      group: script
-      config:
-        list_join:
-        - ''
-        - - "#!/bin/bash\n\n"
-          - "if [[ -f /etc/resolv.conf.save ]] ; then rm /etc/resolv.conf.save; fi\n\n"
-          - get_param: UpgradeInitCommand
-
-  # TODO(jistr): for Mitaka->Newton upgrades and further we can use
-  # map_merge with input_values instead of feeding params into scripts
-  # via str_replace on bash snippets
-
-  # FIXME(shardy) we have hard-coded per-role *ScriptConfig's here
-  # Would be better to have a common config for all roles
-  ComputeDeliverUpgradeScriptConfig:
-    type: OS::Heat::SoftwareConfig
-    properties:
-      group: script
-      config:
-        list_join:
-        - ''
-        - - str_replace:
-              template: |
-                #!/bin/bash
-                upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE'
-              params:
-                UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute}
-          - get_file: pacemaker_common_functions.sh
-          - get_file: major_upgrade_compute.sh
-
-  ObjectStorageDeliverUpgradeScriptConfig:
-    type: OS::Heat::SoftwareConfig
-    properties:
-      group: script
-      config:
-        list_join:
-        - ''
-        - - get_file: pacemaker_common_functions.sh
-          - get_file: major_upgrade_object_storage.sh
-
-  CephStorageDeliverUpgradeScriptConfig:
-    type: OS::Heat::SoftwareConfig
-    properties:
-      group: script
-      config:
-        list_join:
-        - ''
-        - - get_file: pacemaker_common_functions.sh
-          - get_file: major_upgrade_ceph_storage.sh
-
-{% for role in roles %}
-  UpgradeInit{{role.name}}Deployment:
-    type: OS::Heat::SoftwareDeploymentGroup
-    properties:
-      servers:  {get_param: [servers, {{role.name}}]}
-      config: {get_resource: UpgradeInitConfig}
-      input_values: {get_param: input_values}
-
-  {% if not role.name in ['Controller', 'BlockStorage'] %}
-  {{role.name}}DeliverUpgradeScriptDeployment:
-    type: OS::Heat::SoftwareDeploymentGroup
-    properties:
-      servers:  {get_param: [servers, {{role.name}}]}
-      config: {get_resource: {{role.name}}DeliverUpgradeScriptConfig}
-      input_values: {get_param: input_values}
-  {% endif %}
-{% endfor %}
diff --git a/extraconfig/tasks/tripleo_upgrade_node.sh b/extraconfig/tasks/tripleo_upgrade_node.sh
new file mode 100644
index 00000000..7f056021
--- /dev/null
+++ b/extraconfig/tasks/tripleo_upgrade_node.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# This delivers the operator driven upgrade script to be invoked as part of
+# the tripleo major upgrade workflow. The utility 'upgrade-non-controller.sh'
+# is used from the undercloud to invoke the /root/tripleo_upgrade_node.sh
+#
+set -eu
+
+UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh
+
+cat > $UPGRADE_SCRIPT << ENDOFCAT
+### DO NOT MODIFY THIS FILE
+### This file is automatically delivered to those nodes where the
+### disable_upgrade_deployment flag is set in roles_data.yaml.
+
+set -eu
+NOVA_COMPUTE=""
+if systemctl show 'openstack-nova-compute' --property ActiveState | grep '\bactive\b'; then
+   NOVA_COMPUTE="true"
+fi
+
+DEBUG="true"
+SCRIPT_NAME=$(basename $0)
+$(declare -f log_debug)
+$(declare -f manage_systemd_service)
+$(declare -f systemctl_swift)
+
+# pin nova messaging +-1 for the nova-compute service
+if [[ -n \$NOVA_COMPUTE ]]; then
+    crudini  --set /etc/nova/nova.conf upgrade_levels compute auto
+fi
+
+$(declare -f special_case_ovs_upgrade_if_needed)
+special_case_ovs_upgrade_if_needed
+
+yum -y install python-zaqarclient  # needed for os-collect-config
+systemctl_swift stop
+yum -y update
+systemctl_swift start
+
+# Due to bug#1640177 we need to restart compute agent
+if [[ -n \$NOVA_COMPUTE ]]; then
+    echo "Restarting openstack ceilometer agent compute"
+    systemctl restart openstack-ceilometer-compute
+fi
+
+ENDOFCAT
+
+# ensure the permissions are OK
+chmod 0755 $UPGRADE_SCRIPT
+
diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh
index edcc9e8e..c66dd01f 100755
--- a/extraconfig/tasks/yum_update.sh
+++ b/extraconfig/tasks/yum_update.sh
@@ -10,6 +10,11 @@
 echo "Started yum_update.sh on server $deploy_server_id at `date`"
 echo -n "false" > $heat_outputs_path.update_managed_packages
 
+if [ -f /.dockerenv ]; then
+    echo "Not running due to running inside a container"
+    exit 0
+fi
+
 if [[ -z "$update_identifier" ]]; then
     echo "Not running due to unset update_identifier"
     exit 0