From 0dd10ffe4fcd9b191eaceabcd7bb124a4db10b06 Mon Sep 17 00:00:00 2001
From: Jiri Stransky
Date: Thu, 21 Jan 2016 13:11:23 +0100
Subject: Introduce update/upgrade workflow

Change-Id: I7226070aa87416e79f25625647f8e3076c9e2c9a
---
 environments/major-upgrade-pacemaker.yaml          |  9 ++-
 .../tasks/major_upgrade_controller_pacemaker_1.sh  | 58 ++++++++++++++++++
 .../tasks/major_upgrade_controller_pacemaker_2.sh  | 71 ++++++++++++++++++++++
 extraconfig/tasks/major_upgrade_pacemaker.yaml     | 20 ++++--
 extraconfig/tasks/major_upgrade_pacemaker_1.sh     | 58 ------------------
 extraconfig/tasks/major_upgrade_pacemaker_2.sh     | 71 ----------------------
 extraconfig/tasks/noop.yaml                        | 16 +++++
 extraconfig/tasks/yum_update_noop.yaml             | 29 +++++++++
 overcloud-resource-registry-puppet.yaml            |  1 +
 overcloud.yaml                                     | 13 ++++
 10 files changed, 209 insertions(+), 137 deletions(-)
 create mode 100755 extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
 create mode 100755 extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
 delete mode 100755 extraconfig/tasks/major_upgrade_pacemaker_1.sh
 delete mode 100755 extraconfig/tasks/major_upgrade_pacemaker_2.sh
 create mode 100644 extraconfig/tasks/yum_update_noop.yaml

diff --git a/environments/major-upgrade-pacemaker.yaml b/environments/major-upgrade-pacemaker.yaml
index db078404..e3bbfcce 100644
--- a/environments/major-upgrade-pacemaker.yaml
+++ b/environments/major-upgrade-pacemaker.yaml
@@ -1,3 +1,8 @@
 resource_registry:
-  OS::TripleO::Tasks::ControllerPrePuppet: ../extraconfig/tasks/major_upgrade_pacemaker.yaml
-  OS::TripleO::Tasks::ControllerPostPuppet: ../extraconfig/tasks/noop.yaml
+  OS::TripleO::Tasks::UpdateWorkflow: ../extraconfig/tasks/major_upgrade_pacemaker.yaml
+  OS::TripleO::Tasks::PackageUpdate: ../extraconfig/tasks/yum_update_noop.yaml
+  OS::TripleO::ControllerPostDeployment: OS::Heat::None
+  OS::TripleO::ComputePostDeployment: OS::Heat::None
+  OS::TripleO::ObjectStoragePostDeployment: OS::Heat::None
+  OS::TripleO::BlockStoragePostDeployment: OS::Heat::None
+  OS::TripleO::CephStoragePostDeployment: OS::Heat::None
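This environment file is what switches a stack update into upgrade mode: it maps
OS::TripleO::Tasks::UpdateWorkflow to the pacemaker upgrade template and noops the
yum update and the per-role post-deployment steps. A rough invocation sketch, assuming
the usual tripleoclient workflow; the placeholder for the extra -e files stands for
whatever environments the original deployment used:

    # Re-run the deploy command used for the initial deployment, appending the
    # upgrade environment last so its resource_registry overrides take effect.
    openstack overcloud deploy --templates \
        -e <environment files from the initial deployment> \
        -e environments/major-upgrade-pacemaker.yaml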
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
new file mode 100755
index 00000000..bee9a939
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+set -eu
+
+cluster_sync_timeout=600
+
+if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then
+    echo_error "ERROR: upgrade cannot start with some cluster nodes being offline"
+    exit 1
+fi
+
+if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+    pcs resource disable httpd
+    check_resource httpd stopped 1800
+    if pcs status | grep openstack-keystone; then
+        pcs resource disable openstack-keystone
+        check_resource openstack-keystone stopped 1800
+    fi
+    pcs resource disable redis
+    check_resource redis stopped 600
+    pcs resource disable mongod
+    check_resource mongod stopped 600
+    pcs resource disable rabbitmq
+    check_resource rabbitmq stopped 600
+    pcs resource disable memcached
+    check_resource memcached stopped 600
+    pcs resource disable galera
+    check_resource galera stopped 600
+    pcs cluster stop --all
+fi
+
+# Swift isn't controlled by pacemaker
+for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
+openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
+openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do
+    systemctl stop $S
+done
+
+tstart=$(date +%s)
+while systemctl is-active pacemaker; do
+    sleep 5
+    tnow=$(date +%s)
+    if (( tnow-tstart > cluster_sync_timeout )) ; then
+        echo_error "ERROR: cluster shutdown timed out"
+        exit 1
+    fi
+done
+
+yum update -y
+
+# Pin messages sent to compute nodes to liberty, these will be upgraded later
+crudini --set /etc/nova/nova.conf upgrade_levels compute liberty
+# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
+# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
+crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
+# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
+# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
+crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
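Both controller scripts call echo_error and check_resource, which are prepended from
pacemaker_common_functions.sh by the list_join in major_upgrade_pacemaker.yaml further
down. The shipped helpers are not part of this patch; the sketch below is only an
illustration of the calling convention check_resource <resource> <started|stopped>
<timeout_seconds> that the scripts assume, not the real implementation:

    # Illustrative sketch only -- pacemaker_common_functions.sh is authoritative.
    echo_error() {
        # Send the message to stderr so it shows up in the deployment log.
        echo "$@" >&2
    }

    check_resource() {
        # check_resource <resource> <started|stopped> <timeout_seconds>
        local resource=$1 desired=$2 timeout=$3
        local tstart=$(date +%s)
        while true; do
            # Count resource instances that pacemaker currently reports as Started.
            local started
            started=$(pcs status --full 2>/dev/null | grep -c "${resource}.*Started" || true)
            if { [ "$desired" = stopped ] && [ "$started" -eq 0 ]; } || \
               { [ "$desired" = started ] && [ "$started" -gt 0 ]; }; then
                return 0
            fi
            if (( $(date +%s) - tstart > timeout )); then
                echo_error "ERROR: $resource did not reach state '$desired' within ${timeout}s"
                exit 1
            fi
            sleep 4
        done
    }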
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
new file mode 100755
index 00000000..0b92a3bb
--- /dev/null
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+set -eu
+
+cluster_form_timeout=600
+cluster_settle_timeout=600
+galera_sync_timeout=600
+
+if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+    pcs cluster start --all
+
+    tstart=$(date +%s)
+    while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
+        sleep 5
+        tnow=$(date +%s)
+        if (( tnow-tstart > cluster_form_timeout )) ; then
+            echo_error "ERROR: timed out forming the cluster"
+            exit 1
+        fi
+    done
+
+    if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
+        echo_error "ERROR: timed out waiting for cluster to finish transition"
+        exit 1
+    fi
+
+    pcs resource enable galera
+    check_resource galera started 600
+    pcs resource enable mongod
+    check_resource mongod started 600
+
+    tstart=$(date +%s)
+    while ! clustercheck; do
+        sleep 5
+        tnow=$(date +%s)
+        if (( tnow-tstart > galera_sync_timeout )) ; then
+            echo_error "ERROR: galera sync timed out"
+            exit 1
+        fi
+    done
+
+    # Run all the db syncs
+    # TODO: check if this can be triggered in puppet and removed from here
+    ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
+    cinder-manage db sync
+    glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
+    heat-manage --config-file /etc/heat/heat.conf db_sync
+    keystone-manage db_sync
+    neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
+    nova-manage db sync
+
+    pcs resource enable memcached
+    check_resource memcached started 600
+    pcs resource enable rabbitmq
+    check_resource rabbitmq started 600
+    pcs resource enable redis
+    check_resource redis started 600
+    if pcs status | grep openstack-keystone; then
+        pcs resource enable openstack-keystone
+        check_resource openstack-keystone started 1800
+    fi
+    pcs resource enable httpd
+    check_resource httpd started 1800
+fi
+
+# Swift isn't controlled by pacemaker
+for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
+openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
+openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do
+    systemctl start $S
+done
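The galera wait loop above calls clustercheck, the health-check script installed with
the HA database setup, and treats its exit status as "local node is synced". Where that
helper is not available, roughly the same check can be expressed directly against MySQL;
a hypothetical stand-in, assuming root access over the local socket:

    # Hypothetical replacement for the clustercheck call: succeed once the local
    # Galera node reports wsrep state "Synced".
    galera_synced() {
        local state
        state=$(mysql --batch --skip-column-names \
            -e "SHOW GLOBAL STATUS LIKE 'wsrep_local_state_comment';" | awk '{print $2}')
        [ "$state" = "Synced" ]
    }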
diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml
index 58d7b6d8..12268b0e 100644
--- a/extraconfig/tasks/major_upgrade_pacemaker.yaml
+++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml
@@ -2,7 +2,15 @@ heat_template_version: 2014-10-16
 description: 'Upgrade for Pacemaker deployments'
 
 parameters:
-  servers:
+  controller_servers:
+    type: json
+  compute_servers:
+    type: json
+  blockstorage_servers:
+    type: json
+  objectstorage_servers:
+    type: json
+  cephstorage_servers:
     type: json
   input_values:
     type: json
@@ -17,12 +25,12 @@ resources:
         list_join:
         - ''
         - - get_file: pacemaker_common_functions.sh
-          - get_file: major_upgrade_pacemaker_1.sh
+          - get_file: major_upgrade_controller_pacemaker_1.sh
 
   ControllerPacemakerUpgradeDeployment_Step1:
     type: OS::Heat::SoftwareDeploymentGroup
     properties:
-      servers: {get_param: servers}
+      servers: {get_param: controller_servers}
       config: {get_resource: ControllerPacemakerUpgradeConfig_Step1}
       input_values: {get_param: input_values}
 
@@ -34,12 +42,12 @@ resources:
         list_join:
         - ''
         - - get_file: pacemaker_common_functions.sh
-          - get_file: major_upgrade_pacemaker_2.sh
+          - get_file: major_upgrade_controller_pacemaker_2.sh
 
-  ControllerPacemakerUpgrade2Deployment_Step2:
+  ControllerPacemakerUpgradeDeployment_Step2:
     type: OS::Heat::SoftwareDeploymentGroup
     depends_on: ControllerPacemakerUpgradeDeployment_Step1
     properties:
-      servers: {get_param: servers}
+      servers: {get_param: controller_servers}
       config: {get_resource: ControllerPacemakerUpgradeConfig_Step2}
       input_values: {get_param: input_values}
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_pacemaker_1.sh
deleted file mode 100755
index bee9a939..00000000
--- a/extraconfig/tasks/major_upgrade_pacemaker_1.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-set -eu
-
-cluster_sync_timeout=600
-
-if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then
-    echo_error "ERROR: upgrade cannot start with some cluster nodes being offline"
-    exit 1
-fi
-
-if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
-    pcs resource disable httpd
-    check_resource httpd stopped 1800
-    if pcs status | grep openstack-keystone; then
-        pcs resource disable openstack-keystone
-        check_resource openstack-keystone stopped 1800
-    fi
-    pcs resource disable redis
-    check_resource redis stopped 600
-    pcs resource disable mongod
-    check_resource mongod stopped 600
-    pcs resource disable rabbitmq
-    check_resource rabbitmq stopped 600
-    pcs resource disable memcached
-    check_resource memcached stopped 600
-    pcs resource disable galera
-    check_resource galera stopped 600
-    pcs cluster stop --all
-fi
-
-# Swift isn't controled by pacemaker
-for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
-openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
-openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do
-    systemctl stop $S
-done
-
-tstart=$(date +%s)
-while systemctl is-active pacemaker; do
-    sleep 5
-    tnow=$(date +%s)
-    if (( tnow-tstart > cluster_sync_timeout )) ; then
-        echo_error "ERROR: cluster shutdown timed out"
-        exit 1
-    fi
-done
-
-yum update -y
-
-# Pin messages sent to compute nodes to kilo, these will be upgraded later
-crudini --set /etc/nova/nova.conf upgrade_levels compute liberty
-# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
-# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
-crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
-# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
-# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
-crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_pacemaker_2.sh
deleted file mode 100755
index 0b92a3bb..00000000
--- a/extraconfig/tasks/major_upgrade_pacemaker_2.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-
-set -eu
-
-cluster_form_timeout=600
-cluster_settle_timeout=600
-galera_sync_timeout=600
-
-if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
-    pcs cluster start --all
-
-    tstart=$(date +%s)
-    while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
-        sleep 5
-        tnow=$(date +%s)
-        if (( tnow-tstart > cluster_form_timeout )) ; then
-            echo_error "ERROR: timed out forming the cluster"
-            exit 1
-        fi
-    done
-
-    if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
-        echo_error "ERROR: timed out waiting for cluster to finish transition"
-        exit 1
-    fi
-
-    pcs resource enable galera
-    check_resource galera started 600
-    pcs resource enable mongod
-    check_resource mongod started 600
-
-    tstart=$(date +%s)
-    while ! clustercheck; do
-        sleep 5
-        tnow=$(date +%s)
-        if (( tnow-tstart > galera_sync_timeout )) ; then
-            echo_error "ERROR galera sync timed out"
-            exit 1
-        fi
-    done
-
-    # Run all the db syncs
-    # TODO: check if this can be triggered in puppet and removed from here
-    ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
-    cinder-manage db sync
-    glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
-    heat-manage --config-file /etc/heat/heat.conf db_sync
-    keystone-manage db_sync
-    neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
-    nova-manage db sync
-
-    pcs resource enable memcached
-    check_resource memcached started 600
-    pcs resource enable rabbitmq
-    check_resource rabbitmq started 600
-    pcs resource enable redis
-    check_resource redis started 600
-    if pcs status | grep openstack-keystone; then
-        pcs resource enable openstack-keystone
-        check_resource openstack-keystone started 1800
-    fi
-    pcs resource enable httpd
-    check_resource httpd started 1800
-fi
-
-# Swift isn't controled by heat
-for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
-openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
-openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy; do
-    systemctl start $S
-done
diff --git a/extraconfig/tasks/noop.yaml b/extraconfig/tasks/noop.yaml
index 0cff7469..dbb863be 100644
--- a/extraconfig/tasks/noop.yaml
+++ b/extraconfig/tasks/noop.yaml
@@ -4,6 +4,22 @@ description: 'No-op task'
 parameters:
   servers:
     type: json
+    default: []
+  controller_servers:
+    type: json
+    default: []
+  compute_servers:
+    type: json
+    default: []
+  blockstorage_servers:
+    type: json
+    default: []
+  objectstorage_servers:
+    type: json
+    default: []
+  cephstorage_servers:
+    type: json
+    default: []
   input_values:
     type: json
     default: {}
diff --git a/extraconfig/tasks/yum_update_noop.yaml b/extraconfig/tasks/yum_update_noop.yaml
new file mode 100644
index 00000000..b759d9c5
--- /dev/null
+++ b/extraconfig/tasks/yum_update_noop.yaml
@@ -0,0 +1,29 @@
+heat_template_version: 2014-10-16
+description: 'No-op yum update task'
+
+resources:
+
+  config:
+    type: OS::Heat::SoftwareConfig
+    properties:
+      group: script
+      config: |
+        #!/bin/bash
+        echo -n "false" > $heat_outputs_path.update_managed_packages
+      inputs:
+        - name: update_identifier
+          description: yum will only run for previously unused values of update_identifier
+          default: ''
+        - name: command
+          description: yum sub-command to run, defaults to "update"
+          default: update
+        - name: command_arguments
+          description: yum command arguments, defaults to ""
+          default: ''
+      outputs:
+        - name: update_managed_packages
+          description: boolean value indicating whether to upgrade managed packages
+
+outputs:
+  OS::stack_id:
+    value: {get_resource: config}
diff --git a/overcloud-resource-registry-puppet.yaml b/overcloud-resource-registry-puppet.yaml
index 7288aba8..b1a5816e 100644
--- a/overcloud-resource-registry-puppet.yaml
+++ b/overcloud-resource-registry-puppet.yaml
@@ -23,6 +23,7 @@ resource_registry:
   OS::TripleO::BootstrapNode::SoftwareConfig: puppet/bootstrap-config.yaml
 
   # Tasks (for internal TripleO usage)
+  OS::TripleO::Tasks::UpdateWorkflow: extraconfig/tasks/noop.yaml
   OS::TripleO::Tasks::PackageUpdate: extraconfig/tasks/yum_update.yaml
   OS::TripleO::Tasks::ControllerPrePuppet: extraconfig/tasks/noop.yaml
   OS::TripleO::Tasks::ControllerPostPuppet: extraconfig/tasks/noop.yaml
diff --git a/overcloud.yaml b/overcloud.yaml
index 9b95484e..0499fa67 100644
--- a/overcloud.yaml
+++ b/overcloud.yaml
@@ -1522,10 +1522,23 @@ resources:
       config: {get_resource: AllNodesValidationConfig}
       servers: {get_attr: [CephStorage, attributes, nova_server_resource]}
 
+  UpdateWorkflow:
+    type: OS::TripleO::Tasks::UpdateWorkflow
+    properties:
+      controller_servers: {get_attr: [Controller, attributes, nova_server_resource]}
+      compute_servers: {get_attr: [Compute, attributes, nova_server_resource]}
+      blockstorage_servers: {get_attr: [BlockStorage, attributes, nova_server_resource]}
+      objectstorage_servers: {get_attr: [ObjectStorage, attributes, nova_server_resource]}
+      cephstorage_servers: {get_attr: [CephStorage, attributes, nova_server_resource]}
+      input_values:
+        deploy_identifier: {get_param: DeployIdentifier}
+        update_identifier: {get_param: UpdateIdentifier}
+
   # Optional ExtraConfig for all nodes - all roles are passed in here, but
   # the nested template may configure each role differently (or not at all)
   AllNodesExtraConfig:
     type: OS::TripleO::AllNodesExtraConfig
+    depends_on: UpdateWorkflow
     properties:
       controller_servers: {get_attr: [Controller, attributes, nova_server_resource]}
       compute_servers: {get_attr: [Compute, attributes, nova_server_resource]}
-- 
cgit 1.2.3-korg
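Once a stack update carrying this workflow is underway, the new resources can be watched
from the undercloud to follow upgrade progress; a minimal sketch, assuming the default
"overcloud" stack name and the usual stackrc credentials file:

    # List the upgrade-related resources and their current status (run on the undercloud).
    source ~/stackrc
    heat resource-list -n 5 overcloud | grep -i -e UpdateWorkflow -e PacemakerUpgrade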