Diffstat (limited to 'extraconfig')
11 files changed, 379 insertions, 9 deletions
diff --git a/extraconfig/all_nodes/default.yaml b/extraconfig/all_nodes/default.yaml
new file mode 100644
index 00000000..68f9eadd
--- /dev/null
+++ b/extraconfig/all_nodes/default.yaml
@@ -0,0 +1,27 @@
+heat_template_version: 2014-10-16
+
+description: >
+  Noop extra config for allnodes extra cluster config
+
+# Parameters passed from the parent template - note if you maintain
+# out-of-tree templates they may require additional parameters if the
+# in-tree templates add a new role.
+parameters:
+  controller_servers:
+    type: json
+  compute_servers:
+    type: json
+  blockstorage_servers:
+    type: json
+  objectstorage_servers:
+    type: json
+  cephstorage_servers:
+    type: json
+# Note extra parameters can be defined, then passed data via the
+# environment parameter_defaults, without modifying the parent template
+
+outputs:
+  # This value should change if the configuration data has changed
+  # It is used to e.g re-apply puppet after hieradata values change.
+  config_identifier:
+    value: none
diff --git a/extraconfig/all_nodes/mac_hostname.yaml b/extraconfig/all_nodes/mac_hostname.yaml
new file mode 100644
index 00000000..739cbf0a
--- /dev/null
+++ b/extraconfig/all_nodes/mac_hostname.yaml
@@ -0,0 +1,116 @@
+heat_template_version: 2014-10-16
+
+description: >
+  Example extra config for cluster config
+  this example collects the hostname and MAC addresses for each node in
+  the deployment, then distributes that info to all Controller nodes.
+
+# Parameters passed from the parent template - note if you maintain
+# out-of-tree templates they may require additional parameters if the
+# in-tree templates add a new role.
+parameters:
+  controller_servers:
+    type: json
+  compute_servers:
+    type: json
+  blockstorage_servers:
+    type: json
+  objectstorage_servers:
+    type: json
+  cephstorage_servers:
+    type: json
+# Note extra parameters can be defined, then passed data via the
+# environment parameter_defaults, without modifying the parent template
+
+resources:
+
+  CollectMacConfig:
+    type: OS::Heat::SoftwareConfig
+    properties:
+      group: script
+      config: |
+        #!/bin/sh
+        MACS=$(ifconfig | grep ether | awk '{print $2}' | tr "\n" " ")
+        HOSTNAME=$(hostname -s)
+        echo "$HOSTNAME $MACS"
+
+  # FIXME(shardy): Long term it'd be better if Heat SoftwareDeployments accepted
+  # list instead of a map, then we could join the lists of servers into one
+  # deployment instead of requiring one deployment per-role.
+  CollectMacDeploymentsController:
+    type: OS::Heat::SoftwareDeployments
+    properties:
+      servers: {get_param: controller_servers}
+      config: {get_resource: CollectMacConfig}
+      actions: ['CREATE'] # Only do this on CREATE
+
+  CollectMacDeploymentsCompute:
+    type: OS::Heat::SoftwareDeployments
+    properties:
+      servers: {get_param: compute_servers}
+      config: {get_resource: CollectMacConfig}
+      actions: ['CREATE'] # Only do this on CREATE
+
+  CollectMacDeploymentsBlockStorage:
+    type: OS::Heat::SoftwareDeployments
+    properties:
+      servers: {get_param: blockstorage_servers}
+      config: {get_resource: CollectMacConfig}
+      actions: ['CREATE'] # Only do this on CREATE
+
+  CollectMacDeploymentsObjectStorage:
+    type: OS::Heat::SoftwareDeployments
+    properties:
+      servers: {get_param: objectstorage_servers}
+      config: {get_resource: CollectMacConfig}
+      actions: ['CREATE'] # Only do this on CREATE
+
+  CollectMacDeploymentsCephStorage:
+    type: OS::Heat::SoftwareDeployments
+    properties:
+      servers: {get_param: cephstorage_servers}
+      config: {get_resource: CollectMacConfig}
+      actions: ['CREATE'] # Only do this on CREATE
+
+  # Now we distribute all-the-macs to all nodes
+  DistributeMacConfig:
+    type: OS::Heat::SoftwareConfig
+    properties:
+      group: script
+      inputs:
+      - name: controller_mappings
+      - name: compute_mappings
+      - name: blockstorage_mappings
+      - name: objectstorage_mappings
+      - name: cephstorage_mappings
+      config: |
+        #!/bin/sh
+        echo $controller_mappings > /root/controller_mappings
+        echo $compute_mappings > /root/compute_mappings
+        echo $blockstorage_mappings > /root/blockstorage_mappings
+        echo $objectstorage_mappings > /root/objectstorage_mappings
+        echo $cephstorage_mappings > /root/cephstorage_mappings
+        echo "mappings = $(cat /root/*_mappings)"
+
+  DistributeMacDeploymentsController:
+    type: OS::Heat::SoftwareDeployments
+    properties:
+      servers: {get_param: controller_servers}
+      config: {get_resource: DistributeMacConfig}
+      input_values:
+        # FIXME(shardy): It'd be more convenient if we could join these
+        # items together but because the returned format is a map (not a list)
+        # we can't use list_join or str_replace. Possible Heat TODO.
+        controller_mappings: {get_attr: [CollectMacDeploymentsController, deploy_stdouts]}
+        compute_mappings: {get_attr: [CollectMacDeploymentsCompute, deploy_stdouts]}
+        blockstorage_mappings: {get_attr: [CollectMacDeploymentsBlockStorage, deploy_stdouts]}
+        objectstorage_mappings: {get_attr: [CollectMacDeploymentsObjectStorage, deploy_stdouts]}
+        cephstorage_mappings: {get_attr: [CollectMacDeploymentsCephStorage, deploy_stdouts]}
+      actions: ['CREATE'] # Only do this on CREATE
+
+outputs:
+  # This value should change if the configuration data has changed
+  # It is used to e.g re-apply puppet after hieradata values change.
+  config_identifier:
+    value: {get_attr: [DistributeMacDeploymentsController, deploy_stdouts]}
+
diff --git a/extraconfig/all_nodes/random_string.yaml b/extraconfig/all_nodes/random_string.yaml
new file mode 100644
index 00000000..b4b30274
--- /dev/null
+++ b/extraconfig/all_nodes/random_string.yaml
@@ -0,0 +1,63 @@
+heat_template_version: 2014-10-16
+
+description: >
+  Example extra config for cluster config
+  this example deploys a random string to all controller and compute
+  nodes, showing how data may be shared amongst nodes, vs the
+  other ExtraConfig interfaces which act only on individual nodes.
+
+# Parameters passed from the parent template - note if you maintain
+# out-of-tree templates they may require additional parameters if the
+# in-tree templates add a new role.
+parameters:
+  controller_servers:
+    type: json
+  compute_servers:
+    type: json
+  blockstorage_servers:
+    type: json
+  objectstorage_servers:
+    type: json
+  cephstorage_servers:
+    type: json
+# Note extra parameters can be defined, then passed data via the
+# environment parameter_defaults, without modifying the parent template
+
+resources:
+
+  Random:
+    type: OS::Heat::RandomString
+
+  RandomConfig:
+    type: OS::Heat::SoftwareConfig
+    properties:
+      group: script
+      inputs:
+      - name: random_value
+      config: |
+        #!/bin/sh
+        echo $random_value > /root/random_value
+
+  RandomDeploymentsController:
+    type: OS::Heat::SoftwareDeployments
+    properties:
+      servers: {get_param: controller_servers}
+      config: {get_resource: RandomConfig}
+      actions: ['CREATE'] # Only do this on CREATE
+      input_values:
+        random_value: {get_attr: [Random, value]}
+
+  RandomDeploymentsCompute:
+    type: OS::Heat::SoftwareDeployments
+    properties:
+      servers: {get_param: compute_servers}
+      config: {get_resource: RandomConfig}
+      actions: ['CREATE'] # Only do this on CREATE
+      input_values:
+        random_value: {get_attr: [Random, value]}
+
+outputs:
+  # This value should change if the configuration data has changed
+  # It is used to e.g re-apply puppet after hieradata values change.
+  config_identifier:
+    value: {get_attr: [Random, value]}
diff --git a/extraconfig/post_deploy/rhel-registration/rhel-registration-resource-registry.yaml b/extraconfig/post_deploy/rhel-registration/rhel-registration-resource-registry.yaml
deleted file mode 100644
index 7b48392d..00000000
--- a/extraconfig/post_deploy/rhel-registration/rhel-registration-resource-registry.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-resource_registry:
-  OS::TripleO::NodeExtraConfigPost: rhel-registration.yaml
diff --git a/extraconfig/post_deploy/rhel-registration/environment-rhel-registration.yaml b/extraconfig/pre_deploy/rhel-registration/environment-rhel-registration.yaml
index 70437a8a..70437a8a 100644
--- a/extraconfig/post_deploy/rhel-registration/environment-rhel-registration.yaml
+++ b/extraconfig/pre_deploy/rhel-registration/environment-rhel-registration.yaml
diff --git a/extraconfig/pre_deploy/rhel-registration/rhel-registration-resource-registry.yaml b/extraconfig/pre_deploy/rhel-registration/rhel-registration-resource-registry.yaml
new file mode 100644
index 00000000..75453302
--- /dev/null
+++ b/extraconfig/pre_deploy/rhel-registration/rhel-registration-resource-registry.yaml
@@ -0,0 +1,2 @@
+resource_registry:
+  OS::TripleO::NodeExtraConfig: rhel-registration.yaml
diff --git a/extraconfig/post_deploy/rhel-registration/rhel-registration.yaml b/extraconfig/pre_deploy/rhel-registration/rhel-registration.yaml
index bf6c88cd..d5160915 100644
--- a/extraconfig/post_deploy/rhel-registration/rhel-registration.yaml
+++ b/extraconfig/pre_deploy/rhel-registration/rhel-registration.yaml
@@ -6,8 +6,8 @@ description: >
 # Note extra parameters can be defined, then passed data via the
 # environment parameter_defaults, without modifying the parent template
 parameters:
-  servers:
-    type: json
+  server:
+    type: string
   # To be defined via a local or global environment in parameter_defaults
   rhel_reg_activation_key:
     type: string
@@ -71,9 +71,9 @@ resources:
       config: {get_file: scripts/rhel-registration}
 
   RHELRegistrationDeployment:
-    type: OS::Heat::SoftwareDeployments
+    type: OS::Heat::SoftwareDeployment
     properties:
-      servers: {get_param: servers}
+      server: {get_param: server}
       config: {get_resource: RHELRegistration}
       actions: ['CREATE'] # Only do this on CREATE
       input_values:
@@ -104,10 +104,16 @@ resources:
         - name: REG_METHOD
 
   RHELUnregistrationDeployment:
-    type: OS::Heat::SoftwareDeployments
+    type: OS::Heat::SoftwareDeployment
     properties:
-      servers: {get_param: servers}
+      server: {get_param: server}
       config: {get_resource: RHELUnregistration}
       actions: ['DELETE'] # Only do this on DELETE
       input_values:
         REG_METHOD: {get_param: rhel_reg_method}
+
+outputs:
+  deploy_stdout:
+    description: Deployment reference, used to trigger puppet apply on changes
+    value: {get_attr: [RHELRegistrationDeployment, deploy_stdout]}
+
diff --git a/extraconfig/post_deploy/rhel-registration/scripts/rhel-registration b/extraconfig/pre_deploy/rhel-registration/scripts/rhel-registration
index cbbd6a1d..cbbd6a1d 100644
--- a/extraconfig/post_deploy/rhel-registration/scripts/rhel-registration
+++ b/extraconfig/pre_deploy/rhel-registration/scripts/rhel-registration
diff --git a/extraconfig/post_deploy/rhel-registration/scripts/rhel-unregistration b/extraconfig/pre_deploy/rhel-registration/scripts/rhel-unregistration
index 1e72e0a6..1e72e0a6 100644
--- a/extraconfig/post_deploy/rhel-registration/scripts/rhel-unregistration
+++ b/extraconfig/pre_deploy/rhel-registration/scripts/rhel-unregistration
diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh
index 3d4c772b..3ba13f23 100755
--- a/extraconfig/tasks/yum_update.sh
+++ b/extraconfig/tasks/yum_update.sh
@@ -8,6 +8,7 @@
 # command_arguments - yum command arguments, defaults to ""
 
 echo "Started yum_update.sh on server $deploy_server_id at `date`"
+echo -n "false" > $heat_outputs_path.update_managed_packages
 
 if [[ -z "$update_identifier" ]]; then
     echo "Not running due to unset update_identifier"
@@ -20,6 +21,9 @@ mkdir -p $timestamp_dir
 # sanitise to remove unusual characters
 update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}
 
+# seconds to wait for this node to rejoin the cluster after update
+cluster_start_timeout=360
+
 timestamp_file="$timestamp_dir/$update_identifier"
 if [[ -a "$timestamp_file" ]]; then
     echo "Not running for already-run timestamp \"$update_identifier\""
@@ -27,6 +31,136 @@ if [[ -a "$timestamp_file" ]]; then
     echo "Not running for already-run timestamp \"$update_identifier\""
 fi
 touch "$timestamp_file"
+command_arguments=${command_arguments:-}
+
+list_updates=$(yum list updates)
+
+if [[ "$list_updates" == "" ]]; then
+    echo "No packages require updating"
+    exit 0
+fi
+
+pacemaker_status=$(systemctl is-active pacemaker)
+
+if [[ "$pacemaker_status" == "active" ]] ; then
+    echo "Checking for and adding missing constraints"
+
+    if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then
+        pcs constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone
+    fi
+
+    if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then
+        pcs constraint order start rabbitmq-clone then openstack-keystone-clone
+    fi
+
+    if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then
+        pcs constraint order promote galera-master then openstack-keystone-clone
+    fi
+
+    if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then
+        pcs constraint order start haproxy-clone then openstack-keystone-clone
+    fi
+
+    if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then
+        pcs constraint order start memcached-clone then openstack-keystone-clone
+    fi
+
+    if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then
+        pcs constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false
+    fi
+
+    if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then
+        pcs resource defaults resource-stickiness=INFINITY
+    fi
+
+    echo "Setting resource start/stop timeouts"
+
+    # timeouts for non-openstack services and special cases
+    pcs resource update haproxy op start timeout=100s
+    pcs resource update haproxy op stop timeout=100s
+    # mongod start timeout is also higher, setting only stop timeout
+    pcs resource update mongod op stop timeout=100s
+    # rabbit start timeout is already 100s
+    pcs resource update rabbitmq op stop timeout=100s
+    pcs resource update memcached op start timeout=100s
+    pcs resource update memcached op stop timeout=100s
+    pcs resource update httpd op start timeout=100s
+    pcs resource update httpd op stop timeout=100s
+    # neutron-netns-cleanup stop timeout is 300s, setting only start timeout
+    pcs resource update neutron-netns-cleanup op start timeout=100s
+    # neutron-ovs-cleanup stop timeout is 300s, setting only start timeout
+    pcs resource update neutron-ovs-cleanup op start timeout=100s
+
+    # timeouts for openstack services
+    pcs resource update neutron-dhcp-agent op start timeout=100s
+    pcs resource update neutron-dhcp-agent op stop timeout=100s
+    pcs resource update neutron-l3-agent op start timeout=100s
+    pcs resource update neutron-l3-agent op stop timeout=100s
+    pcs resource update neutron-metadata-agent op start timeout=100s
+    pcs resource update neutron-metadata-agent op stop timeout=100s
+    pcs resource update neutron-openvswitch-agent op start timeout=100s
+    pcs resource update neutron-openvswitch-agent op stop timeout=100s
+    pcs resource update neutron-server op start timeout=100s
+    pcs resource update neutron-server op stop timeout=100s
+    pcs resource update openstack-ceilometer-alarm-evaluator op start timeout=100s
+    pcs resource update openstack-ceilometer-alarm-evaluator op stop timeout=100s
+    pcs resource update openstack-ceilometer-alarm-notifier op start timeout=100s
+    pcs resource update openstack-ceilometer-alarm-notifier op stop timeout=100s
+    pcs resource update openstack-ceilometer-api op start timeout=100s
+    pcs resource update openstack-ceilometer-api op stop timeout=100s
+    pcs resource update openstack-ceilometer-central op start timeout=100s
+    pcs resource update openstack-ceilometer-central op stop timeout=100s
+    pcs resource update openstack-ceilometer-collector op start timeout=100s
+    pcs resource update openstack-ceilometer-collector op stop timeout=100s
+    pcs resource update openstack-ceilometer-notification op start timeout=100s
+    pcs resource update openstack-ceilometer-notification op stop timeout=100s
+    pcs resource update openstack-cinder-api op start timeout=100s
+    pcs resource update openstack-cinder-api op stop timeout=100s
+    pcs resource update openstack-cinder-scheduler op start timeout=100s
+    pcs resource update openstack-cinder-scheduler op stop timeout=100s
+    pcs resource update openstack-cinder-volume op start timeout=100s
+    pcs resource update openstack-cinder-volume op stop timeout=100s
+    pcs resource update openstack-glance-api op start timeout=100s
+    pcs resource update openstack-glance-api op stop timeout=100s
+    pcs resource update openstack-glance-registry op start timeout=100s
+    pcs resource update openstack-glance-registry op stop timeout=100s
+    pcs resource update openstack-heat-api op start timeout=100s
+    pcs resource update openstack-heat-api op stop timeout=100s
+    pcs resource update openstack-heat-api-cfn op start timeout=100s
+    pcs resource update openstack-heat-api-cfn op stop timeout=100s
+    pcs resource update openstack-heat-api-cloudwatch op start timeout=100s
+    pcs resource update openstack-heat-api-cloudwatch op stop timeout=100s
+    pcs resource update openstack-heat-engine op start timeout=100s
+    pcs resource update openstack-heat-engine op stop timeout=100s
+    pcs resource update openstack-keystone op start timeout=100s
+    pcs resource update openstack-keystone op stop timeout=100s
+    pcs resource update openstack-nova-api op start timeout=100s
+    pcs resource update openstack-nova-api op stop timeout=100s
+    pcs resource update openstack-nova-conductor op start timeout=100s
+    pcs resource update openstack-nova-conductor op stop timeout=100s
+    pcs resource update openstack-nova-consoleauth op start timeout=100s
+    pcs resource update openstack-nova-consoleauth op stop timeout=100s
+    pcs resource update openstack-nova-novncproxy op start timeout=100s
+    pcs resource update openstack-nova-novncproxy op stop timeout=100s
+    pcs resource update openstack-nova-scheduler op start timeout=100s
+    pcs resource update openstack-nova-scheduler op stop timeout=100s
+
+    echo "Pacemaker running, stopping cluster node and doing full package update"
+    node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
+    if [[ "$node_count" == "1" ]] ; then
+        echo "Active node count is 1, stopping node with --force"
+        pcs cluster stop --force
+    else
+        pcs cluster stop
+    fi
+else
+    echo "Excluding upgrading packages that are handled by config management tooling"
+    command_arguments="$command_arguments --skip-broken"
+    for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do
+        command_arguments="$command_arguments --exclude $exclude"
+    done
+fi
+
 command=${command:-update}
 full_command="yum -y $command $command_arguments"
 echo "Running: $full_command"
@@ -36,6 +170,27 @@ return_code=$?
 echo "$result"
 echo "yum return code: $return_code"
 
+if [[ "$pacemaker_status" == "active" ]] ; then
+    echo "Starting cluster node"
+    pcs cluster start
+
+    hostname=$(hostname -s)
+    tstart=$(date +%s)
+    while [[ "$(pcs status | grep "^Online" | grep -F -o $hostname)" == "" ]]; do
+        sleep 5
+        tnow=$(date +%s)
+        if (( tnow-tstart > cluster_start_timeout )) ; then
+            echo "ERROR $hostname failed to join cluster in $cluster_start_timeout seconds"
+            pcs status
+            exit 1
+        fi
+    done
+    pcs status
+
+else
+    echo -n "true" > $heat_outputs_path.update_managed_packages
+fi
+
 echo "Finished yum_update.sh on server $deploy_server_id at `date`"
 
 exit $return_code
diff --git a/extraconfig/tasks/yum_update.yaml b/extraconfig/tasks/yum_update.yaml
index e918149e..d313ca9f 100644
--- a/extraconfig/tasks/yum_update.yaml
+++ b/extraconfig/tasks/yum_update.yaml
@@ -20,7 +20,10 @@ resources:
         - name: command_arguments
           description: yum command arguments, defaults to ""
           default: ''
+      outputs:
+        - name: update_managed_packages
+          description: boolean value indicating whether to upgrade managed packages
 
 outputs:
   OS::stack_id:
-    value: {get_resource: config}
\ No newline at end of file
+    value: {get_resource: config}
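
A usage note, not part of the diff: the new all_nodes templates are selected through the resource_registry, the same mechanism as the rhel-registration-resource-registry.yaml file above. The hook in the parent templates is outside this diff, so the registry key below is an assumption for illustration:

    # hypothetical environment file, e.g. all-nodes-extraconfig.yaml
    resource_registry:
      # key assumed, not shown in this diff - check the parent templates for the real name
      OS::TripleO::AllNodesExtraConfig: extraconfig/all_nodes/random_string.yaml

Passed with -e at stack create or update time, this swaps the noop default.yaml for one of the example templates without editing the parent templates.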
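The recurring comment about defining extra parameters "via the environment parameter_defaults" works because Heat applies parameter_defaults to any template that declares a matching parameter. A minimal sketch, assuming a hypothetical my_extra_data parameter added to an out-of-tree copy of one of these templates:

    # out-of-tree template: declared next to the *_servers parameters
    parameters:
      my_extra_data:   # hypothetical name
        type: string
        default: ''

    # environment file
    parameter_defaults:
      my_extra_data: some-value

The *_servers values keep coming from the parent template, while my_extra_data comes from the environment, so the parent template needs no change.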
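For the relocated RHEL registration, the new resource-registry file maps OS::TripleO::NodeExtraConfig to rhel-registration.yaml, so registration now runs per-node at pre-deploy time instead of post-deploy. A sketch of a deployer environment with placeholder values; only two of the rhel_reg_* parameters are shown, the template defines several more:

    resource_registry:
      OS::TripleO::NodeExtraConfig: extraconfig/pre_deploy/rhel-registration/rhel-registration.yaml
    parameter_defaults:
      rhel_reg_activation_key: my-key   # placeholder
      rhel_reg_method: portal           # placeholder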
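The rhel-registration.yaml switch from OS::Heat::SoftwareDeployments to OS::Heat::SoftwareDeployment follows from that per-node interface: the plural resource fans one config out across a json map of servers, while the singular form targets exactly one server. Side by side, with hypothetical resource names:

    ManyDeployments:
      type: OS::Heat::SoftwareDeployments
      properties:
        servers: {get_param: controller_servers}  # json map: name -> server id
        config: {get_resource: SomeConfig}

    OneDeployment:
      type: OS::Heat::SoftwareDeployment
      properties:
        server: {get_param: server}               # a single server id string
        config: {get_resource: SomeConfig}

This is also why the plural form's deploy_stdouts attribute, used by mac_hostname.yaml above, returns a map of server name to stdout rather than a single string, which is the root of the FIXME about list_join/str_replace.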
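The update_managed_packages plumbing added to yum_update.sh and yum_update.yaml relies on the script hook's output convention: each name declared under outputs in the SoftwareConfig is read back from a file the script writes at $heat_outputs_path.<name>. A stripped-down sketch of the same pattern:

    ExampleConfig:   # hypothetical, mirrors yum_update.yaml
      type: OS::Heat::SoftwareConfig
      properties:
        group: script
        outputs:
          - name: update_managed_packages
        config: |
          #!/bin/bash
          # write the declared output; yum_update.sh defaults this to "false"
          # up front and flips it to "true" on the non-pacemaker path
          echo -n "true" > $heat_outputs_path.update_managed_packages

A deployment of the config then exposes the value via {get_attr: [<deployment>, update_managed_packages]}, which is how a later step can decide whether to upgrade the packages yum excluded here.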