summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ci/environments/multinode_major_upgrade.yaml46
-rw-r--r--puppet/major_upgrade_steps.j2.yaml80
-rw-r--r--puppet/services/README.rst22
-rw-r--r--puppet/services/barbican-api.yaml4
-rw-r--r--puppet/services/ceph-mon.yaml21
-rw-r--r--puppet/services/ceph-osd.yaml41
-rw-r--r--puppet/services/gnocchi-metricd.yaml2
-rw-r--r--puppet/services/ironic-api.yaml4
-rw-r--r--puppet/services/ironic-conductor.yaml7
-rw-r--r--puppet/services/neutron-dhcp.yaml5
-rw-r--r--puppet/services/services.yaml5
-rw-r--r--roles_data.yaml5
12 files changed, 234 insertions, 8 deletions
diff --git a/ci/environments/multinode_major_upgrade.yaml b/ci/environments/multinode_major_upgrade.yaml
new file mode 100644
index 00000000..7de1594f
--- /dev/null
+++ b/ci/environments/multinode_major_upgrade.yaml
@@ -0,0 +1,46 @@
+resource_registry:
+ OS::TripleO::Controller::Net::SoftwareConfig: ../heat-templates/net-config-multinode.yaml
+ OS::TripleO::Compute::Net::SoftwareConfig: ../heat-templates/net-config-multinode.yaml
+ OS::TripleO::Services::Core: multinode-core.yaml
+
+parameter_defaults:
+ ControllerServices:
+ - OS::TripleO::Services::CACerts
+ - OS::TripleO::Services::Core
+ - OS::TripleO::Services::Kernel
+ - OS::TripleO::Services::Keystone
+ - OS::TripleO::Services::GlanceApi
+ - OS::TripleO::Services::GlanceRegistry
+ - OS::TripleO::Services::NeutronDhcpAgent
+ - OS::TripleO::Services::NeutronL3Agent
+ - OS::TripleO::Services::NeutronMetadataAgent
+ - OS::TripleO::Services::NeutronServer
+ - OS::TripleO::Services::NeutronCorePlugin
+ - OS::TripleO::Services::NeutronOvsAgent
+ - OS::TripleO::Services::CinderApi
+ - OS::TripleO::Services::CinderScheduler
+ - OS::TripleO::Services::CinderVolume
+ - OS::TripleO::Services::HeatApi
+ - OS::TripleO::Services::HeatApiCfn
+ - OS::TripleO::Services::HeatApiCloudwatch
+ - OS::TripleO::Services::HeatEngine
+ - OS::TripleO::Services::SwiftProxy
+ - OS::TripleO::Services::SwiftStorage
+ - OS::TripleO::Services::SwiftRingBuilder
+ - OS::TripleO::Services::SaharaApi
+ - OS::TripleO::Services::SaharaEngine
+ - OS::TripleO::Services::MySQL
+ - OS::TripleO::Services::RabbitMQ
+ - OS::TripleO::Services::HAproxy
+ - OS::TripleO::Services::Keepalived
+ - OS::TripleO::Services::Memcached
+ - OS::TripleO::Services::Ntp
+ - OS::TripleO::Services::Timezone
+ - OS::TripleO::Services::TripleoPackages
+ - OS::TripleO::Services::TripleoFirewall
+ ControllerExtraConfig:
+ nova::compute::libvirt::services::libvirt_virt_type: qemu
+ nova::compute::libvirt::libvirt_virt_type: qemu
+ # Required for Centos 7.3 and Qemu 2.6.0
+ nova::compute::libvirt::libvirt_cpu_mode: 'none'
+ SwiftCeilometerPipelineEnabled: False
diff --git a/puppet/major_upgrade_steps.j2.yaml b/puppet/major_upgrade_steps.j2.yaml
index 433b03a0..eae85991 100644
--- a/puppet/major_upgrade_steps.j2.yaml
+++ b/puppet/major_upgrade_steps.j2.yaml
@@ -1,3 +1,4 @@
+{% set upgrade_steps_max = 8 -%}
heat_template_version: ocata
description: 'Upgrade steps for all roles'
@@ -15,26 +16,94 @@ parameters:
Setting to a previously unused value during stack-update will trigger
the Upgrade resources to re-run on all roles.
+conditions:
+ # Conditions to disable any steps where the task list is empty
+{% for step in range(0, upgrade_steps_max) %}
+ {% for role in roles %}
+ UpgradeBatchConfig_Step{{step}}Enabled:
+ not:
+ equals:
+ - {get_param: [role_data, {{role.name}}, upgrade_batch_tasks]}
+ - []
+ UpgradeConfig_Step{{step}}Enabled:
+ not:
+ equals:
+ - {get_param: [role_data, {{role.name}}, upgrade_tasks]}
+ - []
+ {% endfor %}
+{% endfor %}
+
resources:
+# Upgrade Steps for all roles, batched updates
+# FIXME(shardy): would be nice to make the number of steps configurable
+{% for step in range(0, upgrade_steps_max) %}
+ {% for role in roles %}
+ # Step {{step}} resources
+ {{role.name}}UpgradeBatchConfig_Step{{step}}:
+ type: OS::TripleO::UpgradeConfig
+ condition: UpgradeBatchConfig_Step{{step}}Enabled
+ # The UpgradeConfig resources could actually be created without
+ # serialization, but the event output is easier to follow if we
+ # do, and there should be minimal performance hit (creating the
+ # config is cheap compared to the time to apply the deployment).
+ {% if step > 0 %}
+ depends_on:
+ {% for dep in roles %}
+ - {{dep.name}}UpgradeBatch_Step{{step -1}}
+ {% endfor %}
+ {% endif %}
+ properties:
+ UpgradeStepConfig: {get_param: [role_data, {{role.name}}, upgrade_batch_tasks]}
+ step: {{step}}
+
+ {{role.name}}UpgradeBatch_Step{{step}}:
+ type: OS::Heat::StructuredDeploymentGroup
+ condition: UpgradeBatchConfig_Step{{step}}Enabled
+ {% if step > 0 %}
+ depends_on:
+ {% for dep in roles %}
+ - {{dep.name}}UpgradeBatch_Step{{step -1}}
+ {% endfor %}
+ {% endif %}
+ update_policy:
+ batch_create:
+ max_batch_size: {{role.upgrade_batch_size|default(1)}}
+ rolling_update:
+ max_batch_size: {{role.upgrade_batch_size|default(1)}}
+ properties:
+ name: {{role.name}}UpgradeBatch_Step{{step}}
+ servers: {get_param: [servers, {{role.name}}]}
+ config: {get_resource: {{role.name}}UpgradeBatchConfig_Step{{step}}}
+ input_values:
+ role: {{role.name}}
+ update_identifier: {get_param: UpdateIdentifier}
+ {% endfor %}
+{% endfor %}
+
# Upgrade Steps for all roles
# FIXME(shardy): would be nice to make the number of steps configurable
-{% for step in range(0, 8) %}
+{% for step in range(0, upgrade_steps_max) %}
{% for role in roles %}
# Step {{step}} resources
{{role.name}}UpgradeConfig_Step{{step}}:
type: OS::TripleO::UpgradeConfig
+ condition: UpgradeConfig_Step{{step}}Enabled
# The UpgradeConfig resources could actually be created without
# serialization, but the event output is easier to follow if we
# do, and there should be minimal performance hit (creating the
# config is cheap compared to the time to apply the deployment).
- {% if step > 0 %}
depends_on:
+ {% if step > 0 %}
{% for dep in roles %}
{% if not dep.disable_upgrade_deployment|default(false) %}
- {{dep.name}}Upgrade_Step{{step -1}}
{% endif %}
{% endfor %}
+ {% else %}
+ {% for dep in roles %}
+ - {{dep.name}}UpgradeBatch_Step{{upgrade_steps_max -1}}
+ {% endfor %}
{% endif %}
properties:
UpgradeStepConfig: {get_param: [role_data, {{role.name}}, upgrade_tasks]}
@@ -42,13 +111,18 @@ resources:
{% if not role.disable_upgrade_deployment|default(false) %}
{{role.name}}Upgrade_Step{{step}}:
type: OS::Heat::StructuredDeploymentGroup
- {% if step > 0 %}
+ condition: UpgradeConfig_Step{{step}}Enabled
depends_on:
+ {% if step > 0 %}
{% for dep in roles %}
{% if not dep.disable_upgrade_deployment|default(false) %}
- {{dep.name}}Upgrade_Step{{step -1}}
{% endif %}
{% endfor %}
+ {% else %}
+ {% for dep in roles %}
+ - {{dep.name}}UpgradeBatch_Step{{upgrade_steps_max -1}}
+ {% endfor %}
{% endif %}
properties:
name: {{role.name}}Upgrade_Step{{step}}
diff --git a/puppet/services/README.rst b/puppet/services/README.rst
index 6e4e9c1d..34cb350b 100644
--- a/puppet/services/README.rst
+++ b/puppet/services/README.rst
@@ -49,6 +49,28 @@ are re-asserted when applying latter ones.
5) Service activation (Pacemaker)
+Batch Upgrade Steps
+-------------------
+
+Each service template may optionally define a `upgrade_batch_tasks` key, which
+is a list of ansible tasks to be performed during the upgrade process.
+
+Similar to the step_config, we allow a series of steps for the per-service
+upgrade sequence, defined as ansible tasks with a tag e.g "step1" for the first
+step, "step2" for the second, etc. Note that each step is performed in batches,
+then we move on to the next step which is also performed in batches (we don't
+perform all steps on one node, then move on to the next one which means you
+can sequence rolling upgrades of dependent services via the step value).
+
+The tasks performed at each step is service specific, but note that all batch
+upgrade steps are performed before the `upgrade_tasks` described below. This
+means that all services that support rolling upgrades can be upgraded without
+downtime during `upgrade_batch_tasks`, then any remaining services are stopped
+and upgraded during `upgrade_tasks`
+
+The default batch size is 1, but this can be overridden for each role via the
+`upgrade_batch_size` option in roles_data.yaml
+
Upgrade Steps
-------------
diff --git a/puppet/services/barbican-api.yaml b/puppet/services/barbican-api.yaml
index 1f220e6b..4e420750 100644
--- a/puppet/services/barbican-api.yaml
+++ b/puppet/services/barbican-api.yaml
@@ -136,13 +136,13 @@ outputs:
nova::compute::barbican_endpoint:
get_param: [EndpointMap, BarbicanInternal, uri]
nova::compute::barbican_auth_endpoint:
- get_param: [EndpointMap, KeystoneV3Internal, uri]
+ get_param: [EndpointMap, KeystoneV3Internal, uri_no_suffix]
cinder_api:
cinder::api::keymgr_api_class: >
castellan.key_manager.barbican_key_manager.BarbicanKeyManager
cinder::api::keymgr_encryption_api_url:
get_param: [EndpointMap, BarbicanInternal, uri]
cinder::api::keymgr_encryption_auth_url:
- get_param: [EndpointMap, KeystoneV3Internal, uri]
+ get_param: [EndpointMap, KeystoneV3Internal, uri_no_suffix]
metadata_settings:
get_attr: [ApacheServiceBase, role_data, metadata_settings]
diff --git a/puppet/services/ceph-mon.yaml b/puppet/services/ceph-mon.yaml
index 68ad69b7..0c61305d 100644
--- a/puppet/services/ceph-mon.yaml
+++ b/puppet/services/ceph-mon.yaml
@@ -113,3 +113,24 @@ outputs:
get_attr: [CephBase, role_data, service_config_settings]
step_config: |
include ::tripleo::profile::base::ceph::mon
+ upgrade_batch_tasks:
+ # Note we perform these tasks in list order, but they are all step0 so
+ # we can perform a rolling upgrade of all mon nodes in step0, then a
+ # rolling upgrade of all osd nodes in step1
+ # FIXME(shardy) I suspect we can use heat or ansible facts here instead?
+ - name: Get hostname
+ tags: step0
+ shell: hostname -s
+ register: mon_id
+ - name: Stop Ceph Mon
+ tags: step0
+ service: name=ceph-mon@{{mon_id.stdout}} pattern=ceph-mon state=stopped
+ - name: Update ceph packages
+ tags: step0
+ yum: name=ceph-mon,ceph state=latest
+ - name: Start ceph-mon service
+ tags: step0
+ service: name=ceph-mon@{{mon_id.stdout}} state=started
+ - name: ceph osd crush tunables default
+ tags: step0
+ shell: ceph osd crush tunables default
diff --git a/puppet/services/ceph-osd.yaml b/puppet/services/ceph-osd.yaml
index df0ee6c3..e9ed6c29 100644
--- a/puppet/services/ceph-osd.yaml
+++ b/puppet/services/ceph-osd.yaml
@@ -45,3 +45,44 @@ outputs:
- '6800-7300'
step_config: |
include ::tripleo::profile::base::ceph::osd
+ upgrade_batch_tasks:
+ - name: Get OSD IDs
+ tags: step1
+ shell: ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }'
+ register: osd_ids
+ # "so that mirrors aren't rebalanced as if the OSD died" - gfidente / leseb
+ - name: ceph osd set noout
+ tags: step1
+ command: ceph osd set noout
+ - name: ceph osd set norebalance
+ tags: step1
+ command: ceph osd set norebalance
+ - name: ceph osd set nodeep-scrub
+ tags: step1
+ command: ceph osd set nodeep-scrub
+ - name: ceph osd set noscrub
+ tags: step1
+ command: ceph osd set noscrub
+ - name: Stop Ceph OSD
+ tags: step1
+ service: name=ceph-osd@$item state=stopped
+ with_items: "{{osd_ids.stdout.strip().split()}}"
+ - name: Update ceph OSD packages
+ tags: step1
+ yum: name=ceph-osd state=latest
+ - name: Start ceph-osd service
+ tags: step1
+ service: name=ceph-osd@$item state=started
+ with_items: "{{osd_ids.stdout.strip().split()}}"
+ - name: ceph osd unset noout
+ tags: step1
+ command: ceph osd unset noout
+ - name: ceph osd unset norebalance
+ tags: step1
+ command: ceph osd unset norebalance
+ - name: ceph osd unset nodeep-scrub
+ tags: step1
+ command: ceph osd unset nodeep-scrub
+ - name: ceph osd unset noscrub
+ tags: step1
+ command: ceph osd unset noscrub
diff --git a/puppet/services/gnocchi-metricd.yaml b/puppet/services/gnocchi-metricd.yaml
index e5f9a8e7..27700606 100644
--- a/puppet/services/gnocchi-metricd.yaml
+++ b/puppet/services/gnocchi-metricd.yaml
@@ -22,7 +22,7 @@ parameters:
default: 'overcloud-gnocchi-metricd'
type: string
GnocchiMetricdWorkers:
- default: ''
+ default: '%{::os_workers}'
description: Number of workers for Gnocchi MetricD
type: string
diff --git a/puppet/services/ironic-api.yaml b/puppet/services/ironic-api.yaml
index aebb37b2..ff91eb63 100644
--- a/puppet/services/ironic-api.yaml
+++ b/puppet/services/ironic-api.yaml
@@ -81,3 +81,7 @@ outputs:
ironic::db::mysql::allowed_hosts:
- '%'
- "%{hiera('mysql_bind_host')}"
+ upgrade_tasks:
+ - name: Stop ironic_api service
+ tags: step2
+ service: name=openstack-ironic-api state=stopped
diff --git a/puppet/services/ironic-conductor.yaml b/puppet/services/ironic-conductor.yaml
index 194afec7..a10c03a5 100644
--- a/puppet/services/ironic-conductor.yaml
+++ b/puppet/services/ironic-conductor.yaml
@@ -98,3 +98,10 @@ outputs:
step_config: |
include ::tripleo::profile::base::ironic::conductor
+ upgrade_tasks:
+ - name: Stop ironic_conductor service
+ tags: step2
+ service: name=openstack-ironic-conductor state=stopped
+ - name: Sync ironic_conductor DB
+ tags: step5
+ command: ironic-dbsync
diff --git a/puppet/services/neutron-dhcp.yaml b/puppet/services/neutron-dhcp.yaml
index 5e7de18e..bb4742c9 100644
--- a/puppet/services/neutron-dhcp.yaml
+++ b/puppet/services/neutron-dhcp.yaml
@@ -39,6 +39,10 @@ parameters:
default:
tag: openstack.neutron.agent.dhcp
path: /var/log/neutron/dhcp-agent.log
+ NeutronDhcpAgentDnsmasqDnsServers:
+ default: []
+ description: List of servers to use as dnsmasq forwarders
+ type: comma_delimited_list
resources:
@@ -64,6 +68,7 @@ outputs:
- neutron::agents::dhcp::enable_isolated_metadata: {get_param: NeutronEnableIsolatedMetadata}
neutron::agents::dhcp::enable_force_metadata: {get_param: NeutronEnableForceMetadata}
neutron::agents::dhcp::enable_metadata_network: {get_param: NeutronEnableMetadataNetwork}
+ neutron::agents::dhcp::dnsmasq_dns_servers: {get_param: NeutronDhcpAgentDnsmasqDnsServers}
tripleo.neutron_dhcp.firewall_rules:
'115 neutron dhcp input':
proto: 'udp'
diff --git a/puppet/services/services.yaml b/puppet/services/services.yaml
index 90268c78..80da5352 100644
--- a/puppet/services/services.yaml
+++ b/puppet/services/services.yaml
@@ -118,4 +118,9 @@ outputs:
# Note we use distinct() here to filter any identical tasks, e.g yum update for all services
expression: $.data.where($ != null).select($.get('upgrade_tasks')).where($ != null).flatten().distinct()
data: {get_attr: [ServiceChain, role_data]}
+ upgrade_batch_tasks:
+ yaql:
+ # Note we use distinct() here to filter any identical tasks, e.g yum update for all services
+ expression: $.data.where($ != null).select($.get('upgrade_batch_tasks')).where($ != null).flatten().distinct()
+ data: {get_attr: [ServiceChain, role_data]}
service_metadata_settings: {get_attr: [ServiceServerMetadataHook, metadata]}
diff --git a/roles_data.yaml b/roles_data.yaml
index 05b8b23b..a21ef961 100644
--- a/roles_data.yaml
+++ b/roles_data.yaml
@@ -17,8 +17,9 @@
# disable_constraints: (boolean) optional, whether to disable Nova and Glance
# constraints for each role specified in the templates.
#
-# disable_upgrade_deployment: (boolean) optional, whether to run the composable upgrade
-# steps for all services that are deployed on the particular role.
+# upgrade_batch_size: (number): batch size for upgrades where tasks are
+# specified by services to run in batches vs all nodes at once.
+# This defaults to 1, but larger batches may be specified here.
#
# ServicesDefault: (list) optional default list of services to be deployed
# on the role, defaults to an empty list. Sets the default for the