diff options
author | Oliver Walsh <owalsh@redhat.com> | 2017-10-20 23:27:15 +0100 |
---|---|---|
committer | Oliver Walsh <owalsh@redhat.com> | 2017-11-10 09:25:25 +0000 |
commit | 4217767d8782abae02b11f553fc14daf7cd72916 (patch) | |
tree | 40a20fb6bd90c57faed62de6192d3b208a6a0194 | |
parent | 690b938c58af1446bd27c34b17fa45e865241e99 (diff) |
Refactor cellv2 host discovery logic to avoid races
The compute service list is polled until all expected hosts are reported or a
timeout occurs (600s).
Adds a cellv2_discovery flag to puppet services, which is used to generate a
list of hosts that should have cellv2 host mappings.
Adds a canonical fqdn that should match the fqdn reported by a host.
Adds the ability to upload a config script for docker config instead of using
complex bash one-liners.
Closes-bug: 1720821
Change-Id: I33e2f296526c957cb5f96dff19682a4e60c6a0f0
(cherry picked from commit 61fcfca045aeb5be1ee280d8dd9c260fb39b9084)
-rw-r--r-- | common/deploy-steps.j2 | 6 | ||||
-rw-r--r-- | common/services.yaml | 27 | ||||
-rw-r--r-- | docker/services/nova-api.yaml | 63 | ||||
-rw-r--r-- | docker/services/nova-compute.yaml | 2 | ||||
-rw-r--r-- | overcloud.j2.yaml | 15 | ||||
-rw-r--r-- | puppet/all-nodes-config.j2.yaml | 6 | ||||
-rw-r--r-- | puppet/role.role.j2.yaml | 10 | ||||
-rwxr-xr-x | tools/yaml-validate.py | 5 |
8 files changed, 129 insertions, 5 deletions
diff --git a/common/deploy-steps.j2 b/common/deploy-steps.j2 index 5c923a99..542bf720 100644 --- a/common/deploy-steps.j2 +++ b/common/deploy-steps.j2 @@ -194,6 +194,7 @@ resources: kolla_config: {get_param: [role_data, {{role.name}}, kolla_config]} bootstrap_server_id: {get_param: [servers, {{primary_role_name}}, '0']} puppet_step_config: {get_param: [role_data, {{role.name}}, step_config]} + docker_config_scripts: {get_param: [role_data, {{role.name}}, docker_config_scripts]} tasks: # Join host_prep_tasks with the other per-host configuration list_concat: @@ -217,6 +218,11 @@ resources: # FIXME: can we move docker-puppet somewhere so it's installed via a package? - name: Write docker-puppet.py copy: content="{{docker_puppet_script}}" dest=/var/lib/docker-puppet/docker-puppet.py force=yes mode=0600 + - name: Create /var/lib/docker-config-scripts + file: path=/var/lib/docker-config-scripts state=directory + - name: Write docker config scripts + copy: content="{{item.value.content}}" dest="/var/lib/docker-config-scripts/{{item.key}}" force=yes mode="{{item.value.mode|default('0600', true)}}" + with_dict: "{{docker_config_scripts}}" # Here we are dumping all the docker container startup configuration data # so that we can have access to how they are started outside of heat # and docker-cmd. This lets us create command line tools to test containers. 
diff --git a/common/services.yaml b/common/services.yaml index a0015c7e..f7062066 100644 --- a/common/services.yaml +++ b/common/services.yaml @@ -89,6 +89,31 @@ resources: service_names: {get_attr: [ServiceChain, role_data, service_names]} docker_config: {get_attr: [ServiceChain, role_data, docker_config]} + DockerConfigScripts: + type: OS::Heat::Value + properties: + type: json + value: + yaql: + expression: + # select 'docker_config_scripts' only from services that have it + coalesce($.data.service_names, []).zip(coalesce($.data.docker_config_scripts, [])).where($[1] != null).select($[1]).reduce($1.mergeWith($2), {}) + data: + service_names: {get_attr: [ServiceChain, role_data, service_names]} + docker_config_scripts: {get_attr: [ServiceChain, role_data, docker_config_scripts]} + + CellV2Discovery: + type: OS::Heat::Value + properties: + type: boolean + value: + yaql: + expression: + # If any service in this role requires cellv2_discovery then this value is true + coalesce($.data.cellv2_discovery, []).contains(true) + data: + cellv2_discovery: {get_attr: [ServiceChain, role_data, cellv2_discovery]} + LoggingSourcesConfig: type: OS::Heat::Value properties: @@ -282,5 +307,7 @@ outputs: puppet_config: {get_attr: [PuppetConfig, value]} kolla_config: {get_attr: [KollaConfig, value]} docker_config: {get_attr: [DockerConfig, value]} + docker_config_scripts: {get_attr: [DockerConfigScripts, value]} docker_puppet_tasks: {get_attr: [DockerPuppetTasks, value]} host_prep_tasks: {get_attr: [HostPrepTasks, value]} + cellv2_discovery: {get_attr: [CellV2Discovery, value]} diff --git a/docker/services/nova-api.yaml b/docker/services/nova-api.yaml index 7f1b7a54..ee73f704 100644 --- a/docker/services/nova-api.yaml +++ b/docker/services/nova-api.yaml @@ -113,6 +113,58 @@ outputs: - path: /var/log/nova owner: nova:nova recurse: true + docker_config_scripts: + nova_api_discover_hosts.sh: + mode: "0700" + content: | + #!/bin/bash + export OS_PROJECT_DOMAIN_NAME=$(crudini --get 
/etc/nova/nova.conf keystone_authtoken project_domain_name) + export OS_USER_DOMAIN_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken user_domain_name) + export OS_PROJECT_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken project_name) + export OS_USERNAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken username) + export OS_PASSWORD=$(crudini --get /etc/nova/nova.conf keystone_authtoken password) + export OS_AUTH_URL=$(crudini --get /etc/nova/nova.conf keystone_authtoken auth_url) + export OS_AUTH_TYPE=password + export OS_IDENTITY_API_VERSION=3 + + echo "(cellv2) Running cell_v2 host discovery" + timeout=600 + loop_wait=30 + declare -A discoverable_hosts + for host in $(hiera -c /etc/puppet/hiera.yaml cellv2_discovery_hosts | sed -e '/^nil$/d' | tr "," " "); do discoverable_hosts[$host]=1; done + timeout_at=$(( $(date +"%s") + ${timeout} )) + echo "(cellv2) Waiting ${timeout} seconds for hosts to register" + finished=0 + while : ; do + for host in $(openstack -q compute service list -c 'Host' -c 'Zone' -f value | awk '$2 != "internal" { print $1 }'); do + if (( discoverable_hosts[$host] == 1 )); then + echo "(cellv2) compute node $host has registered" + unset discoverable_hosts[$host] + fi + done + finished=1 + for host in "${!discoverable_hosts[@]}"; do + if (( ${discoverable_hosts[$host]} == 1 )); then + echo "(cellv2) compute node $host has not registered" + finished=0 + fi + done + remaining=$(( $timeout_at - $(date +"%s") )) + if (( $finished == 1 )); then + echo "(cellv2) All nodes registered" + break + elif (( $remaining <= 0 )); then + echo "(cellv2) WARNING: timeout waiting for nodes to register, running host discovery regardless" + echo "(cellv2) Expected host list:" $(hiera -c /etc/puppet/hiera.yaml cellv2_discovery_hosts | sed -e '/^nil$/d' | sort -u | tr ',' ' ') + echo "(cellv2) Detected host list:" $(openstack -q compute service list -c 'Host' -c 'Zone' -f value | awk '$2 != "internal" { print $1 }' | sort -u | tr '\n', ' 
') + break + else + echo "(cellv2) Waiting ${remaining} seconds for hosts to register" + sleep $loop_wait + fi + done + echo "(cellv2) Running host discovery..." + su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 discover_hosts --verbose" docker_config: # db sync runs before permissions set by kolla_config step_2: @@ -223,9 +275,16 @@ outputs: image: *nova_api_image net: host detach: false - volumes: *nova_api_bootstrap_volumes + volumes: + list_concat: + - *nova_api_bootstrap_volumes + - + - /var/lib/config-data/nova/etc/my.cnf.d/tripleo.cnf:/etc/my.cnf.d/tripleo.cnf:ro + - /var/lib/config-data/nova/etc/nova/:/etc/nova/:ro + - /var/log/containers/nova:/var/log/nova + - /var/lib/docker-config-scripts/nova_api_discover_hosts.sh:/nova_api_discover_hosts.sh:ro user: root - command: "/usr/bin/bootstrap_host_exec nova_api su nova -s /bin/bash -c '/usr/bin/nova-manage cell_v2 discover_hosts --verbose'" + command: "/usr/bin/bootstrap_host_exec nova_api /nova_api_discover_hosts.sh" metadata_settings: get_attr: [NovaApiBase, role_data, metadata_settings] host_prep_tasks: diff --git a/docker/services/nova-compute.yaml b/docker/services/nova-compute.yaml index b43193ec..6db9e580 100644 --- a/docker/services/nova-compute.yaml +++ b/docker/services/nova-compute.yaml @@ -73,6 +73,7 @@ outputs: description: Role data for the Nova Compute service. 
value: service_name: {get_attr: [NovaComputeBase, role_data, service_name]} + cellv2_discovery: true config_settings: get_attr: [NovaComputeBase, role_data, config_settings] logging_source: {get_attr: [NovaComputeBase, role_data, logging_source]} @@ -111,7 +112,6 @@ outputs: owner: nova:nova recurse: true docker_config: - # FIXME: run discover hosts here step_4: nova_compute: image: &nova_compute_image {get_param: DockerNovaComputeImage} diff --git a/overcloud.j2.yaml b/overcloud.j2.yaml index 3506fe8e..9ea195da 100644 --- a/overcloud.j2.yaml +++ b/overcloud.j2.yaml @@ -642,6 +642,21 @@ resources: {% for role in roles %} - {get_attr: [{{role.name}}ServiceNames, value]} {% endfor %} + cellv2_discovery_hosts: + # Collects compute hostnames for all roles with a service that requires cellv2 host discovery + list_join: + - ',' + - yaql: + expression: coalesce($.data.e.zip($.data.l).where($[0]).select($[1]).flatten(), []) + data: + e: # list of true/fails for whether cellsv2 host discovery is required for the roles +{%- for role in roles %} + - {get_attr: [{{role.name}}ServiceChainRoleData, value, cellv2_discovery]} +{%- endfor %} + l: # list of list of compute hostnames for the roles +{%- for role in roles %} + - {get_attr: [{{role.name}}, hostname_map, canonical]} +{%- endfor %} controller_ips: {get_attr: [{{primary_role_name}}, ip_address]} controller_names: {get_attr: [{{primary_role_name}}, hostname]} service_ips: diff --git a/puppet/all-nodes-config.j2.yaml b/puppet/all-nodes-config.j2.yaml index bdd2bcf3..65949625 100644 --- a/puppet/all-nodes-config.j2.yaml +++ b/puppet/all-nodes-config.j2.yaml @@ -22,6 +22,8 @@ parameters: type: json controller_names: type: comma_delimited_list + cellv2_discovery_hosts: + type: comma_delimited_list NetVipMap: type: json RedisVirtualIP: @@ -141,6 +143,10 @@ resources: list_join: - ',' - {get_param: controller_names} + - cellv2_discovery_hosts: + list_join: + - ',' + - {get_param: cellv2_discovery_hosts} deploy_identifier: 
{get_param: DeployIdentifier} update_identifier: {get_param: UpdateIdentifier} stack_action: {get_param: StackAction} diff --git a/puppet/role.role.j2.yaml b/puppet/role.role.j2.yaml index d53afd04..a3cbe851 100644 --- a/puppet/role.role.j2.yaml +++ b/puppet/role.role.j2.yaml @@ -477,6 +477,14 @@ resources: - '.' - - {get_attr: [{{server_resource_name}}, name]} - ctlplane + canonical: + fqdn: + list_join: + - '.' + - - {get_attr: [{{server_resource_name}}, name]} + - {get_param: CloudDomain} + short: + - {get_attr: [{{server_resource_name}}, name]} PreNetworkConfig: type: OS::TripleO::{{role.name}}::PreNetworkConfig @@ -602,6 +610,7 @@ resources: fqdn_management: {get_attr: [NetHostMap, value, management, fqdn]} fqdn_ctlplane: {get_attr: [NetHostMap, value, ctlplane, fqdn]} fqdn_external: {get_attr: [NetHostMap, value, external, fqdn]} + fqdn_canonical: {get_attr: [NetHostMap, value, canonical, fqdn]} # Resource for site-specific injection of root certificate NodeTLSCAData: @@ -696,6 +705,7 @@ outputs: {{network.name_lower|default(network.name.lower())}}: {get_attr: [NetHostMap, value, {{network.name_lower|default(network.name.lower()) }}, fqdn]} {%- endfor %} ctlplane: {get_attr: [NetHostMap, value, ctlplane, fqdn]} + canonical: {get_attr: [NetHostMap, value, canonical, fqdn]} hosts_entry: value: str_replace: diff --git a/tools/yaml-validate.py b/tools/yaml-validate.py index 76f856db..9279f1db 100755 --- a/tools/yaml-validate.py +++ b/tools/yaml-validate.py @@ -31,14 +31,15 @@ envs_containing_endpoint_map = ['tls-endpoints-public-dns.yaml', 'tls-endpoints-public-ip.yaml', 'tls-everywhere-endpoints-dns.yaml'] ENDPOINT_MAP_FILE = 'endpoint_map.yaml' -OPTIONAL_SECTIONS = ['workflow_tasks'] +OPTIONAL_SECTIONS = ['workflow_tasks', 'cellv2_discovery'] REQUIRED_DOCKER_SECTIONS = ['service_name', 'docker_config', 'puppet_config', 'config_settings', 'step_config'] OPTIONAL_DOCKER_SECTIONS = ['docker_puppet_tasks', 'upgrade_tasks', 'post_upgrade_tasks', 'update_tasks', 
'service_config_settings', 'host_prep_tasks', 'metadata_settings', - 'kolla_config', 'logging_source', 'logging_groups'] + 'kolla_config', 'logging_source', + 'logging_groups', 'docker_config_scripts'] REQUIRED_DOCKER_PUPPET_CONFIG_SECTIONS = ['config_volume', 'step_config', 'config_image'] OPTIONAL_DOCKER_PUPPET_CONFIG_SECTIONS = [ 'puppet_tags', 'volumes' ] |