summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOliver Walsh <owalsh@redhat.com>2017-10-20 23:27:15 +0100
committerOliver Walsh <owalsh@redhat.com>2017-11-10 09:25:25 +0000
commit4217767d8782abae02b11f553fc14daf7cd72916 (patch)
tree40a20fb6bd90c57faed62de6192d3b208a6a0194
parent690b938c58af1446bd27c34b17fa45e865241e99 (diff)
Refactor cellv2 host discovery logic to avoid races
The compute service list is polled until all expected hosts are reported or a timeout occurs (600s). Adds a cellv2_discovery flag to puppet services, which is used to generate a list of hosts that should have cellv2 host mappings. Adds a canonical fqdn that should match the fqdn reported by a host. Adds the ability to upload a config script for docker config instead of using complex bash one-liners. Closes-bug: 1720821 Change-Id: I33e2f296526c957cb5f96dff19682a4e60c6a0f0 (cherry picked from commit 61fcfca045aeb5be1ee280d8dd9c260fb39b9084)
-rw-r--r--common/deploy-steps.j26
-rw-r--r--common/services.yaml27
-rw-r--r--docker/services/nova-api.yaml63
-rw-r--r--docker/services/nova-compute.yaml2
-rw-r--r--overcloud.j2.yaml15
-rw-r--r--puppet/all-nodes-config.j2.yaml6
-rw-r--r--puppet/role.role.j2.yaml10
-rwxr-xr-xtools/yaml-validate.py5
8 files changed, 129 insertions, 5 deletions
diff --git a/common/deploy-steps.j2 b/common/deploy-steps.j2
index 5c923a99..542bf720 100644
--- a/common/deploy-steps.j2
+++ b/common/deploy-steps.j2
@@ -194,6 +194,7 @@ resources:
kolla_config: {get_param: [role_data, {{role.name}}, kolla_config]}
bootstrap_server_id: {get_param: [servers, {{primary_role_name}}, '0']}
puppet_step_config: {get_param: [role_data, {{role.name}}, step_config]}
+ docker_config_scripts: {get_param: [role_data, {{role.name}}, docker_config_scripts]}
tasks:
# Join host_prep_tasks with the other per-host configuration
list_concat:
@@ -217,6 +218,11 @@ resources:
# FIXME: can we move docker-puppet somewhere so it's installed via a package?
- name: Write docker-puppet.py
copy: content="{{docker_puppet_script}}" dest=/var/lib/docker-puppet/docker-puppet.py force=yes mode=0600
+ - name: Create /var/lib/docker-config-scripts
+ file: path=/var/lib/docker-config-scripts state=directory
+ - name: Write docker config scripts
+ copy: content="{{item.value.content}}" dest="/var/lib/docker-config-scripts/{{item.key}}" force=yes mode="{{item.value.mode|default('0600', true)}}"
+ with_dict: "{{docker_config_scripts}}"
# Here we are dumping all the docker container startup configuration data
# so that we can have access to how they are started outside of heat
# and docker-cmd. This lets us create command line tools to test containers.
diff --git a/common/services.yaml b/common/services.yaml
index a0015c7e..f7062066 100644
--- a/common/services.yaml
+++ b/common/services.yaml
@@ -89,6 +89,31 @@ resources:
service_names: {get_attr: [ServiceChain, role_data, service_names]}
docker_config: {get_attr: [ServiceChain, role_data, docker_config]}
+ DockerConfigScripts:
+ type: OS::Heat::Value
+ properties:
+ type: json
+ value:
+ yaql:
+ expression:
+ # select 'docker_config_scripts' only from services that have it
+ coalesce($.data.service_names, []).zip(coalesce($.data.docker_config_scripts, [])).where($[1] != null).select($[1]).reduce($1.mergeWith($2), {})
+ data:
+ service_names: {get_attr: [ServiceChain, role_data, service_names]}
+ docker_config_scripts: {get_attr: [ServiceChain, role_data, docker_config_scripts]}
+
+ CellV2Discovery:
+ type: OS::Heat::Value
+ properties:
+ type: boolean
+ value:
+ yaql:
+ expression:
+ # If any service in this role requires cellv2_discovery then this value is true
+ coalesce($.data.cellv2_discovery, []).contains(true)
+ data:
+ cellv2_discovery: {get_attr: [ServiceChain, role_data, cellv2_discovery]}
+
LoggingSourcesConfig:
type: OS::Heat::Value
properties:
@@ -282,5 +307,7 @@ outputs:
puppet_config: {get_attr: [PuppetConfig, value]}
kolla_config: {get_attr: [KollaConfig, value]}
docker_config: {get_attr: [DockerConfig, value]}
+ docker_config_scripts: {get_attr: [DockerConfigScripts, value]}
docker_puppet_tasks: {get_attr: [DockerPuppetTasks, value]}
host_prep_tasks: {get_attr: [HostPrepTasks, value]}
+ cellv2_discovery: {get_attr: [CellV2Discovery, value]}
diff --git a/docker/services/nova-api.yaml b/docker/services/nova-api.yaml
index 7f1b7a54..ee73f704 100644
--- a/docker/services/nova-api.yaml
+++ b/docker/services/nova-api.yaml
@@ -113,6 +113,58 @@ outputs:
- path: /var/log/nova
owner: nova:nova
recurse: true
+ docker_config_scripts:
+ nova_api_discover_hosts.sh:
+ mode: "0700"
+ content: |
+ #!/bin/bash
+ export OS_PROJECT_DOMAIN_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken project_domain_name)
+ export OS_USER_DOMAIN_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken user_domain_name)
+ export OS_PROJECT_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken project_name)
+ export OS_USERNAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken username)
+ export OS_PASSWORD=$(crudini --get /etc/nova/nova.conf keystone_authtoken password)
+ export OS_AUTH_URL=$(crudini --get /etc/nova/nova.conf keystone_authtoken auth_url)
+ export OS_AUTH_TYPE=password
+ export OS_IDENTITY_API_VERSION=3
+
+ echo "(cellv2) Running cell_v2 host discovery"
+ timeout=600
+ loop_wait=30
+ declare -A discoverable_hosts
+ for host in $(hiera -c /etc/puppet/hiera.yaml cellv2_discovery_hosts | sed -e '/^nil$/d' | tr "," " "); do discoverable_hosts[$host]=1; done
+ timeout_at=$(( $(date +"%s") + ${timeout} ))
+ echo "(cellv2) Waiting ${timeout} seconds for hosts to register"
+ finished=0
+ while : ; do
+ for host in $(openstack -q compute service list -c 'Host' -c 'Zone' -f value | awk '$2 != "internal" { print $1 }'); do
+ if (( discoverable_hosts[$host] == 1 )); then
+ echo "(cellv2) compute node $host has registered"
+ unset discoverable_hosts[$host]
+ fi
+ done
+ finished=1
+ for host in "${!discoverable_hosts[@]}"; do
+ if (( ${discoverable_hosts[$host]} == 1 )); then
+ echo "(cellv2) compute node $host has not registered"
+ finished=0
+ fi
+ done
+ remaining=$(( $timeout_at - $(date +"%s") ))
+ if (( $finished == 1 )); then
+ echo "(cellv2) All nodes registered"
+ break
+ elif (( $remaining <= 0 )); then
+ echo "(cellv2) WARNING: timeout waiting for nodes to register, running host discovery regardless"
+ echo "(cellv2) Expected host list:" $(hiera -c /etc/puppet/hiera.yaml cellv2_discovery_hosts | sed -e '/^nil$/d' | sort -u | tr ',' ' ')
+ echo "(cellv2) Detected host list:" $(openstack -q compute service list -c 'Host' -c 'Zone' -f value | awk '$2 != "internal" { print $1 }' | sort -u | tr '\n', ' ')
+ break
+ else
+ echo "(cellv2) Waiting ${remaining} seconds for hosts to register"
+ sleep $loop_wait
+ fi
+ done
+ echo "(cellv2) Running host discovery..."
+ su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 discover_hosts --verbose"
docker_config:
# db sync runs before permissions set by kolla_config
step_2:
@@ -223,9 +275,16 @@ outputs:
image: *nova_api_image
net: host
detach: false
- volumes: *nova_api_bootstrap_volumes
+ volumes:
+ list_concat:
+ - *nova_api_bootstrap_volumes
+ -
+ - /var/lib/config-data/nova/etc/my.cnf.d/tripleo.cnf:/etc/my.cnf.d/tripleo.cnf:ro
+ - /var/lib/config-data/nova/etc/nova/:/etc/nova/:ro
+ - /var/log/containers/nova:/var/log/nova
+ - /var/lib/docker-config-scripts/nova_api_discover_hosts.sh:/nova_api_discover_hosts.sh:ro
user: root
- command: "/usr/bin/bootstrap_host_exec nova_api su nova -s /bin/bash -c '/usr/bin/nova-manage cell_v2 discover_hosts --verbose'"
+ command: "/usr/bin/bootstrap_host_exec nova_api /nova_api_discover_hosts.sh"
metadata_settings:
get_attr: [NovaApiBase, role_data, metadata_settings]
host_prep_tasks:
diff --git a/docker/services/nova-compute.yaml b/docker/services/nova-compute.yaml
index b43193ec..6db9e580 100644
--- a/docker/services/nova-compute.yaml
+++ b/docker/services/nova-compute.yaml
@@ -73,6 +73,7 @@ outputs:
description: Role data for the Nova Compute service.
value:
service_name: {get_attr: [NovaComputeBase, role_data, service_name]}
+ cellv2_discovery: true
config_settings:
get_attr: [NovaComputeBase, role_data, config_settings]
logging_source: {get_attr: [NovaComputeBase, role_data, logging_source]}
@@ -111,7 +112,6 @@ outputs:
owner: nova:nova
recurse: true
docker_config:
- # FIXME: run discover hosts here
step_4:
nova_compute:
image: &nova_compute_image {get_param: DockerNovaComputeImage}
diff --git a/overcloud.j2.yaml b/overcloud.j2.yaml
index 3506fe8e..9ea195da 100644
--- a/overcloud.j2.yaml
+++ b/overcloud.j2.yaml
@@ -642,6 +642,21 @@ resources:
{% for role in roles %}
- {get_attr: [{{role.name}}ServiceNames, value]}
{% endfor %}
+ cellv2_discovery_hosts:
+ # Collects compute hostnames for all roles with a service that requires cellv2 host discovery
+ list_join:
+ - ','
+ - yaql:
+ expression: coalesce($.data.e.zip($.data.l).where($[0]).select($[1]).flatten(), [])
+ data:
+ e: # list of true/false for whether cellv2 host discovery is required for the roles
+{%- for role in roles %}
+ - {get_attr: [{{role.name}}ServiceChainRoleData, value, cellv2_discovery]}
+{%- endfor %}
+ l: # list of list of compute hostnames for the roles
+{%- for role in roles %}
+ - {get_attr: [{{role.name}}, hostname_map, canonical]}
+{%- endfor %}
controller_ips: {get_attr: [{{primary_role_name}}, ip_address]}
controller_names: {get_attr: [{{primary_role_name}}, hostname]}
service_ips:
diff --git a/puppet/all-nodes-config.j2.yaml b/puppet/all-nodes-config.j2.yaml
index bdd2bcf3..65949625 100644
--- a/puppet/all-nodes-config.j2.yaml
+++ b/puppet/all-nodes-config.j2.yaml
@@ -22,6 +22,8 @@ parameters:
type: json
controller_names:
type: comma_delimited_list
+ cellv2_discovery_hosts:
+ type: comma_delimited_list
NetVipMap:
type: json
RedisVirtualIP:
@@ -141,6 +143,10 @@ resources:
list_join:
- ','
- {get_param: controller_names}
+ - cellv2_discovery_hosts:
+ list_join:
+ - ','
+ - {get_param: cellv2_discovery_hosts}
deploy_identifier: {get_param: DeployIdentifier}
update_identifier: {get_param: UpdateIdentifier}
stack_action: {get_param: StackAction}
diff --git a/puppet/role.role.j2.yaml b/puppet/role.role.j2.yaml
index d53afd04..a3cbe851 100644
--- a/puppet/role.role.j2.yaml
+++ b/puppet/role.role.j2.yaml
@@ -477,6 +477,14 @@ resources:
- '.'
- - {get_attr: [{{server_resource_name}}, name]}
- ctlplane
+ canonical:
+ fqdn:
+ list_join:
+ - '.'
+ - - {get_attr: [{{server_resource_name}}, name]}
+ - {get_param: CloudDomain}
+ short:
+ - {get_attr: [{{server_resource_name}}, name]}
PreNetworkConfig:
type: OS::TripleO::{{role.name}}::PreNetworkConfig
@@ -602,6 +610,7 @@ resources:
fqdn_management: {get_attr: [NetHostMap, value, management, fqdn]}
fqdn_ctlplane: {get_attr: [NetHostMap, value, ctlplane, fqdn]}
fqdn_external: {get_attr: [NetHostMap, value, external, fqdn]}
+ fqdn_canonical: {get_attr: [NetHostMap, value, canonical, fqdn]}
# Resource for site-specific injection of root certificate
NodeTLSCAData:
@@ -696,6 +705,7 @@ outputs:
{{network.name_lower|default(network.name.lower())}}: {get_attr: [NetHostMap, value, {{network.name_lower|default(network.name.lower()) }}, fqdn]}
{%- endfor %}
ctlplane: {get_attr: [NetHostMap, value, ctlplane, fqdn]}
+ canonical: {get_attr: [NetHostMap, value, canonical, fqdn]}
hosts_entry:
value:
str_replace:
diff --git a/tools/yaml-validate.py b/tools/yaml-validate.py
index 76f856db..9279f1db 100755
--- a/tools/yaml-validate.py
+++ b/tools/yaml-validate.py
@@ -31,14 +31,15 @@ envs_containing_endpoint_map = ['tls-endpoints-public-dns.yaml',
'tls-endpoints-public-ip.yaml',
'tls-everywhere-endpoints-dns.yaml']
ENDPOINT_MAP_FILE = 'endpoint_map.yaml'
-OPTIONAL_SECTIONS = ['workflow_tasks']
+OPTIONAL_SECTIONS = ['workflow_tasks', 'cellv2_discovery']
REQUIRED_DOCKER_SECTIONS = ['service_name', 'docker_config', 'puppet_config',
'config_settings', 'step_config']
OPTIONAL_DOCKER_SECTIONS = ['docker_puppet_tasks', 'upgrade_tasks',
'post_upgrade_tasks', 'update_tasks',
'service_config_settings',
'host_prep_tasks', 'metadata_settings',
- 'kolla_config', 'logging_source', 'logging_groups']
+ 'kolla_config', 'logging_source',
+ 'logging_groups', 'docker_config_scripts']
REQUIRED_DOCKER_PUPPET_CONFIG_SECTIONS = ['config_volume', 'step_config',
'config_image']
OPTIONAL_DOCKER_PUPPET_CONFIG_SECTIONS = [ 'puppet_tags', 'volumes' ]