diff options
28 files changed, 404 insertions, 115 deletions
diff --git a/ci/environments/multinode_major_upgrade.yaml b/ci/environments/multinode_major_upgrade.yaml index 6710fef7..6af267bf 100644 --- a/ci/environments/multinode_major_upgrade.yaml +++ b/ci/environments/multinode_major_upgrade.yaml @@ -37,6 +37,14 @@ parameter_defaults: - OS::TripleO::Services::Timezone - OS::TripleO::Services::TripleoPackages - OS::TripleO::Services::TripleoFirewall + - OS::TripleO::Services::NovaConductor + - OS::TripleO::Services::NovaApi + - OS::TripleO::Services::NovaPlacement + - OS::TripleO::Services::NovaMetadata + - OS::TripleO::Services::NovaScheduler + - OS::TripleO::Services::NovaCompute + - OS::TripleO::Services::NovaLibvirt + - OS::TripleO::Services::Pacemaker ControllerExtraConfig: nova::compute::libvirt::services::libvirt_virt_type: qemu nova::compute::libvirt::libvirt_virt_type: qemu diff --git a/docker/docker-puppet.py b/docker/docker-puppet.py index fe87ce7a..86c8ec98 100755 --- a/docker/docker-puppet.py +++ b/docker/docker-puppet.py @@ -23,6 +23,7 @@ import os import subprocess import sys import tempfile +import multiprocessing # this is to match what we do in deployed-server @@ -45,6 +46,15 @@ def pull_image(name): def rm_container(name): + if os.environ.get('SHOW_DIFF', None): + print('Diffing container: %s' % name) + subproc = subprocess.Popen(['/usr/bin/docker', 'diff', name], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + cmd_stdout, cmd_stderr = subproc.communicate() + print(cmd_stdout) + print(cmd_stderr) + print('Removing container: %s' % name) subproc = subprocess.Popen(['/usr/bin/docker', 'rm', name], stdout=subprocess.PIPE, @@ -53,6 +63,8 @@ def rm_container(name): print(cmd_stdout) print(cmd_stderr) +process_count = int(os.environ.get('PROCESS_COUNT', + multiprocessing.cpu_count())) config_file = os.environ.get('CONFIG', '/var/lib/docker-puppet/docker-puppet.json') print('docker-puppet') @@ -106,34 +118,25 @@ for service in (json_data or []): print('Service compilation completed.\n') -for config_volume in configs: - - service = configs[config_volume] - puppet_tags = service[1] or '' - manifest = service[2] or '' - config_image = service[3] or '' - volumes = service[4] if len(service) > 4 else [] - - if puppet_tags: - puppet_tags = "file,file_line,concat,%s" % puppet_tags - else: - puppet_tags = "file,file_line,concat" +def mp_puppet_config((config_volume, puppet_tags, manifest, config_image, volumes)): print('---------') print('config_volume %s' % config_volume) print('puppet_tags %s' % puppet_tags) print('manifest %s' % manifest) print('config_image %s' % config_image) + print('volumes %s' % volumes) hostname = short_hostname() + sh_script = '/var/lib/docker-puppet/docker-puppet-%s.sh' % config_volume - with open('/var/lib/docker-puppet/docker-puppet.sh', 'w') as script_file: + with open(sh_script, 'w') as script_file: os.chmod(script_file.name, 0755) script_file.write("""#!/bin/bash set -ex mkdir -p /etc/puppet cp -a /tmp/puppet-etc/* /etc/puppet rm -Rf /etc/puppet/ssl # not in use and causes permission errors - echo '{"step": 6}' > /etc/puppet/hieradata/docker.json + echo '{"step": %(step)s}' > /etc/puppet/hieradata/docker.json TAGS="" if [ -n "%(puppet_tags)s" ]; then TAGS='--tags "%(puppet_tags)s"' @@ -168,7 +171,8 @@ for config_volume in configs: fi """ % {'puppet_tags': puppet_tags, 'name': config_volume, 'hostname': hostname, - 'no_archive': os.environ.get('NO_ARCHIVE', '')}) + 'no_archive': os.environ.get('NO_ARCHIVE', ''), + 'step': os.environ.get('STEP', '6')}) with tempfile.NamedTemporaryFile() as tmp_man: with open(tmp_man.name, 'w') as man_file: @@ -186,12 +190,12 @@ for config_volume in configs: '--volume', '/usr/share/openstack-puppet/modules/:/usr/share/openstack-puppet/modules/:ro', '--volume', '/var/lib/config-data/:/var/lib/config-data/:rw', '--volume', 'tripleo_logs:/var/log/tripleo/', - '--volume', '/var/lib/docker-puppet/docker-puppet.sh:/var/lib/docker-puppet/docker-puppet.sh:ro'] + '--volume', '%s:%s:rw' % (sh_script, sh_script) ] for volume in volumes: dcmd.extend(['--volume', volume]) - dcmd.extend(['--entrypoint', '/var/lib/docker-puppet/docker-puppet.sh']) + dcmd.extend(['--entrypoint', sh_script]) env = {} if os.environ.get('NET_HOST', 'false') == 'true': @@ -207,6 +211,34 @@ for config_volume in configs: print(cmd_stderr) if subproc.returncode != 0: print('Failed running docker-puppet.py for %s' % config_volume) - sys.exit(subproc.returncode) - else: - rm_container('docker-puppet-%s' % config_volume) + rm_container('docker-puppet-%s' % config_volume) + return subproc.returncode + +# Holds all the information for each process to consume. +# Instead of starting them all linearly we run them using a process +# pool. This creates a list of arguments for the above function +# to consume. +process_map = [] + +for config_volume in configs: + + service = configs[config_volume] + puppet_tags = service[1] or '' + manifest = service[2] or '' + config_image = service[3] or '' + volumes = service[4] if len(service) > 4 else [] + + if puppet_tags: + puppet_tags = "file,file_line,concat,%s" % puppet_tags + else: + puppet_tags = "file,file_line,concat" + + process_map.append([config_volume, puppet_tags, manifest, config_image, volumes]) + +for p in process_map: + print '--\n%s' % p + +# Fire off processes to perform each configuration. Defaults +# to the number of CPUs on the system. +p = multiprocessing.Pool(process_count) +p.map(mp_puppet_config, process_map) diff --git a/docker/post.j2.yaml b/docker/post.j2.yaml index 3473f4ca..e1154a62 100644 --- a/docker/post.j2.yaml +++ b/docker/post.j2.yaml @@ -68,6 +68,7 @@ resources: - name: CONFIG - name: NET_HOST - name: NO_ARCHIVE + - name: STEP {{primary_role_name}}DockerPuppetTasksDeployment{{step}}: type: OS::Heat::SoftwareDeployment @@ -85,6 +86,7 @@ resources: CONFIG: /var/lib/docker-puppet/docker-puppet-tasks{{step}}.json NET_HOST: 'true' NO_ARCHIVE: 'true' + STEP: {{step}} {% endfor %} # END primary_role_name docker-puppet-tasks diff --git a/docker/services/keystone.yaml b/docker/services/keystone.yaml index 1d25da72..2bf8fa09 100644 --- a/docker/services/keystone.yaml +++ b/docker/services/keystone.yaml @@ -144,7 +144,7 @@ outputs: [ 'keystone', 'keystone-manage', 'bootstrap', '--bootstrap-password', {get_param: AdminPassword} ] docker_puppet_tasks: # Keystone endpoint creation occurs only on single node - step_4: + step_3: - 'keystone_init_tasks' - 'keystone_config,keystone_domain_config,keystone_endpoint,keystone_identity_provider,keystone_paste_ini,keystone_role,keystone_service,keystone_tenant,keystone_user,keystone_user_role,keystone_domain' - 'include ::tripleo::profile::base::keystone' diff --git a/docker/services/neutron-api.yaml b/docker/services/neutron-api.yaml index e444f391..dfd1d5c0 100644 --- a/docker/services/neutron-api.yaml +++ b/docker/services/neutron-api.yaml @@ -81,6 +81,9 @@ outputs: net: host privileged: false detach: false + # FIXME: we should make config file permissions right + # and run as neutron user + user: root volumes: - /var/lib/config-data/neutron/etc/neutron:/etc/neutron:ro - /var/lib/config-data/neutron/usr/share/neutron:/usr/share/neutron:ro diff --git a/environments/hyperconverged-ceph.yaml b/environments/hyperconverged-ceph.yaml index 77fa5a49..3738072c 100644 --- a/environments/hyperconverged-ceph.yaml +++ b/environments/hyperconverged-ceph.yaml @@ -11,6 +11,7 @@ parameter_defaults: - OS::TripleO::Services::Timezone - OS::TripleO::Services::Ntp - OS::TripleO::Services::Snmp + - OS::TripleO::Services::Sshd - OS::TripleO::Services::NovaCompute - OS::TripleO::Services::NovaLibvirt - OS::TripleO::Services::Kernel @@ -25,4 +26,6 @@ parameter_defaults: - OS::TripleO::Services::OpenDaylightOvs - OS::TripleO::Services::SensuClient - OS::TripleO::Services::FluentdClient + - OS::TripleO::Services::AuditD + - OS::TripleO::Services::Collectd - OS::TripleO::Services::CephOSD diff --git a/environments/major-upgrade-composable-steps.yaml b/environments/major-upgrade-composable-steps.yaml index 9e3cddba..9ecc2251 100644 --- a/environments/major-upgrade-composable-steps.yaml +++ b/environments/major-upgrade-composable-steps.yaml @@ -7,9 +7,9 @@ parameter_defaults: # Newton to Ocata, we need to remove old hiera hook data and # install ansible heat agents and ansible-pacemaker set -eu + yum install -y openstack-heat-agents yum install -y python-heat-agent-* yum install -y ansible-pacemaker rm -f /usr/libexec/os-apply-config/templates/etc/puppet/hiera.yaml rm -f /usr/libexec/os-refresh-config/configure.d/40-hiera-datafiles rm -f /etc/puppet/hieradata/*.yaml - diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh index 6bfe1239..8b900842 100755 --- a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh @@ -50,6 +50,7 @@ mysql_need_update if [[ -n $(is_bootstrap_node) ]]; then if [ $DO_MYSQL_UPGRADE -eq 1 ]; then + backup_flags="--defaults-extra-file=/root/.my.cnf -u root --flush-privileges --all-databases --single-transaction" mysqldump $backup_flags > "$MYSQL_BACKUP_DIR/openstack_database.sql" cp -rdp /etc/my.cnf* "$MYSQL_BACKUP_DIR" fi diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml index 8c91027d..74d3be71 100644 --- a/extraconfig/tasks/major_upgrade_pacemaker.yaml +++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml @@ -18,10 +18,6 @@ parameters: constraints: - allowed_values: ['auto', 'yes', 'no'] default: 'auto' - IgnoreCephUpgradeWarnings: - type: boolean - default: false - description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean KeepSaharaServicesOnUpgrade: type: boolean default: true diff --git a/network/ports/net_ip_list_map.yaml b/network/ports/net_ip_list_map.yaml index 5782bbe9..83d875e8 100644 --- a/network/ports/net_ip_list_map.yaml +++ b/network/ports/net_ip_list_map.yaml @@ -35,6 +35,32 @@ parameters: default: [] type: json + InternalApiNetName: + default: internal_api + description: The name of the internal API network. + type: string + ExternalNetName: + default: external + description: The name of the external network. + type: string + ManagementNetName: + default: management + description: The name of the management network. + type: string + StorageNetName: + default: storage + description: The name of the storage network. + type: string + StorageMgmtNetName: + default: storage_mgmt + description: The name of the Storage management network. + type: string + TenantNetName: + default: tenant + description: The name of the tenant network. + type: string + + resources: # This adds the extra "services" on for keystone # so that keystone_admin_api_network and @@ -58,19 +84,33 @@ resources: - keystone_admin_api - keystone_public_api + NetIpMapValue: + type: OS::Heat::Value + properties: + type: json + value: + map_replace: + - ctlplane: {get_param: ControlPlaneIpList} + external: {get_param: ExternalIpList} + internal_api: {get_param: InternalApiIpList} + storage: {get_param: StorageIpList} + storage_mgmt: {get_param: StorageMgmtIpList} + tenant: {get_param: TenantIpList} + management: {get_param: ManagementIpList} + - keys: + external: {get_param: ExternalNetName} + internal_api: {get_param: InternalApiNetName} + storage: {get_param: StorageNetName} + storage_mgmt: {get_param: StorageMgmtNetName} + tenant: {get_param: TenantNetName} + management: {get_param: ManagementNetName} + outputs: net_ip_map: description: > A Hash containing a mapping of network names to assigned lists of IP addresses. - value: - ctlplane: {get_param: ControlPlaneIpList} - external: {get_param: ExternalIpList} - internal_api: {get_param: InternalApiIpList} - storage: {get_param: StorageIpList} - storage_mgmt: {get_param: StorageMgmtIpList} - tenant: {get_param: TenantIpList} - management: {get_param: ManagementIpList} + value: {get_attr: [NetIpMapValue, value]} service_ips: description: > Map of enabled services to a list of their IP addresses @@ -92,14 +132,7 @@ outputs: for_each: SERVICE: {get_attr: [EnabledServicesValue, value]} - values: {get_param: ServiceNetMap} - - values: - ctlplane: {get_param: ControlPlaneIpList} - external: {get_param: ExternalIpList} - internal_api: {get_param: InternalApiIpList} - storage: {get_param: StorageIpList} - storage_mgmt: {get_param: StorageMgmtIpList} - tenant: {get_param: TenantIpList} - management: {get_param: ManagementIpList} + - values: {get_attr: [NetIpMapValue, value]} service_hostnames: description: > Map of enabled services to a list of hostnames where they're running diff --git a/network/ports/net_ip_map.yaml b/network/ports/net_ip_map.yaml index c8cf733f..c974d72e 100644 --- a/network/ports/net_ip_map.yaml +++ b/network/ports/net_ip_map.yaml @@ -69,35 +69,136 @@ parameters: type: string description: IP address with brackets in case of IPv6 + InternalApiNetName: + default: internal_api + description: The name of the internal API network. + type: string + ExternalNetName: + default: external + description: The name of the external network. + type: string + ManagementNetName: + default: management + description: The name of the management network. + type: string + StorageNetName: + default: storage + description: The name of the storage network. + type: string + StorageMgmtNetName: + default: storage_mgmt + description: The name of the Storage management network. + type: string + TenantNetName: + default: tenant + description: The name of the tenant network. + type: string + +resources: + + NetIpMapValue: + type: OS::Heat::Value + properties: + type: json + value: + map_replace: + - ctlplane: {get_param: ControlPlaneIp} + external: {get_param: ExternalIp} + internal_api: {get_param: InternalApiIp} + storage: {get_param: StorageIp} + storage_mgmt: {get_param: StorageMgmtIp} + tenant: {get_param: TenantIp} + management: {get_param: ManagementIp} + ctlplane_subnet: + list_join: + - '' + - - {get_param: ControlPlaneIp} + - '/' + - {get_param: ControlPlaneSubnetCidr} + external_subnet: {get_param: ExternalIpSubnet} + internal_api_subnet: {get_param: InternalApiIpSubnet} + storage_subnet: {get_param: StorageIpSubnet} + storage_mgmt_subnet: {get_param: StorageMgmtIpSubnet} + tenant_subnet: {get_param: TenantIpSubnet} + management_subnet: {get_param: ManagementIpSubnet} + ctlplane_uri: {get_param: ControlPlaneIp} + external_uri: {get_param: ExternalIpUri} + internal_api_uri: {get_param: InternalApiIpUri} + storage_uri: {get_param: StorageIpUri} + storage_mgmt_uri: {get_param: StorageMgmtIpUri} + tenant_uri: {get_param: TenantIpUri} + management_uri: {get_param: ManagementIpUri} + - keys: + external: {get_param: ExternalNetName} + internal_api: {get_param: InternalApiNetName} + storage: {get_param: StorageNetName} + storage_mgmt: {get_param: StorageMgmtNetName} + tenant: {get_param: TenantNetName} + management: {get_param: ManagementNetName} + external_subnet: + str_replace: + template: NAME_subnet + params: + NAME: {get_param: ExternalNetName} + internal_api_subnet: + str_replace: + template: NAME_subnet + params: + NAME: {get_param: InternalApiNetName} + storage_subnet: + str_replace: + template: NAME_subnet + params: + NAME: {get_param: StorageNetName} + storage_mgmt_subnet: + str_replace: + template: NAME_subnet + params: + NAME: {get_param: StorageMgmtNetName} + tenant_subnet: + str_replace: + template: NAME_subnet + params: + NAME: {get_param: TenantNetName} + management_subnet: + str_replace: + template: NAME_subnet + params: + NAME: {get_param: ManagementNetName} + external_uri: + str_replace: + template: NAME_uri + params: + NAME: {get_param: ExternalNetName} + internal_api_uri: + str_replace: + template: NAME_uri + params: + NAME: {get_param: InternalApiNetName} + storage_uri: + str_replace: + template: NAME_uri + params: + NAME: {get_param: StorageNetName} + storage_mgmt_uri: + str_replace: + template: NAME_uri + params: + NAME: {get_param: StorageMgmtNetName} + tenant_uri: + str_replace: + template: NAME_uri + params: + NAME: {get_param: TenantNetName} + management_uri: + str_replace: + template: NAME_uri + params: + NAME: {get_param: ManagementNetName} + outputs: net_ip_map: description: > A Hash containing a mapping of network names to assigned IPs for a specific machine. - value: - ctlplane: {get_param: ControlPlaneIp} - external: {get_param: ExternalIp} - internal_api: {get_param: InternalApiIp} - storage: {get_param: StorageIp} - storage_mgmt: {get_param: StorageMgmtIp} - tenant: {get_param: TenantIp} - management: {get_param: ManagementIp} - ctlplane_subnet: - list_join: - - '' - - - {get_param: ControlPlaneIp} - - '/' - - {get_param: ControlPlaneSubnetCidr} - external_subnet: {get_param: ExternalIpSubnet} - internal_api_subnet: {get_param: InternalApiIpSubnet} - storage_subnet: {get_param: StorageIpSubnet} - storage_mgmt_subnet: {get_param: StorageMgmtIpSubnet} - tenant_subnet: {get_param: TenantIpSubnet} - management_subnet: {get_param: ManagementIpSubnet} - ctlplane_uri: {get_param: ControlPlaneIp} - external_uri: {get_param: ExternalIpUri} - internal_api_uri: {get_param: InternalApiIpUri} - storage_uri: {get_param: StorageIpUri} - storage_mgmt_uri: {get_param: StorageMgmtIpUri} - tenant_uri: {get_param: TenantIpUri} - management_uri: {get_param: ManagementIpUri} + value: {get_attr: [NetIpMapValue, value]} diff --git a/overcloud.j2.yaml b/overcloud.j2.yaml index e9447b94..e99f770f 100644 --- a/overcloud.j2.yaml +++ b/overcloud.j2.yaml @@ -243,6 +243,12 @@ resources: NetIpMap: {get_attr: [VipMap, net_ip_map]} ServiceNetMap: {get_attr: [ServiceNetMap, service_net_map]} + EndpointMapData: + type: OS::Heat::Value + properties: + type: json + value: {get_attr: [EndpointMap, endpoint_map]} + # Jinja loop for Role in roles_data.yaml {% for role in roles %} # Resources generated for {{role.name}} Role @@ -634,7 +640,7 @@ outputs: value: true KeystoneURL: description: URL for the Overcloud Keystone service - value: {get_attr: [EndpointMap, endpoint_map, KeystonePublic, uri]} + value: {get_attr: [EndpointMapData, value, KeystonePublic, uri]} KeystoneAdminVip: description: Keystone Admin VIP endpoint value: {get_attr: [VipMap, net_ip_map, {get_attr: [ServiceNetMap, service_net_map, KeystoneAdminApiNetwork]}]} @@ -643,7 +649,7 @@ outputs: Mapping of the resources with the needed info for their endpoints. This includes the protocol used, the IP, port and also a full representation of the URI. - value: {get_attr: [EndpointMap, endpoint_map]} + value: {get_attr: [EndpointMapData, value]} HostsEntry: description: | The content that should be appended to your /etc/hosts if you want to get diff --git a/puppet/all-nodes-config.yaml b/puppet/all-nodes-config.yaml index ee43c3a5..7edf17af 100644 --- a/puppet/all-nodes-config.yaml +++ b/puppet/all-nodes-config.yaml @@ -68,6 +68,32 @@ parameters: type: boolean default: false + InternalApiNetName: + default: internal_api + description: The name of the internal API network. + type: string + ExternalNetName: + default: external + description: The name of the external network. + type: string + ManagementNetName: + default: management + description: The name of the management network. + type: string + StorageNetName: + default: storage + description: The name of the storage network. + type: string + StorageMgmtNetName: + default: storage_mgmt + description: The name of the Storage management network. + type: string + TenantNetName: + default: tenant + description: The name of the tenant network. + type: string + + resources: allNodesConfigImpl: @@ -175,21 +201,21 @@ resources: get_param: [NetVipMap, {get_param: [ServiceNetMap, keystone_admin_api_network]}] keystone_public_api_vip: get_param: [NetVipMap, {get_param: [ServiceNetMap, keystone_public_api_network]}] - public_virtual_ip: {get_param: [NetVipMap, external]} + public_virtual_ip: {get_param: [NetVipMap, {get_param: ExternalNetName}]} controller_virtual_ip: {get_param: [NetVipMap, ctlplane]} - internal_api_virtual_ip: {get_param: [NetVipMap, internal_api]} - storage_virtual_ip: {get_param: [NetVipMap, storage]} - storage_mgmt_virtual_ip: {get_param: [NetVipMap, storage_mgmt]} + internal_api_virtual_ip: {get_param: [NetVipMap, {get_param: InternalApiNetName}]} + storage_virtual_ip: {get_param: [NetVipMap, {get_param: StorageNetName}]} + storage_mgmt_virtual_ip: {get_param: [NetVipMap, {get_param: StorageMgmtNetName}]} redis_vip: {get_param: RedisVirtualIP} # public_virtual_ip and controller_virtual_ip are needed in # both HAproxy & keepalived. - tripleo::haproxy::public_virtual_ip: {get_param: [NetVipMap, external]} + tripleo::haproxy::public_virtual_ip: {get_param: [NetVipMap, {get_param: ExternalNetName}]} tripleo::haproxy::controller_virtual_ip: {get_param: [NetVipMap, ctlplane]} - tripleo::keepalived::public_virtual_ip: {get_param: [NetVipMap, external]} + tripleo::keepalived::public_virtual_ip: {get_param: [NetVipMap, {get_param: ExternalNetName}]} tripleo::keepalived::controller_virtual_ip: {get_param: [NetVipMap, ctlplane]} - tripleo::keepalived::internal_api_virtual_ip: {get_param: [NetVipMap, internal_api]} - tripleo::keepalived::storage_virtual_ip: {get_param: [NetVipMap, storage]} - tripleo::keepalived::storage_mgmt_virtual_ip: {get_param: [NetVipMap, storage_mgmt]} + tripleo::keepalived::internal_api_virtual_ip: {get_param: [NetVipMap, {get_param: InternalApiNetName}]} + tripleo::keepalived::storage_virtual_ip: {get_param: [NetVipMap, {get_param: StorageNetName}]} + tripleo::keepalived::storage_mgmt_virtual_ip: {get_param: [NetVipMap, {get_param: StorageMgmtNetName}]} tripleo::keepalived::redis_virtual_ip: {get_param: RedisVirtualIP} tripleo::redis_notification::haproxy_monitor_ip: {get_param: [NetVipMap, ctlplane]} cloud_name_external: {get_param: cloud_name_external} diff --git a/puppet/services/README.rst b/puppet/services/README.rst index e5c11535..223c3ed0 100644 --- a/puppet/services/README.rst +++ b/puppet/services/README.rst @@ -104,7 +104,9 @@ step, "step2" for the second, etc. 2) Stop all control-plane services, ready for upgrade - 3) Perform a package update, (either specific packages or the whole system) + 3) Perform a package update and install new packages: A general + upgrade is done, and only new package should go into service + ansible tasks. 4) Start services needed for migration tasks (e.g DB) diff --git a/puppet/services/ceph-mon.yaml b/puppet/services/ceph-mon.yaml index 1ce58335..d589ef89 100644 --- a/puppet/services/ceph-mon.yaml +++ b/puppet/services/ceph-mon.yaml @@ -59,6 +59,14 @@ parameters: } default: {} type: json + CephValidationRetries: + type: number + default: 5 + description: Number of retry attempts for Ceph validation + CephValidationDelay: + type: number + default: 10 + description: Interval (in seconds) in between validation checks MonitoringSubscriptionCephMon: default: 'overcloud-ceph-mon' type: string @@ -119,21 +127,32 @@ outputs: # rolling upgrade of all osd nodes in step1 - name: Check status tags: step0,validation - shell: ceph health | grep -qv HEALTH_ERR - # FIXME(shardy) I suspect we can use heat or ansible facts here instead? - - name: Get hostname + shell: ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN" + - name: Stop CephMon tags: step0 - shell: hostname -s - register: mon_id - - name: Stop Ceph Mon + service: + name: ceph-mon@{{ ansible_hostname }} + state: stopped + - name: Update Ceph packages tags: step0 - service: name=ceph-mon@{{mon_id.stdout}} pattern=ceph-mon state=stopped - - name: Update ceph packages + yum: + name: ceph-mon + state: latest + - name: Start CephMon tags: step0 - yum: name=ceph-mon state=latest - - name: Start ceph-mon service - tags: step0 - service: name=ceph-mon@{{mon_id.stdout}} state=started + service: + name: ceph-mon@{{ ansible_hostname }} + state: started + # ceph-ansible + # https://github.com/ceph/ceph-ansible/blob/master/infrastructure-playbooks/rolling_update.yml#L149-L157 + - name: Wait for the monitor to join the quorum... + tags: step0,ceph_quorum_validation + shell: | + ceph -s | grep monmap | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }} + register: ceph_quorum_nodecheck + until: ceph_quorum_nodecheck.rc == 0 + retries: {get_param: CephValidationRetries} + delay: {get_param: CephValidationDelay} - name: ceph osd crush tunables default tags: step0 shell: ceph osd crush tunables default diff --git a/puppet/services/ceph-osd.yaml b/puppet/services/ceph-osd.yaml index 9bd83aab..a97fa116 100644 --- a/puppet/services/ceph-osd.yaml +++ b/puppet/services/ceph-osd.yaml @@ -21,6 +21,24 @@ parameters: MonitoringSubscriptionCephOsd: default: 'overcloud-ceph-osd' type: string + CephValidationRetries: + type: number + default: 40 + description: Number of retry attempts for Ceph validation + CephValidationDelay: + type: number + default: 30 + description: Interval (in seconds) in between validation checks + IgnoreCephUpgradeWarnings: + type: boolean + default: false + description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean + +parameter_groups: +- label: deprecated + description: Do not use deprecated params, they will be removed. + parameters: + - IgnoreCephUpgradeWarnings resources: CephBase: @@ -66,17 +84,37 @@ outputs: - name: ceph osd set noscrub tags: step1 command: ceph osd set noscrub - - name: Stop Ceph OSD + - name: Stop CephOSD tags: step1 - service: name=ceph-osd@{{ item }} state=stopped + service: + name: ceph-osd@{{ item }} + state: stopped with_items: "{{osd_ids.stdout.strip().split()}}" - - name: Update ceph OSD packages + - name: Update Ceph packages tags: step1 - yum: name=ceph-osd state=latest - - name: Start ceph-osd service + yum: + name: ceph-osd + state: latest + - name: Start CephOSD tags: step1 - service: name=ceph-osd@{{ item }} state=started + service: + name: ceph-osd@{{ item }} + state: started with_items: "{{osd_ids.stdout.strip().split()}}" + # with awk we are meant to check if $2 and $4 are *the same* but it returns 1 when + # they are, so the check is inverted to produce an useful exit code + - name: Wait for clean pgs... + tags: step1,ceph_pgs_clean_validation + vars: + ignore_warnings: {get_param: IgnoreCephUpgradeWarnings} + shell: | + ceph pg stat | awk '{exit($2!=$4)}' && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN" + register: ceph_pgs_healthcheck + until: ceph_pgs_healthcheck.rc == 0 + retries: {get_param: CephValidationRetries} + delay: {get_param: CephValidationDelay} + when: + - not ignore_warnings - name: ceph osd unset noout tags: step1 command: ceph osd unset noout diff --git a/puppet/services/ceph-rgw.yaml b/puppet/services/ceph-rgw.yaml index d7014e54..c5b29c7e 100644 --- a/puppet/services/ceph-rgw.yaml +++ b/puppet/services/ceph-rgw.yaml @@ -87,4 +87,6 @@ outputs: tags: step0,validation - name: Stop RGW instance tags: step1 - service: name=ceph-radosgw@{{rgw_id.stdout}} state=stopped + service: + name: ceph-radosgw@{{rgw_id.stdout}} + state: stopped diff --git a/puppet/services/cinder-api.yaml b/puppet/services/cinder-api.yaml index 8c5a07ac..bde3e044 100644 --- a/puppet/services/cinder-api.yaml +++ b/puppet/services/cinder-api.yaml @@ -86,7 +86,8 @@ outputs: cinder::keystone::authtoken::project_name: 'service' cinder::api::enable_proxy_headers_parsing: true - cinder::api::nova_catalog_info: 'compute:Compute Service:internalURL' + cinder::api::nova_catalog_info: 'compute:nova:internalURL' + cinder::api::nova_catalog_admin_info: 'compute:nova:adminURL' # TODO(emilien) move it to puppet-cinder cinder::config: DEFAULT/swift_catalog_info: diff --git a/puppet/services/kernel.yaml b/puppet/services/kernel.yaml index 29157959..fec455d1 100644 --- a/puppet/services/kernel.yaml +++ b/puppet/services/kernel.yaml @@ -31,6 +31,7 @@ outputs: config_settings: kernel_modules: nf_conntrack: {} + ip_conntrack_proto_sctp: {} sysctl_settings: net.ipv4.tcp_keepalive_intvl: value: 1 diff --git a/puppet/services/nova-api.yaml b/puppet/services/nova-api.yaml index 0adefecd..f27b53f2 100644 --- a/puppet/services/nova-api.yaml +++ b/puppet/services/nova-api.yaml @@ -197,9 +197,6 @@ outputs: - name: Stop and disable nova_api service (pre-upgrade not under httpd) tags: step2 service: name=openstack-nova-api state=stopped enabled=no - - name: update nova api - tags: step2 - yum: name=openstack-nova-api state=latest - name: Create puppet manifest to set transport_url in nova.conf tags: step5 when: is_bootstrap_node diff --git a/puppet/services/nova-conductor.yaml b/puppet/services/nova-conductor.yaml index 7b086536..a19d0f8d 100644 --- a/puppet/services/nova-conductor.yaml +++ b/puppet/services/nova-conductor.yaml @@ -69,13 +69,10 @@ outputs: - name: Stop nova_conductor service tags: step2 service: name=openstack-nova-conductor state=stopped - - name: update nova conductor - tags: step2 - yum: name=openstack-nova-conductor state=latest # If not already set by puppet (e.g a pre-ocata version), set the # upgrade_level for compute to "auto" - name: Set compute upgrade level to auto - tags: step3 + tags: step2 ini_file: str_replace: template: "dest=/etc/nova/nova.conf section=upgrade_levels option=compute value=LEVEL" diff --git a/puppet/services/nova-scheduler.yaml b/puppet/services/nova-scheduler.yaml index 0e0b9d1e..e08bf182 100644 --- a/puppet/services/nova-scheduler.yaml +++ b/puppet/services/nova-scheduler.yaml @@ -67,6 +67,3 @@ outputs: - name: Stop nova_scheduler service tags: step2 service: name=openstack-nova-scheduler state=stopped - - name: update nova scheduler - tags: step2 - yum: name=openstack-nova-scheduler state=latest diff --git a/puppet/services/pacemaker.yaml b/puppet/services/pacemaker.yaml index ca21cfbe..9398d6b5 100644 --- a/puppet/services/pacemaker.yaml +++ b/puppet/services/pacemaker.yaml @@ -143,5 +143,5 @@ outputs: pacemaker_cluster: state=online - name: Check pacemaker resource tags: step4 - pacemaker_resource: state=started resource={{item}} check_mode=true wait_for_resource=true timeout=200 + pacemaker_resource: state=started resource={{item}} check_mode=true wait_for_resource=true timeout=500 with_items: {get_param: PacemakerResources} diff --git a/puppet/services/panko-api.yaml b/puppet/services/panko-api.yaml index 4b74ad45..fc127a27 100644 --- a/puppet/services/panko-api.yaml +++ b/puppet/services/panko-api.yaml @@ -84,3 +84,9 @@ outputs: include tripleo::profile::base::panko::api metadata_settings: get_attr: [ApacheServiceBase, role_data, metadata_settings] + upgrade_tasks: + # The panko API isn't installed in newton images, so install + # it on upgrade + - name: Install openstack-panko-api packages on upgrade + tags: step3 + yum: name=openstack-panko-api state=latest diff --git a/puppet/services/sahara-engine.yaml b/puppet/services/sahara-engine.yaml index 987fe25b..176514ec 100644 --- a/puppet/services/sahara-engine.yaml +++ b/puppet/services/sahara-engine.yaml @@ -53,6 +53,3 @@ outputs: - name: Stop sahara_engine service tags: step2 service: name=openstack-sahara-engine state=stopped - - name: Sync sahara_engine DB - tags: step5 - command: sahara-db-manage --config-file /etc/sahara/sahara.conf upgrade head diff --git a/puppet/services/swift-proxy.yaml b/puppet/services/swift-proxy.yaml index 526fa888..c941b598 100644 --- a/puppet/services/swift-proxy.yaml +++ b/puppet/services/swift-proxy.yaml @@ -138,6 +138,7 @@ outputs: - '' - 'proxy-logging' - 'proxy-server' + swift::proxy::ceilometer::rabbit_use_ssl: {get_param: RabbitClientUseSSL} swift::proxy::account_autocreate: true # NOTE: bind IP is found in Heat replacing the network name with the # local node IP for the given network; replacement examples diff --git a/releasenotes/notes/manila-with-managed-ceph-e5178fd06127624f.yaml b/releasenotes/notes/manila-with-managed-ceph-e5178fd06127624f.yaml new file mode 100644 index 00000000..59f1fb99 --- /dev/null +++ b/releasenotes/notes/manila-with-managed-ceph-e5178fd06127624f.yaml @@ -0,0 +1,11 @@ +--- +prelude: > + Support for Manila/CephFS with TripleO managed Ceph cluster +features: + - | + It is now possible to configure Manila with CephFS to use a + TripleO managed Ceph cluster. When using the Heat environment + file at environments/manila-cephfsnative-config.yaml Manila + will be configured to use the TripleO managed Ceph cluster + if CephMDS is deployed as well, which can be done using the + file environments/services/ceph-mds.yaml
\ No newline at end of file diff --git a/releasenotes/notes/swift-ring-keeper-c04b440d7d5ce13f.yaml b/releasenotes/notes/swift-ring-keeper-c04b440d7d5ce13f.yaml new file mode 100644 index 00000000..e9974a20 --- /dev/null +++ b/releasenotes/notes/swift-ring-keeper-c04b440d7d5ce13f.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Swift rings created or updated on the overcloud nodes will now be + stored on the undercloud at the end of each deployment. They will be + retrieved before any deployment update, and by doing this the Swift + rings will be in a consistent state across the cluster all the time. + This makes it possible to add, remove or replace nodes without + manual operator interaction. |