From fe87c23ce3450ff8519e9c7d62cb879903519069 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Sat, 15 Dec 2018 13:41:31 -0500 Subject: Attempting to fix NFS issues Issues still persist where instances sometimes fail to start because os.utime fails on the file path. This could be a race condition between qemu and nova while copying images on the NFS share. This patch opens more ports in the firewall and changes the initial directory owner to nfsnobody. It also includes a patch to fix an apparent race condition when nova sends a remote call to the privsep helper daemon to modify the time of the base file owned by qemu: https://review.openstack.org/#/c/625741/ It includes another fix for patching container images, where the docker image was not being detected correctly because the full gerrit project name, including the 'openstack/' prefix, was being used to search tripleo docker images. Additionally, this fixes more bugs around patching openstack python containers, where the patch was not being applied correctly. 
JIRA: APEX-654 Change-Id: I1d011035486298d5906038922e69d478c383c3f7 Signed-off-by: Tim Rozet (cherry picked from commit e1d286e89e04577bda2569a5909dfe8182d953ba) --- apex/builders/common_builder.py | 43 ++++++++++++++++++---- apex/overcloud/deploy.py | 3 ++ build/csit-environment.yaml | 26 +++++++++---- build/csit-queens-environment.yaml | 26 +++++++++---- config/deploy/common-patches.yaml | 6 +++ lib/ansible/playbooks/patch_containers.yml | 13 +++++++ .../playbooks/prepare_overcloud_containers.yml | 20 +++++++--- 7 files changed, 109 insertions(+), 28 deletions(-) create mode 100644 lib/ansible/playbooks/patch_containers.yml diff --git a/apex/builders/common_builder.py b/apex/builders/common_builder.py index d3ddae48..170ee322 100644 --- a/apex/builders/common_builder.py +++ b/apex/builders/common_builder.py @@ -59,12 +59,13 @@ def project_to_path(project, patch=None): def project_to_docker_image(project): """ Translates OpenStack project to OOO services that are containerized - :param project: name of OpenStack project + :param project: short name of OpenStack project :return: List of OOO docker service names """ # Fetch all docker containers in docker hub with tripleo and filter # based on project - + logging.info("Checking for docker images matching project: {}".format( + project)) hub_output = utils.open_webpage( urllib.parse.urljoin(con.DOCKERHUB_OOO, '?page_size=1024'), timeout=10) try: @@ -81,6 +82,8 @@ def project_to_docker_image(project): for result in results: if result['name'].startswith("centos-binary-{}".format(project)): # add as docker image shortname (just service name) + logging.debug("Adding docker image {} for project {} for " + "patching".format(result['name'], project)) docker_images.append(result['name'].replace('centos-binary-', '')) return docker_images @@ -177,7 +180,15 @@ def add_upstream_patches(patches, image, tmp_dir, if docker_tag and 'python' in project_path: # Projects map to multiple THT services, need to check which # are 
supported - ooo_docker_services = project_to_docker_image(patch['project']) + project_short_name = os.path.basename(patch['project']) + ooo_docker_services = project_to_docker_image(project_short_name) + if not ooo_docker_services: + logging.error("Did not find any matching docker containers " + "for project: {}".format(project_short_name)) + raise exc.ApexCommonBuilderException( + 'Unable to find docker services for python project in ' + 'patch') + # Just use the first image to see if patch was promoted into it docker_img = ooo_docker_services[0] else: ooo_docker_services = [] @@ -191,24 +202,38 @@ def add_upstream_patches(patches, image, tmp_dir, if patch_diff and not patch_promoted: patch_file = "{}.patch".format(patch['change-id']) + patch_file_paths = [] # If we found services, then we treat the patch like it applies to # docker only if ooo_docker_services: os_version = default_branch.replace('stable/', '') for service in ooo_docker_services: docker_services = docker_services.union({service}) + # We need to go root to be able to install patch and then + # switch back to previous user. Some containers that + # have the same name as the project do not necessarily + # contain the project code. For example + # novajoin-notifier does not contain nova package code. 
+ # Therefore we must try to patch and unfortunately + # ignore failures until we have a better way of checking + # this docker_cmds = [ "WORKDIR {}".format(project_path), + "USER root", + "ARG REAL_USER", + "RUN yum -y install patch", "ADD {} {}".format(patch_file, project_path), - "RUN patch -p1 < {}".format(patch_file) + "RUN patch -p1 < {} || echo " + "'Patching failed'".format(patch_file), + "USER $REAL_USER" ] src_img_uri = "{}:8787/tripleo{}/centos-binary-{}:" \ "{}".format(uc_ip, os_version, service, docker_tag) oc_builder.build_dockerfile(service, tmp_dir, docker_cmds, src_img_uri) - patch_file_path = os.path.join(tmp_dir, 'containers', - patch_file) + patch_file_paths.append(os.path.join( + tmp_dir, "containers/{}".format(service), patch_file)) else: patch_file_path = os.path.join(tmp_dir, patch_file) virt_ops.extend([ @@ -218,8 +243,10 @@ def add_upstream_patches(patches, image, tmp_dir, project_path, patch_file)}]) logging.info("Adding patch {} to {}".format(patch_file, image)) - with open(patch_file_path, 'w') as fh: - fh.write(patch_diff) + patch_file_paths.append(patch_file_path) + for patch_fp in patch_file_paths: + with open(patch_fp, 'w') as fh: + fh.write(patch_diff) else: logging.info("Ignoring patch:\n{}".format(patch)) if len(virt_ops) > 1: diff --git a/apex/overcloud/deploy.py b/apex/overcloud/deploy.py index 39d26c8a..5491a6f2 100644 --- a/apex/overcloud/deploy.py +++ b/apex/overcloud/deploy.py @@ -446,6 +446,9 @@ def prep_image(ds, ns, img, tmp_dir, root_pw=None, docker_tag=None, {con.VIRT_RUN_CMD: "chmod 777 /glance"}, {con.VIRT_RUN_CMD: "chmod 777 /cinder"}, {con.VIRT_RUN_CMD: "chmod 777 /nova"}, + {con.VIRT_RUN_CMD: "chown nfsnobody:nfsnobody /glance"}, + {con.VIRT_RUN_CMD: "chown nfsnobody:nfsnobody /cinder"}, + {con.VIRT_RUN_CMD: "chown nfsnobody:nfsnobody /nova"}, {con.VIRT_RUN_CMD: "echo '/glance *(rw,sync," "no_root_squash,no_acl)' > /etc/exports"}, {con.VIRT_RUN_CMD: "echo '/cinder *(rw,sync," diff --git 
a/build/csit-environment.yaml b/build/csit-environment.yaml index 58676dc6..36e2ddb5 100644 --- a/build/csit-environment.yaml +++ b/build/csit-environment.yaml @@ -15,16 +15,28 @@ parameter_defaults: tripleo::ringbuilder::build_ring: false nova::api::default_floating_pool: 'external' ControllerExtraConfig: - tripleo::firewall:firewall_rules: - '139 allow NFS': - dport: 2049 + tripleo::firewall::firewall_rules: + '139 allow NFS TCP': + dport: + - 2049 + - 111 + - 32765 + proto: tcp + action: accept + '140 allow NFS UDP': + dport: + - 2049 + - 111 + - 32765 + proto: udp + action: accept GlanceNfsEnabled: true - GlanceNfsShare: overcloud-controller-0.opnfvlf.org:/glance + GlanceNfsShare: overcloud-controller-0.ctlplane.opnfvlf.org:/glance GlanceNfsOptions: - 'rw,sync,nosharecache,context=system_u:object_r:glance_var_lib_t:s0' + 'rw,sync,context=system_u:object_r:glance_var_lib_t:s0' NovaNfsEnabled: true - NovaNfsShare: overcloud-controller-0.opnfvlf.org:/nova - NovaNfsOptions: 'rw,sync,nosharecache,context=system_u:object_r:nfs_t:s0' + NovaNfsShare: overcloud-controller-0.ctlplane.opnfvlf.org:/nova + NovaNfsOptions: 'rw,sync,context=system_u:object_r:nfs_t:s0' DockerPuppetProcessCount: 10 NeutronNetworkVLANRanges: 'datacentre:500:525' SshServerOptions: diff --git a/build/csit-queens-environment.yaml b/build/csit-queens-environment.yaml index 2252bb02..82a8c88c 100644 --- a/build/csit-queens-environment.yaml +++ b/build/csit-queens-environment.yaml @@ -15,16 +15,28 @@ parameter_defaults: tripleo::ringbuilder::build_ring: false nova::api::default_floating_pool: 'external' ControllerExtraConfig: - tripleo::firewall:firewall_rules: - '139 allow NFS': - dport: 2049 + tripleo::firewall::firewall_rules: + '139 allow NFS TCP': + dport: + - 2049 + - 111 + - 32765 + proto: tcp + action: accept + '140 allow NFS UDP': + dport: + - 2049 + - 111 + - 32765 + proto: udp + action: accept GlanceNfsEnabled: true - GlanceNfsShare: overcloud-controller-0.opnfvlf.org:/glance + 
GlanceNfsShare: overcloud-controller-0.ctlplane.opnfvlf.org:/glance GlanceNfsOptions: - 'rw,sync,nosharecache,context=system_u:object_r:glance_var_lib_t:s0' + 'rw,sync,context=system_u:object_r:glance_var_lib_t:s0' NovaNfsEnabled: true - NovaNfsShare: overcloud-controller-0.opnfvlf.org:/nova - NovaNfsOptions: 'rw,sync,nosharecache,context=system_u:object_r:nfs_t:s0' + NovaNfsShare: overcloud-controller-0.ctlplane.opnfvlf.org:/nova + NovaNfsOptions: 'rw,sync,context=system_u:object_r:nfs_t:s0' DockerPuppetProcessCount: 10 NeutronNetworkVLANRanges: 'datacentre:500:525' SshServerOptions: diff --git a/config/deploy/common-patches.yaml b/config/deploy/common-patches.yaml index ac006bdd..a3149c1a 100644 --- a/config/deploy/common-patches.yaml +++ b/config/deploy/common-patches.yaml @@ -11,6 +11,9 @@ patches: project: openstack/puppet-tripleo - change-id: I93e3d355625508fdc42f44bdd358f3ba86fbd8d7 project: openstack/puppet-tripleo + - change-id: Id68aa27a8ab08d9c00655e5ed6b48d194aa8e6f6 + project: openstack/nova + branch: master queens: undercloud: - change-id: I966bf7f6f8d1cbc656abfad59e8bb927e1aa53c2 @@ -20,3 +23,6 @@ patches: project: openstack/puppet-tripleo - change-id: I93e3d355625508fdc42f44bdd358f3ba86fbd8d7 project: openstack/puppet-tripleo + - change-id: Id68aa27a8ab08d9c00655e5ed6b48d194aa8e6f6 + project: openstack/nova + branch: master diff --git a/lib/ansible/playbooks/patch_containers.yml b/lib/ansible/playbooks/patch_containers.yml new file mode 100644 index 00000000..f7b85137 --- /dev/null +++ b/lib/ansible/playbooks/patch_containers.yml @@ -0,0 +1,13 @@ +--- + - name: "Pull docker image to ensure it exists locally: {{ item }}" + shell: docker pull {{ undercloud_ip }}:8787/tripleo{{ os_version }}/centos-binary-{{ item }}:current-tripleo-rdo + - name: "Find docker image user {{ item }}" + shell: > + docker inspect --format='{{ '{{' }}.ContainerConfig.User{{ '}}' }}' + {{ undercloud_ip }}:8787/tripleo{{ os_version }}/centos-binary-{{ item 
}}:current-tripleo-rdo + register: user_result + - name: "Patch docker image {{ item }}" + shell: > + cd /home/stack/containers/{{ item }} && docker build + --build-arg REAL_USER={{ user_result.stdout }} + -t {{ undercloud_ip }}:8787/tripleo{{ os_version }}/centos-binary-{{ item }}:apex . diff --git a/lib/ansible/playbooks/prepare_overcloud_containers.yml b/lib/ansible/playbooks/prepare_overcloud_containers.yml index 54dbe098..45ca3011 100644 --- a/lib/ansible/playbooks/prepare_overcloud_containers.yml +++ b/lib/ansible/playbooks/prepare_overcloud_containers.yml @@ -42,6 +42,15 @@ become: yes become_user: stack when: sdn != false + - name: Touch sdn-images file when nosdn + copy: + content: "" + dest: /home/stack/sdn-images.yaml + force: no + group: stack + owner: stack + mode: 0644 + when: sdn == false - name: Update Ceph tag for aarch64 in container env file lineinfile: path: /home/stack/overcloud_containers.yml @@ -68,14 +77,13 @@ url: http://{{ undercloud_ip }}:8787/v2/_catalog body_format: json register: response - - name: Patch Docker images - shell: > - cd /home/stack/containers/{{ item }} && docker build - -t {{ undercloud_ip }}:8787/tripleo{{ os_version }}/centos-binary-{{ item }}:apex . + - include_tasks: patch_containers.yml + with_items: "{{ patched_docker_services }}" + loop_control: + loop_var: item when: - patched_docker_services|length > 0 - item in (response.json)['repositories']|join(" ") - with_items: "{{ patched_docker_services }}" - name: Push patched docker images to local registry shell: docker push {{ undercloud_ip }}:8787/tripleo{{ os_version }}/centos-binary-{{ item }}:apex when: @@ -89,4 +97,4 @@ replace: '\1:apex' with_nested: - [ '/home/stack/sdn-images.yaml', '/home/stack/docker-images.yaml'] - - "{{ patched_docker_services }}" \ No newline at end of file + - "{{ patched_docker_services }}" -- cgit 1.2.3-korg