heat_template_version: pike

description: >
  Ceph OSD service.

# Inputs handed to every composable service template, plus the knobs that
# control the post-upgrade health validation performed by the upgrade tasks.
parameters:
  ServiceData:
    default: {}
    description: Dictionary packing service data
    type: json
  ServiceNetMap:
    default: {}
    description: >-
      Mapping of service_name -> network name. Typically set
      via parameter_defaults in the resource registry. This
      mapping overrides those in ServiceNetMapDefaults.
    type: json
  DefaultPasswords:
    default: {}
    type: json
  RoleName:
    default: ''
    description: Role name on which the service is applied
    type: string
  RoleParameters:
    default: {}
    description: Parameters specific to the role
    type: json
  EndpointMap:
    default: {}
    description: >-
      Mapping of service endpoint -> protocol. Typically set
      via parameter_defaults in the resource registry.
    type: json
  MonitoringSubscriptionCephOsd:
    default: 'overcloud-ceph-osd'
    type: string
  # Used as `retries` of the "Wait for clean pgs..." upgrade task.
  CephValidationRetries:
    type: number
    default: 40
    description: Number of retry attempts for Ceph validation
  # Used as `delay` of the "Wait for clean pgs..." upgrade task.
  CephValidationDelay:
    type: number
    default: 30
    description: Interval (in seconds) in between validation checks
  # When true, the "Wait for clean pgs..." upgrade task is skipped.
  IgnoreCephUpgradeWarnings:
    type: boolean
    default: false
    description: >-
      If enabled, Ceph upgrade will be forced even though cluster or PGs
      status is not clean

parameter_groups:
  - label: deprecated
    description: Do not use deprecated params, they will be removed.
    parameters:
      - IgnoreCephUpgradeWarnings

resources:
  # Nested base template; its role_data.config_settings attribute is merged
  # into this service's config_settings below.
  CephBase:
    type: ./ceph-base.yaml
    properties:
      ServiceData: {get_param: ServiceData}
      ServiceNetMap: {get_param: ServiceNetMap}
      DefaultPasswords: {get_param: DefaultPasswords}
      EndpointMap: {get_param: EndpointMap}
      RoleName: {get_param: RoleName}
      RoleParameters: {get_param: RoleParameters}

outputs:
  role_data:
    description: Role data for the Ceph OSD service.
    value:
      service_name: ceph_osd
      monitoring_subscription: {get_param: MonitoringSubscriptionCephOsd}
      # Base Ceph settings extended with the OSD firewall rule.
      config_settings:
        map_merge:
          - get_attr: [CephBase, role_data, config_settings]
          - tripleo.ceph_osd.firewall_rules:
              '111 ceph_osd':
                dport:
                  - '6800-7300'
      step_config: |
        include ::tripleo::profile::base::ceph::osd
      # Upgrade sequence: check health, set the OSD flags, stop the OSDs,
      # update the package, start the OSDs, wait for the cluster to settle,
      # then unset the flags again.
      upgrade_batch_tasks:
        # Fails when `ceph health` reports HEALTH_ERR.
        - name: Check status
          tags: step1,validation
          shell: ceph health | grep -qv HEALTH_ERR
        # Takes the part after the "-" of each entry in /var/lib/ceph/osd;
        # the result is used as the ceph-osd@<id> service instance below.
        - name: Get OSD IDs
          tags: step1
          shell: ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }'
          register: osd_ids
        # "so that mirrors aren't rebalanced as if the OSD died" - gfidente / leseb
        - name: ceph osd set noout
          tags: step1
          command: ceph osd set noout
        - name: ceph osd set norebalance
          tags: step1
          command: ceph osd set norebalance
        - name: ceph osd set nodeep-scrub
          tags: step1
          command: ceph osd set nodeep-scrub
        - name: ceph osd set noscrub
          tags: step1
          command: ceph osd set noscrub
        - name: Stop CephOSD
          tags: step1
          service:
            name: ceph-osd@{{ item }}
            state: stopped
          with_items: "{{osd_ids.stdout.strip().split()}}"
        - name: Update Ceph packages
          tags: step1
          yum:
            name: ceph-osd
            state: latest
        - name: Start CephOSD
          tags: step1
          service:
            name: ceph-osd@{{ item }}
            state: started
          with_items: "{{osd_ids.stdout.strip().split()}}"
        # The awk step must succeed when fields $2 and $4 of `ceph pg stat`
        # are *the same* (presumably total vs. clean PG counts - TODO confirm
        # against the ceph output format). A comparison evaluates to 1 when
        # true, so the test is written inverted ($2 != $4) to produce a useful
        # exit code: 0 when the fields match, 1 when they differ.
        # Skipped entirely when IgnoreCephUpgradeWarnings is true.
        - name: Wait for clean pgs...
          tags: step1,ceph_pgs_clean_validation
          vars:
            ignore_warnings: {get_param: IgnoreCephUpgradeWarnings}
          shell: |
            ceph pg stat | awk '{exit($2!=$4)}' && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
          register: ceph_pgs_healthcheck
          until: ceph_pgs_healthcheck.rc == 0
          retries: {get_param: CephValidationRetries}
          delay: {get_param: CephValidationDelay}
          when:
            - not ignore_warnings
        # Unset the flags that were set before the OSDs were stopped.
        - name: ceph osd unset noout
          tags: step1
          command: ceph osd unset noout
        - name: ceph osd unset norebalance
          tags: step1
          command: ceph osd unset norebalance
        - name: ceph osd unset nodeep-scrub
          tags: step1
          command: ceph osd unset nodeep-scrub
        - name: ceph osd unset noscrub
          tags: step1
          command: ceph osd unset noscrub