heat_template_version: pike

description: >
  Ceph OSD service.

# Input parameters. ServiceData, ServiceNetMap, DefaultPasswords, RoleName,
# RoleParameters and EndpointMap are forwarded unchanged to the nested
# CephBase resource; the remaining parameters are read by the role_data
# output of this template.
parameters:
  ServiceData:
    type: json
    default: {}
    description: Dictionary packing service data
  ServiceNetMap:
    type: json
    default: {}
    description: >-
      Mapping of service_name -> network name. Typically set
      via parameter_defaults in the resource registry.  This
      mapping overrides those in ServiceNetMapDefaults.
  # No description is declared for this parameter; in this template it is
  # only passed through to CephBase.
  DefaultPasswords:
    type: json
    default: {}
  RoleName:
    type: string
    default: ""
    description: Role name on which the service is applied
  RoleParameters:
    type: json
    default: {}
    description: Parameters specific to the role
  EndpointMap:
    type: json
    default: {}
    description: >-
      Mapping of service endpoint -> protocol. Typically set
      via parameter_defaults in the resource registry.
  # Emitted verbatim as role_data.monitoring_subscription.
  MonitoringSubscriptionCephOsd:
    type: string
    default: overcloud-ceph-osd
  # Used as `retries` on the "Wait for clean pgs..." upgrade task.
  CephValidationRetries:
    type: number
    default: 40
    description: Number of retry attempts for Ceph validation
  # Used as `delay` on the "Wait for clean pgs..." upgrade task.
  CephValidationDelay:
    type: number
    default: 30
    description: Interval (in seconds) in between validation checks
  # When true, the "Wait for clean pgs..." upgrade task is skipped.
  IgnoreCephUpgradeWarnings:
    type: boolean
    default: false
    description: >-
      If enabled, Ceph upgrade will be forced even though cluster
      or PGs status is not clean

# Groups parameters under a label; the single group here marks parameters
# that are deprecated. IgnoreCephUpgradeWarnings is still read by the
# "Wait for clean pgs..." upgrade task in this template.
parameter_groups:
  - label: deprecated
    description: Do not use deprecated params, they will be removed.
    parameters:
      - IgnoreCephUpgradeWarnings
resources:
  # Nested stack built from ./ceph-base.yaml. Its role_data.config_settings
  # attribute is merged into this service's config_settings in the outputs
  # section. Every property is a straight pass-through of the same-named
  # parameter of this template.
  CephBase:
    type: ./ceph-base.yaml
    properties:
      ServiceData:
        get_param: ServiceData
      ServiceNetMap:
        get_param: ServiceNetMap
      DefaultPasswords:
        get_param: DefaultPasswords
      EndpointMap:
        get_param: EndpointMap
      RoleName:
        get_param: RoleName
      RoleParameters:
        get_param: RoleParameters

outputs:
  # role_data carries the service name, monitoring subscription, the merged
  # configuration settings, the puppet step_config and the Ansible tasks run
  # during a batched upgrade.
  role_data:
    description: Role data for the Ceph OSD service.
    value:
      service_name: ceph_osd
      monitoring_subscription: {get_param: MonitoringSubscriptionCephOsd}
      # CephBase settings first, then the OSD firewall rule layered on top.
      config_settings:
        map_merge:
          - get_attr: [CephBase, role_data, config_settings]
          - tripleo.ceph_osd.firewall_rules:
              '111 ceph_osd':
                dport:
                  - '6800-7300'
      step_config: |
        include ::tripleo::profile::base::ceph::osd
      # Upgrade sequence: verify health, record the local OSD ids, set the
      # noout/norebalance/nodeep-scrub/noscrub flags, stop the OSDs, update
      # the package, start the OSDs, wait for the PGs to be clean, then unset
      # the flags again.
      upgrade_batch_tasks:
        # Fails when `ceph health` output consists only of HEALTH_ERR lines.
        - name: Check status
          tags: step1,validation
          shell: ceph health | grep -qv HEALTH_ERR
        # Each entry under /var/lib/ceph/osd is split on "-" and the second
        # field is kept (presumably entries are named <cluster>-<id>; verify
        # against the deployed layout).
        - name: Get OSD IDs
          tags: step1
          shell: ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }'
          register: osd_ids
        # "so that mirrors aren't rebalanced as if the OSD died" - gfidente / leseb
        - name: ceph osd set noout
          tags: step1
          command: ceph osd set noout
        - name: ceph osd set norebalance
          tags: step1
          command: ceph osd set norebalance
        - name: ceph osd set nodeep-scrub
          tags: step1
          command: ceph osd set nodeep-scrub
        - name: ceph osd set noscrub
          tags: step1
          command: ceph osd set noscrub
        # One ceph-osd@<id> unit is stopped per id collected above.
        - name: Stop CephOSD
          tags: step1
          service:
            name: ceph-osd@{{ item }}
            state: stopped
          with_items: "{{osd_ids.stdout.strip().split()}}"
        - name: Update Ceph packages
          tags: step1
          yum:
            name: ceph-osd
            state: latest
        - name: Start CephOSD
          tags: step1
          service:
            name: ceph-osd@{{ item }}
            state: started
          with_items: "{{osd_ids.stdout.strip().split()}}"
        # The intent is to succeed when fields $2 and $4 of `ceph pg stat`
        # are *the same*. awk's exit() uses the value of the expression as the
        # exit code, and `$2==$4` evaluates to 1 (shell failure) when they
        # match, so the comparison is inverted to `$2!=$4` to produce a useful
        # exit code: 0 when equal, 1 when different.
        # The health check then accepts HEALTH_OK or HEALTH_WARN. The task is
        # retried CephValidationRetries times, CephValidationDelay seconds
        # apart, and is skipped entirely when IgnoreCephUpgradeWarnings is
        # true.
        - name: Wait for clean pgs...
          tags: step1,ceph_pgs_clean_validation
          vars:
            ignore_warnings: {get_param: IgnoreCephUpgradeWarnings}
          shell: |
            ceph pg stat | awk '{exit($2!=$4)}' && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
          register: ceph_pgs_healthcheck
          until: ceph_pgs_healthcheck.rc == 0
          retries: {get_param: CephValidationRetries}
          delay: {get_param: CephValidationDelay}
          when:
            - not ignore_warnings
        # Clear the four flags that were set before the OSDs were stopped.
        - name: ceph osd unset noout
          tags: step1
          command: ceph osd unset noout
        - name: ceph osd unset norebalance
          tags: step1
          command: ceph osd unset norebalance
        - name: ceph osd unset nodeep-scrub
          tags: step1
          command: ceph osd unset nodeep-scrub
        - name: ceph osd unset noscrub
          tags: step1
          command: ceph osd unset noscrub