aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGiulio Fidente <gfidente@redhat.com>2017-02-17 16:49:58 +0100
committerGiulio Fidente <gfidente@redhat.com>2017-02-21 20:53:01 +0000
commita3df16776dd5d7eb0a60ca4c58cef9913eb1c5cb (patch)
tree6d32a23466f2b76d5f1a21964276653682723a19
parentd5b0c38f0b9ed0e7ca2eb4e1e2f6aaa003268b33 (diff)
Add checks in ansible upgrade tasks for CephMon and CephOSD
Adds two checks, one for the CephMon and one for the CephOSD upgrade tasks borrowed from ceph-ansible. Change-Id: I0a0e60d277240130c6bd76a74ccc13354b87a30a Co-Authored-By: Sebastien Han <seb@redhat.com>
-rw-r--r--extraconfig/tasks/major_upgrade_pacemaker.yaml4
-rw-r--r--puppet/services/ceph-mon.yaml43
-rw-r--r--puppet/services/ceph-osd.yaml50
-rw-r--r--puppet/services/ceph-rgw.yaml4
4 files changed, 78 insertions, 23 deletions
diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml
index 8c91027d..74d3be71 100644
--- a/extraconfig/tasks/major_upgrade_pacemaker.yaml
+++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml
@@ -18,10 +18,6 @@ parameters:
constraints:
- allowed_values: ['auto', 'yes', 'no']
default: 'auto'
- IgnoreCephUpgradeWarnings:
- type: boolean
- default: false
- description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean
KeepSaharaServicesOnUpgrade:
type: boolean
default: true
diff --git a/puppet/services/ceph-mon.yaml b/puppet/services/ceph-mon.yaml
index 1ce58335..d589ef89 100644
--- a/puppet/services/ceph-mon.yaml
+++ b/puppet/services/ceph-mon.yaml
@@ -59,6 +59,14 @@ parameters:
}
default: {}
type: json
+ CephValidationRetries:
+ type: number
+ default: 5
+ description: Number of retry attempts for Ceph validation
+ CephValidationDelay:
+ type: number
+ default: 10
+ description: Interval (in seconds) in between validation checks
MonitoringSubscriptionCephMon:
default: 'overcloud-ceph-mon'
type: string
@@ -119,21 +127,32 @@ outputs:
# rolling upgrade of all osd nodes in step1
- name: Check status
tags: step0,validation
- shell: ceph health | grep -qv HEALTH_ERR
- # FIXME(shardy) I suspect we can use heat or ansible facts here instead?
- - name: Get hostname
+ shell: ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
+ - name: Stop CephMon
tags: step0
- shell: hostname -s
- register: mon_id
- - name: Stop Ceph Mon
+ service:
+ name: ceph-mon@{{ ansible_hostname }}
+ state: stopped
+ - name: Update Ceph packages
tags: step0
- service: name=ceph-mon@{{mon_id.stdout}} pattern=ceph-mon state=stopped
- - name: Update ceph packages
+ yum:
+ name: ceph-mon
+ state: latest
+ - name: Start CephMon
tags: step0
- yum: name=ceph-mon state=latest
- - name: Start ceph-mon service
- tags: step0
- service: name=ceph-mon@{{mon_id.stdout}} state=started
+ service:
+ name: ceph-mon@{{ ansible_hostname }}
+ state: started
+ # ceph-ansible
+ # https://github.com/ceph/ceph-ansible/blob/master/infrastructure-playbooks/rolling_update.yml#L149-L157
+ - name: Wait for the monitor to join the quorum...
+ tags: step0,ceph_quorum_validation
+ shell: |
+ ceph -s | grep monmap | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }}
+ register: ceph_quorum_nodecheck
+ until: ceph_quorum_nodecheck.rc == 0
+ retries: {get_param: CephValidationRetries}
+ delay: {get_param: CephValidationDelay}
- name: ceph osd crush tunables default
tags: step0
shell: ceph osd crush tunables default
diff --git a/puppet/services/ceph-osd.yaml b/puppet/services/ceph-osd.yaml
index 9bd83aab..a97fa116 100644
--- a/puppet/services/ceph-osd.yaml
+++ b/puppet/services/ceph-osd.yaml
@@ -21,6 +21,24 @@ parameters:
MonitoringSubscriptionCephOsd:
default: 'overcloud-ceph-osd'
type: string
+ CephValidationRetries:
+ type: number
+ default: 40
+ description: Number of retry attempts for Ceph validation
+ CephValidationDelay:
+ type: number
+ default: 30
+ description: Interval (in seconds) in between validation checks
+ IgnoreCephUpgradeWarnings:
+ type: boolean
+ default: false
+ description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean
+
+parameter_groups:
+- label: deprecated
+ description: Do not use deprecated params, they will be removed.
+ parameters:
+ - IgnoreCephUpgradeWarnings
resources:
CephBase:
@@ -66,17 +84,37 @@ outputs:
- name: ceph osd set noscrub
tags: step1
command: ceph osd set noscrub
- - name: Stop Ceph OSD
+ - name: Stop CephOSD
tags: step1
- service: name=ceph-osd@{{ item }} state=stopped
+ service:
+ name: ceph-osd@{{ item }}
+ state: stopped
with_items: "{{osd_ids.stdout.strip().split()}}"
- - name: Update ceph OSD packages
+ - name: Update Ceph packages
tags: step1
- yum: name=ceph-osd state=latest
- - name: Start ceph-osd service
+ yum:
+ name: ceph-osd
+ state: latest
+ - name: Start CephOSD
tags: step1
- service: name=ceph-osd@{{ item }} state=started
+ service:
+ name: ceph-osd@{{ item }}
+ state: started
with_items: "{{osd_ids.stdout.strip().split()}}"
+ # with awk we are meant to check if $2 and $4 are *the same* but it returns 1 when
+ # they are, so the check is inverted to produce an useful exit code
+ - name: Wait for clean pgs...
+ tags: step1,ceph_pgs_clean_validation
+ vars:
+ ignore_warnings: {get_param: IgnoreCephUpgradeWarnings}
+ shell: |
+ ceph pg stat | awk '{exit($2!=$4)}' && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
+ register: ceph_pgs_healthcheck
+ until: ceph_pgs_healthcheck.rc == 0
+ retries: {get_param: CephValidationRetries}
+ delay: {get_param: CephValidationDelay}
+ when:
+ - not ignore_warnings
- name: ceph osd unset noout
tags: step1
command: ceph osd unset noout
diff --git a/puppet/services/ceph-rgw.yaml b/puppet/services/ceph-rgw.yaml
index d7014e54..c5b29c7e 100644
--- a/puppet/services/ceph-rgw.yaml
+++ b/puppet/services/ceph-rgw.yaml
@@ -87,4 +87,6 @@ outputs:
tags: step0,validation
- name: Stop RGW instance
tags: step1
- service: name=ceph-radosgw@{{rgw_id.stdout}} state=stopped
+ service:
+ name: ceph-radosgw@{{rgw_id.stdout}}
+ state: stopped