aboutsummaryrefslogtreecommitdiffstats
path: root/puppet/services
diff options
context:
space:
mode:
Diffstat (limited to 'puppet/services')
-rw-r--r--puppet/services/README.rst4
-rw-r--r--puppet/services/ceph-mon.yaml43
-rw-r--r--puppet/services/ceph-osd.yaml50
-rw-r--r--puppet/services/ceph-rgw.yaml4
-rw-r--r--puppet/services/nova-api.yaml3
-rw-r--r--puppet/services/nova-conductor.yaml5
-rw-r--r--puppet/services/nova-scheduler.yaml3
7 files changed, 82 insertions, 30 deletions
diff --git a/puppet/services/README.rst b/puppet/services/README.rst
index e5c11535..223c3ed0 100644
--- a/puppet/services/README.rst
+++ b/puppet/services/README.rst
@@ -104,7 +104,9 @@ step, "step2" for the second, etc.
2) Stop all control-plane services, ready for upgrade
- 3) Perform a package update, (either specific packages or the whole system)
+ 3) Perform a package update and install new packages: A general
+ upgrade is done, and only new package should go into service
+ ansible tasks.
4) Start services needed for migration tasks (e.g DB)
diff --git a/puppet/services/ceph-mon.yaml b/puppet/services/ceph-mon.yaml
index 1ce58335..d589ef89 100644
--- a/puppet/services/ceph-mon.yaml
+++ b/puppet/services/ceph-mon.yaml
@@ -59,6 +59,14 @@ parameters:
}
default: {}
type: json
+ CephValidationRetries:
+ type: number
+ default: 5
+ description: Number of retry attempts for Ceph validation
+ CephValidationDelay:
+ type: number
+ default: 10
+ description: Interval (in seconds) in between validation checks
MonitoringSubscriptionCephMon:
default: 'overcloud-ceph-mon'
type: string
@@ -119,21 +127,32 @@ outputs:
# rolling upgrade of all osd nodes in step1
- name: Check status
tags: step0,validation
- shell: ceph health | grep -qv HEALTH_ERR
- # FIXME(shardy) I suspect we can use heat or ansible facts here instead?
- - name: Get hostname
+ shell: ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
+ - name: Stop CephMon
tags: step0
- shell: hostname -s
- register: mon_id
- - name: Stop Ceph Mon
+ service:
+ name: ceph-mon@{{ ansible_hostname }}
+ state: stopped
+ - name: Update Ceph packages
tags: step0
- service: name=ceph-mon@{{mon_id.stdout}} pattern=ceph-mon state=stopped
- - name: Update ceph packages
+ yum:
+ name: ceph-mon
+ state: latest
+ - name: Start CephMon
tags: step0
- yum: name=ceph-mon state=latest
- - name: Start ceph-mon service
- tags: step0
- service: name=ceph-mon@{{mon_id.stdout}} state=started
+ service:
+ name: ceph-mon@{{ ansible_hostname }}
+ state: started
+ # ceph-ansible
+ # https://github.com/ceph/ceph-ansible/blob/master/infrastructure-playbooks/rolling_update.yml#L149-L157
+ - name: Wait for the monitor to join the quorum...
+ tags: step0,ceph_quorum_validation
+ shell: |
+ ceph -s | grep monmap | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }}
+ register: ceph_quorum_nodecheck
+ until: ceph_quorum_nodecheck.rc == 0
+ retries: {get_param: CephValidationRetries}
+ delay: {get_param: CephValidationDelay}
- name: ceph osd crush tunables default
tags: step0
shell: ceph osd crush tunables default
diff --git a/puppet/services/ceph-osd.yaml b/puppet/services/ceph-osd.yaml
index 9bd83aab..a97fa116 100644
--- a/puppet/services/ceph-osd.yaml
+++ b/puppet/services/ceph-osd.yaml
@@ -21,6 +21,24 @@ parameters:
MonitoringSubscriptionCephOsd:
default: 'overcloud-ceph-osd'
type: string
+ CephValidationRetries:
+ type: number
+ default: 40
+ description: Number of retry attempts for Ceph validation
+ CephValidationDelay:
+ type: number
+ default: 30
+ description: Interval (in seconds) in between validation checks
+ IgnoreCephUpgradeWarnings:
+ type: boolean
+ default: false
+ description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean
+
+parameter_groups:
+- label: deprecated
+ description: Do not use deprecated params, they will be removed.
+ parameters:
+ - IgnoreCephUpgradeWarnings
resources:
CephBase:
@@ -66,17 +84,37 @@ outputs:
- name: ceph osd set noscrub
tags: step1
command: ceph osd set noscrub
- - name: Stop Ceph OSD
+ - name: Stop CephOSD
tags: step1
- service: name=ceph-osd@{{ item }} state=stopped
+ service:
+ name: ceph-osd@{{ item }}
+ state: stopped
with_items: "{{osd_ids.stdout.strip().split()}}"
- - name: Update ceph OSD packages
+ - name: Update Ceph packages
tags: step1
- yum: name=ceph-osd state=latest
- - name: Start ceph-osd service
+ yum:
+ name: ceph-osd
+ state: latest
+ - name: Start CephOSD
tags: step1
- service: name=ceph-osd@{{ item }} state=started
+ service:
+ name: ceph-osd@{{ item }}
+ state: started
with_items: "{{osd_ids.stdout.strip().split()}}"
+ # with awk we are meant to check if $2 and $4 are *the same* but it returns 1 when
+ # they are, so the check is inverted to produce an useful exit code
+ - name: Wait for clean pgs...
+ tags: step1,ceph_pgs_clean_validation
+ vars:
+ ignore_warnings: {get_param: IgnoreCephUpgradeWarnings}
+ shell: |
+ ceph pg stat | awk '{exit($2!=$4)}' && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
+ register: ceph_pgs_healthcheck
+ until: ceph_pgs_healthcheck.rc == 0
+ retries: {get_param: CephValidationRetries}
+ delay: {get_param: CephValidationDelay}
+ when:
+ - not ignore_warnings
- name: ceph osd unset noout
tags: step1
command: ceph osd unset noout
diff --git a/puppet/services/ceph-rgw.yaml b/puppet/services/ceph-rgw.yaml
index d7014e54..c5b29c7e 100644
--- a/puppet/services/ceph-rgw.yaml
+++ b/puppet/services/ceph-rgw.yaml
@@ -87,4 +87,6 @@ outputs:
tags: step0,validation
- name: Stop RGW instance
tags: step1
- service: name=ceph-radosgw@{{rgw_id.stdout}} state=stopped
+ service:
+ name: ceph-radosgw@{{rgw_id.stdout}}
+ state: stopped
diff --git a/puppet/services/nova-api.yaml b/puppet/services/nova-api.yaml
index 0adefecd..f27b53f2 100644
--- a/puppet/services/nova-api.yaml
+++ b/puppet/services/nova-api.yaml
@@ -197,9 +197,6 @@ outputs:
- name: Stop and disable nova_api service (pre-upgrade not under httpd)
tags: step2
service: name=openstack-nova-api state=stopped enabled=no
- - name: update nova api
- tags: step2
- yum: name=openstack-nova-api state=latest
- name: Create puppet manifest to set transport_url in nova.conf
tags: step5
when: is_bootstrap_node
diff --git a/puppet/services/nova-conductor.yaml b/puppet/services/nova-conductor.yaml
index 7b086536..a19d0f8d 100644
--- a/puppet/services/nova-conductor.yaml
+++ b/puppet/services/nova-conductor.yaml
@@ -69,13 +69,10 @@ outputs:
- name: Stop nova_conductor service
tags: step2
service: name=openstack-nova-conductor state=stopped
- - name: update nova conductor
- tags: step2
- yum: name=openstack-nova-conductor state=latest
# If not already set by puppet (e.g a pre-ocata version), set the
# upgrade_level for compute to "auto"
- name: Set compute upgrade level to auto
- tags: step3
+ tags: step2
ini_file:
str_replace:
template: "dest=/etc/nova/nova.conf section=upgrade_levels option=compute value=LEVEL"
diff --git a/puppet/services/nova-scheduler.yaml b/puppet/services/nova-scheduler.yaml
index 0e0b9d1e..e08bf182 100644
--- a/puppet/services/nova-scheduler.yaml
+++ b/puppet/services/nova-scheduler.yaml
@@ -67,6 +67,3 @@ outputs:
- name: Stop nova_scheduler service
tags: step2
service: name=openstack-nova-scheduler state=stopped
- - name: update nova scheduler
- tags: step2
- yum: name=openstack-nova-scheduler state=latest