blob: a97fa116771882512cbe74fc1af9761b0a6334de (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
# Heat template for the Ceph OSD service, written against the "ocata" template version.
heat_template_version: ocata
# Folded scalar: the description value is the single line below plus one trailing newline.
description: >
  Ceph OSD service.
# Inputs accepted by this service template.
# Each entry is written in the same order: type, default, description.
parameters:
  # Forwarded unchanged to the CephBase nested resource.
  ServiceNetMap:
    type: json
    default: {}
    description: >-
      Mapping of service_name -> network name. Typically set
      via parameter_defaults in the resource registry. This
      mapping overrides those in ServiceNetMapDefaults.
  # Forwarded unchanged to the CephBase nested resource.
  DefaultPasswords:
    type: json
    default: {}
  # Forwarded unchanged to the CephBase nested resource.
  EndpointMap:
    type: json
    default: {}
    description: >-
      Mapping of service endpoint -> protocol. Typically set
      via parameter_defaults in the resource registry.
  # Published as role_data.monitoring_subscription in the outputs section.
  MonitoringSubscriptionCephOsd:
    type: string
    default: 'overcloud-ceph-osd'
  # Used as the `retries` value of the "Wait for clean pgs..." upgrade task.
  CephValidationRetries:
    type: number
    default: 40
    description: Number of retry attempts for Ceph validation
  # Used as the `delay` value of the "Wait for clean pgs..." upgrade task.
  CephValidationDelay:
    type: number
    default: 30
    description: Interval (in seconds) in between validation checks
  # When true, the "Wait for clean pgs..." upgrade task is skipped.
  # Listed under the "deprecated" parameter group.
  IgnoreCephUpgradeWarnings:
    type: boolean
    default: false
    description: >-
      If enabled, Ceph upgrade will be forced even though
      cluster or PGs status is not clean
# Parameter grouping metadata: the single group below marks parameters
# that are deprecated and scheduled for removal.
parameter_groups:
  - label: deprecated
    description: Do not use deprecated params, they will be removed.
    parameters:
      - IgnoreCephUpgradeWarnings
# Nested template ./ceph-base.yaml; its role_data.config_settings attribute is
# merged into this service's config_settings in the outputs section.
resources:
  CephBase:
    type: ./ceph-base.yaml
    # The three mapping parameters are handed straight through to the nested stack.
    properties:
      ServiceNetMap:
        get_param: ServiceNetMap
      DefaultPasswords:
        get_param: DefaultPasswords
      EndpointMap:
        get_param: EndpointMap
# Single output, role_data, describing the ceph_osd service: its configuration
# settings, the puppet step_config, and the Ansible tasks run during a batched upgrade.
outputs:
  role_data:
    description: Role data for the Ceph OSD service.
    value:
      service_name: ceph_osd
      monitoring_subscription: {get_param: MonitoringSubscriptionCephOsd}
      config_settings:
        # Start from the CephBase settings and add the OSD firewall rule
        # (destination port range 6800-7300) on top.
        map_merge:
          - get_attr: [CephBase, role_data, config_settings]
          - tripleo.ceph_osd.firewall_rules:
              '111 ceph_osd':
                dport:
                  - '6800-7300'
      step_config: |
        include ::tripleo::profile::base::ceph::osd
      # Upgrade sequence: validate health, record the local OSD ids, set the
      # cluster flags, stop the OSDs, update the package, start the OSDs, wait
      # for the PGs to become clean, then unset the flags again.
      upgrade_batch_tasks:
        # grep -v exits non-zero when every line of the health output
        # contains HEALTH_ERR, which fails this task.
        - name: Check status
          tags: step1,validation
          shell: ceph health | grep -qv HEALTH_ERR
        # Prints the second '-'-separated field of each entry under
        # /var/lib/ceph/osd (presumably the numeric id of "<cluster>-<id>"
        # directories -- confirm against the node layout).
        - name: Get OSD IDs
          tags: step1
          shell: ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }'
          register: osd_ids
        # "so that mirrors aren't rebalanced as if the OSD died" - gfidente / leseb
        - name: ceph osd set noout
          tags: step1
          command: ceph osd set noout
        - name: ceph osd set norebalance
          tags: step1
          command: ceph osd set norebalance
        - name: ceph osd set nodeep-scrub
          tags: step1
          command: ceph osd set nodeep-scrub
        - name: ceph osd set noscrub
          tags: step1
          command: ceph osd set noscrub
        # One ceph-osd@<id> unit is stopped per id recorded by "Get OSD IDs".
        - name: Stop CephOSD
          tags: step1
          service:
            name: ceph-osd@{{ item }}
            state: stopped
          with_items: "{{osd_ids.stdout.strip().split()}}"
        - name: Update Ceph packages
          tags: step1
          yum:
            name: ceph-osd
            state: latest
        # Restart the same set of units that was stopped above.
        - name: Start CephOSD
          tags: step1
          service:
            name: ceph-osd@{{ item }}
            state: started
          with_items: "{{osd_ids.stdout.strip().split()}}"
        # with awk we are meant to check if $2 and $4 are *the same* but it returns 1 when
        # they are, so the check is inverted to produce a useful exit code
        #
        # The task is retried CephValidationRetries times, CephValidationDelay
        # seconds apart, and is skipped when IgnoreCephUpgradeWarnings is true.
        - name: Wait for clean pgs...
          tags: step1,ceph_pgs_clean_validation
          vars:
            ignore_warnings: {get_param: IgnoreCephUpgradeWarnings}
          shell: |
            ceph pg stat | awk '{exit($2!=$4)}' && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
          register: ceph_pgs_healthcheck
          until: ceph_pgs_healthcheck.rc == 0
          retries: {get_param: CephValidationRetries}
          delay: {get_param: CephValidationDelay}
          when:
            - not ignore_warnings
        # Clear the flags set before the OSDs were stopped.
        - name: ceph osd unset noout
          tags: step1
          command: ceph osd unset noout
        - name: ceph osd unset norebalance
          tags: step1
          command: ceph osd unset norebalance
        - name: ceph osd unset nodeep-scrub
          tags: step1
          command: ceph osd unset nodeep-scrub
        - name: ceph osd unset noscrub
          tags: step1
          command: ceph osd unset noscrub
|