1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
#!/bin/bash
set -x
# Retry a command until it succeeds or the attempt budget is used up.
# Arguments:
#   $1   - maximum number of attempts
#   $2.. - command string fragments; they are joined with spaces and run
#          through eval, so they may contain pipelines and a leading '!'
# Outputs:   a "[NOTE]" banner, then one '.' per failed attempt (stdout)
# Returns:   0 as soon as the command succeeds, 1 if every attempt failed
function wait_for() {
  local total_attempts=$1; shift
  local cmdstr=$*
  local sleep_time=10
  # Keep the loop counter local so nested wait_for calls (e.g. via
  # maas_fixup) and callers never see it change.
  local attempt
  echo "[NOTE] Waiting for cmd to return success: ${cmdstr}"
  # shellcheck disable=SC2034
  for attempt in $(seq "${total_attempts}"); do
    if eval "${cmdstr}"; then
      return 0
    fi
    echo -n '.'; sleep "${sleep_time}"
  done
  # All attempts failed: report it instead of silently returning success.
  return 1
}
# Wait for MaaS commissioning/deploy to finish, retry on failure
#
# Polls the maas.machines.status state on mas01 (via wait_for) until its
# output shows "Deployed: 5" / "Ready: 5" or any Failed/Allocated status,
# then inspects a fresh status report:
#   * nodes in "Failed commissioning" are deleted and maas.machines is
#     re-applied;
#   * nodes in "Failed deployment" or "Allocated" are passed to
#     maas.machines.mark_broken_fixed and maas.machines.deploy is re-applied.
# NOTE(review): the literal 5 presumably is the expected node count - confirm.
# Arguments: none
# Returns:   0 when the status report lists no such node,
#            1 after a recovery action was triggered (caller should retry)
function maas_fixup() {
  local statuscmd="salt 'mas01*' --out yaml state.apply maas.machines.status"
  # Declared separately from the assignments below so 'local' does not mask
  # the command status, and so the loop variable does not leak globally.
  local statusout fcnodes fdnodes node_system_id

  wait_for 180 "${statuscmd} | tee /dev/stderr | " \
    "grep -Eq '((Deployed|Ready): 5|status:Failed|status:Allocated)'"
  statusout=$(eval "${statuscmd}")

  # system_id of every node that failed commissioning
  fcnodes=$(grep -Po '(?<=system_id:)(.*)(?=,status:Failed commissioning)' \
    <<< "${statusout}")
  for node_system_id in ${fcnodes}; do
    salt -C 'mas01*' state.apply maas.machines.delete \
      pillar="{'system_id': '${node_system_id}'}"
    sleep 30
  done
  if [ -n "${fcnodes}" ]; then
    salt -C 'mas01*' state.apply maas.machines
    return 1
  fi

  # system_id of every node that failed deployment or is stuck in Allocated
  fdnodes=$(grep -Po \
    '(?<=system_id:)(.*)(?=,status:(Failed deployment|Allocated))' \
    <<< "${statusout}")
  for node_system_id in ${fdnodes}; do
    salt -C 'mas01*' state.apply maas.machines.mark_broken_fixed \
      pillar="{'system_id': '${node_system_id}'}"
    sleep 30
  done
  if [ -n "${fdnodes}" ]; then
    salt -C 'mas01*' state.apply maas.machines.deploy
    return 1
  fi

  return 0
}
# MaaS rack/region controller, node commissioning
salt -C 'mas01*' cmd.run "add-apt-repository ppa:maas/stable"
salt -C 'mas01*' state.apply linux,salt,openssh,ntp
salt -C 'mas01*' state.apply linux.network.interface
salt -C 'mas01*' state.apply maas.pxe_nat
salt -C 'mas01*' state.apply maas.cluster
salt -C 'cfg01*' state.apply maas.pxe_route
# maas.region is retried up to 10 times before machines are registered
wait_for 10 "salt -C 'mas01*' state.apply maas.region"
salt -C 'mas01*' state.apply maas.machines
wait_for 10 maas_fixup

# cleanup outdated salt keys
# (every key listed by salt-key except the cfg01/mas01 entries is deleted)
salt-key --out yaml | awk '!/^(minions|- cfg01|- mas01)/ {print $2}' | \
  xargs -I{} salt-key -yd {}

# MaaS node deployment
salt -C 'mas01*' state.apply maas.machines.deploy
wait_for 10 maas_fixup

# Show the MaaS region admin username/password pillar items.
# A space is required before the line continuation, otherwise the function
# name fuses with the first pillar key when the next line is not indented.
salt -C 'mas01*' pillar.item \
  maas:region:admin:username \
  maas:region:admin:password

# KVM, compute node prereqs (libvirt first), VCP deployment
salt -C '* and not cfg01* and not mas01*' saltutil.sync_all
salt -C 'kvm*' pkg.install bridge-utils
salt -C 'kvm*' state.apply linux.network
salt -C 'kvm*' system.reboot
# After the reboot, poll until no kvm minion reports 'Not connected'
wait_for 90 "! salt 'kvm*' test.ping | tee /dev/stderr | grep -Fq 'Not connected'"
salt -C '* and not cfg01* and not mas01*' state.apply linux,ntp
salt -C 'kvm*' state.sls libvirt
salt -C '* and not cfg01* and not mas01*' state.apply salt
salt -C 'kvm*' saltutil.sync_all
wait_for 10 "! salt -C 'kvm*' state.sls salt.control | " \
  "tee /dev/stderr | grep -Fq 'Not connected'"

# Node names come from the keys of the salt:control:cluster:internal:node
# pillar on kvm01; the trailing ':' is replaced by '*' so each entry can be
# used as a salt glob target.
vcp_nodes=$(salt --out yaml 'kvm01*' pillar.get salt:control:cluster:internal:node | \
  awk '/\s+\w+:$/ {gsub(/:$/, "*"); print $1}')

# Check all vcp nodes are available
# (loops until one full pass of test.ping succeeds for every node)
rc=1
while [ "$rc" -ne 0 ]; do
  rc=0
  for node in $vcp_nodes; do
    salt "$node" test.ping 2>/dev/null || { rc=$?; break; };
  done
  sleep 5
done

# Remaining targets are selected by PCRE: everything except cfg01, mas01,
# kvm* and cmp00* minions.
wait_for 10 "salt -C '* and not cfg01* and not mas01*' saltutil.sync_all"
wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' state.apply salt"
wait_for 10 "! salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' state.apply linux,ntp | " \
  "tee /dev/stderr | grep -Fq 'Not connected'"
# Install the first key from SUDO_USER's authorized_keys on those nodes;
# eval is used only to expand the ~user path.
wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' ssh.set_auth_key ${SUDO_USER} \
  $(awk 'NR==1{print $2}' "$(eval echo "~${SUDO_USER}/.ssh/authorized_keys")")"

# Get the latest packages for openstack nodes
wait_for 10 "! salt -C 'E@^(?!cfg01|mas01|kvm).*' pkg.upgrade refresh=False | " \
  "tee /dev/stderr | grep -Fq 'Not connected'"
|