aboutsummaryrefslogtreecommitdiffstats
path: root/mcp/config/states/maas
blob: db0bd3fe68282595de35a6831c6e1ca4b7f9adc6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* 
#!/bin/bash
set -x

# Retry a command until it succeeds or the attempt budget is used up.
# Arguments:
#   $1    - maximum number of attempts
#   $2... - command to run; the remaining arguments are joined into a single
#           string and handed to eval, so pipelines and a leading '!' work
# Outputs:
#   a note naming the command and one '.' per failed attempt on stdout;
#   an error line on stderr when giving up
# Returns:
#   0 as soon as the command succeeds, 1 if every attempt failed
function wait_for() {
  local total_attempts=$1; shift
  local cmdstr=$*
  local sleep_time=10
  # Keep the loop counter out of the caller's scope (wait_for may be nested)
  local attempt
  echo "[NOTE] Waiting for cmd to return success: ${cmdstr}"
  # shellcheck disable=SC2034
  for attempt in $(seq "${total_attempts}"); do
    if eval "${cmdstr}"; then
      return 0
    fi
    echo -n '.'; sleep "${sleep_time}"
  done
  # Falling off the loop used to yield the status of the last sleep (0),
  # which made a timeout look like success; report it explicitly instead.
  echo "[ERROR] Giving up after ${total_attempts} attempts: ${cmdstr}" >&2
  return 1
}

# Wait for MaaS commissioning/deploy to finish, retry on failure
#
# Polls the maas.machines.status state on mas01 (through wait_for) until its
# output reports "Deployed: 5" / "Ready: 5" or contains a Failed/Allocated
# status, then takes a fresh status snapshot and repairs what it finds:
#   - nodes in "Failed commissioning" are deleted and maas.machines is
#     applied again;
#   - nodes in "Failed deployment" or "Allocated" are passed to
#     maas.machines.mark_broken_fixed and maas.machines.deploy is applied
#     again.
# NOTE(review): the literal 5 is presumably the expected node count - confirm.
# Returns:
#   0 when no node needed fixing, 1 when a repair was triggered, so that a
#   retrying caller (e.g. "wait_for 10 maas_fixup") runs the check again
function maas_fixup() {
  local statuscmd="salt 'mas01*' --out yaml state.apply maas.machines.status"
  local statusout fcnodes fdnodes node_system_id

  wait_for 180 "${statuscmd} | tee /dev/stderr | " \
           "grep -Eq '((Deployed|Ready): 5|status:Failed|status:Allocated)'"
  # A failing status query is tolerated: the greps below then match nothing
  statusout=$(eval "${statuscmd}") || true

  # grep exits 1 when nothing matches, which simply means "no such nodes"
  fcnodes=$(grep -Po \
    '(?<=system_id:)(.*)(?=,status:Failed commissioning)' \
    <<< "${statusout}") || true
  # Intentionally unquoted: one system_id per whitespace-separated word
  for node_system_id in ${fcnodes}; do
    salt -C 'mas01*' state.apply maas.machines.delete \
      pillar="{'system_id': '${node_system_id}'}"
  done
  if [ -n "${fcnodes}" ]; then
    salt -C 'mas01*' state.apply maas.machines
    return 1
  fi

  fdnodes=$(grep -Po \
    '(?<=system_id:)(.*)(?=,status:(Failed deployment|Allocated))' \
    <<< "${statusout}") || true
  for node_system_id in ${fdnodes}; do
    salt -C 'mas01*' state.apply maas.machines.mark_broken_fixed \
      pillar="{'system_id': '${node_system_id}'}"
  done
  if [ -n "${fdnodes}" ]; then
    salt -C 'mas01*' state.apply maas.machines.deploy
    return 1
  fi

  return 0
}

# MaaS rack/region controller, node commissioning
salt -C 'mas01*' cmd.run 'add-apt-repository ppa:maas/stable'

# Apply the mas01 states one after another, in this exact order
for mas_state in \
  linux,salt,openssh,ntp \
  linux.network.interface \
  maas.pxe_nat \
  maas.cluster
do
  salt -C 'mas01*' state.apply "${mas_state}"
done

# On cfg01, route 192.168.11.0/24 through the gateway given by MAAS_IP
# (192.168.10.3 when MAAS_IP is unset or empty)
salt -C 'cfg01*' cmd.run "route add -net 192.168.11.0/24 gw ${MAAS_IP:-192.168.10.3}"

# The region state is retried up to 10 times
wait_for 10 "salt -C 'mas01*' state.apply maas.region"

# Register the machines, then let maas_fixup wait for / repair them
salt -C 'mas01*' state.apply maas.machines
wait_for 10 maas_fixup

# cleanup outdated salt keys: every key listed by salt-key except the
# section headers and the cfg01 / mas01 entries is deleted
salt-key --out yaml \
  | awk '!/^(minions|- cfg01|- mas01)/ {print $2}' \
  | xargs -I{} salt-key -yd {}

# MaaS node deployment
salt -C 'mas01*' state.apply maas.machines.deploy
wait_for 10 maas_fixup

# Show the MaaS region admin username/password pillar items
salt -C 'mas01*' pillar.item maas:region:admin:username maas:region:admin:password

# KVM, compute node prereqs (libvirt first), VCP deployment
salt -C '* and not cfg01* and not mas01*' saltutil.sync_all

# Bridge tooling and network config on the KVM hosts, then reboot them
salt -C 'kvm*' pkg.install bridge-utils
salt -C 'kvm*' state.apply linux.network
salt -C 'kvm*' system.reboot
# Retry (up to 90 attempts) until the test.ping output no longer contains
# 'Not connected'. grep -F is used instead of the obsolescent fgrep alias.
wait_for 90 "! salt 'kvm*' test.ping | tee /dev/stderr | grep -Fq 'Not connected'"

salt -C '* and not cfg01* and not mas01*' state.apply linux,ntp

salt -C 'kvm*' state.sls libvirt

salt -C '* and not cfg01* and not mas01*' state.apply salt
salt -C 'kvm*' saltutil.sync_all
salt -C 'kvm*' state.sls salt.control

# Build the list of VCP node globs ("<name>*") from the keys found under the
# salt:control:cluster:internal:node pillar of kvm01. The regex uses plain
# bracket expressions instead of the gawk-only \s / \w operators so that it
# behaves the same with any awk implementation.
vcp_nodes=$(salt --out yaml 'kvm01*' pillar.get salt:control:cluster:internal:node | \
            awk '/[ \t]+[A-Za-z0-9_]+:$/ {gsub(/:$/, "*"); print $1}')

# Check all vcp nodes are available
# NOTE(review): there is no upper bound here - the loop keeps polling for as
# long as at least one node fails test.ping.
rc=1
while [ "${rc}" -ne 0 ]; do
  rc=0
  # Intentionally unquoted: one glob per whitespace-separated word
  for node in ${vcp_nodes}; do
    salt "${node}" test.ping 2>/dev/null || { rc=$?; break; }
  done
  # Only pause between failed rounds; no need to wait once every node answered
  if [ "${rc}" -ne 0 ]; then
    sleep 5
  fi
done

# Sync modules on all nodes except cfg01/mas01, then apply the salt and
# linux,ntp states on everything that is not cfg01, mas01, kvm* or cmp00*
wait_for 10 "salt -C '* and not cfg01* and not mas01*' saltutil.sync_all"
wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' state.apply salt"
# grep -F is used instead of the obsolescent fgrep alias
wait_for 10 "! salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' state.apply linux,ntp | " \
  "tee /dev/stderr | grep -Fq 'Not connected'"

# Push the invoking user's first authorized key to the same set of nodes.
# The user is the one who called sudo; when SUDO_USER is unset (script run
# directly) fall back to the current user instead of passing an empty name.
auth_user=${SUDO_USER:-$(id -un)}
# Resolve the home directory from the passwd database rather than through
# "eval echo ~user", which would evaluate the variable's content as shell code
auth_home=$(getent passwd "${auth_user}" | cut -d: -f6)
# Second field of the first line - assumes that line has the form
# "<type> <key> [comment]" without an options prefix; TODO confirm
auth_key=$(awk 'NR==1{print $2}' "${auth_home}/.ssh/authorized_keys")

wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' ssh.set_auth_key ${auth_user} ${auth_key}"