-rw-r--r--  tools/kubernetes/ceph-helm.sh    |  93
-rw-r--r--  tools/kubernetes/demo_deploy.sh  |  58
-rw-r--r--  tools/kubernetes/k8s-cluster.sh  | 226
-rw-r--r--  tools/maas/deploy.sh             |  21
4 files changed, 288 insertions, 110 deletions
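
The thrust of the patch is a new <os> argument threaded through all three scripts, with MAAS deploying each OS under a default user named after the OS (hence $USER/$k8s_user replacing the hard-coded ubuntu@). A minimal sketch of the reworked top-level call follows; all node names, IPs, subnets, the key name, and the OSD disk are placeholders, and "helm" is only an assumed value for <ceph-mode>:

    # Illustrative only; every value below is a placeholder.
    # Args: "<hosts>" <os> <key> <master> "<workers>" <pub-net> <priv-net> <ceph-mode> <ceph-dev>
    git clone https://gerrit.opnfv.org/gerrit/models ~/models
    bash ~/models/tools/kubernetes/demo_deploy.sh \
      "node1 node2 node3" centos mykey \
      10.10.10.11 "10.10.10.12 10.10.10.13" \
      10.10.10.0/24 10.10.20.0/24 helm sdb
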
diff --git a/tools/kubernetes/ceph-helm.sh b/tools/kubernetes/ceph-helm.sh
index a1be588..084a4f7 100644
--- a/tools/kubernetes/ceph-helm.sh
+++ b/tools/kubernetes/ceph-helm.sh
@@ -37,21 +37,75 @@ function log() {
   echo "$f:$l ($(date)) $1"
 }
 
+function make_ceph_setup() {
+  tee ~/ceph_setup.sh <<'EOG'
+#!/bin/bash
+# Basic server pre-reqs
+dist=$(grep -m 1 ID /etc/os-release | awk -F '=' '{print $2}' | sed 's/"//g')
+if [[ "$dist" == "ubuntu" ]]; then
+  sudo apt-get install -y ceph ceph-common
+else
+  # per http://docs.ceph.com/docs/master/install/get-packages/
+  sudo tee /etc/yum.repos.d/ceph.repo <<'EOF'
+[ceph]
+name=Ceph packages for $basearch
+baseurl=https://download.ceph.com/rpm-luminous/el7/x86_64
+enabled=1
+priority=2
+gpgcheck=1
+gpgkey=https://download.ceph.com/keys/release.asc
+
+[ceph-noarch]
+name=Ceph noarch packages
+baseurl=https://download.ceph.com/rpm-luminous/el7/noarch
+enabled=1
+priority=2
+gpgcheck=1
+gpgkey=https://download.ceph.com/keys/release.asc
+
+[ceph-source]
+name=Ceph source packages
+baseurl=https://download.ceph.com/rpm-luminous/el7/SRPMS
+enabled=0
+priority=2
+gpgcheck=1
+gpgkey=https://download.ceph.com/keys/release.asc
+EOF
+  # TODO: find out why the package is unsigned and thus needs --nogpgcheck
+  sudo rpm --import 'https://download.ceph.com/keys/release.asc'
+  sudo yum install --nogpgcheck -y ceph ceph-common
+fi
+EOG
+}
+
 function setup_ceph() {
   nodes=$1
   private_net=$2
   public_net=$3
   dev=$4
-
-  log "Install ceph prerequisites"
-  sudo apt-get -y install ceph ceph-common
+  log "Install ceph and ceph-common"
+  make_ceph_setup
+  bash ~/ceph_setup.sh
+
+  # per https://github.com/att/netarbiter/tree/master/sds/ceph-docker/examples/helm
 
   log "Clone netarbiter"
   git clone https://github.com/att/netarbiter.git
-  cd netarbiter/sds/ceph-docker/examples/helm
+
+  if [[ "$dist" != "ubuntu" ]]; then
+    log "Update ceph-helm chart to point to centos images"
+    sed -i -- 's~daemon: docker.io/knowpd~#daemon: docker.io/knowpd~' \
+      netarbiter/sds/ceph-docker/examples/helm/ceph/values.yaml
+    sed -i -- 's~#daemon: docker.io/ceph~daemon: docker.io/ceph~' \
+      netarbiter/sds/ceph-docker/examples/helm/ceph/values.yaml
+    sed -i -- 's~ceph_init: docker.io/knowpd~#ceph_init: docker.io/knowpd~' \
+      netarbiter/sds/ceph-docker/examples/helm/ceph/values.yaml
+    sed -i -- 's~#ceph_init: docker.io/kollakube~ceph_init: docker.io/kollakube~' \
+      netarbiter/sds/ceph-docker/examples/helm/ceph/values.yaml
+  fi
 
   log "Prepare a ceph namespace in your K8s cluster"
+  cd netarbiter/sds/ceph-docker/examples/helm
   ./prep-ceph-ns.sh
 
   log "Run ceph-mon, ceph-mgr, ceph-mon-check, and rbd-provisioner"
@@ -93,24 +147,45 @@ EOF
 
   for node in $nodes; do
     log "install ceph, setup resolv.conf, zap disk for $node"
-    ssh -x -o StrictHostKeyChecking=no ubuntu@$node <<EOG
+    if [[ "$dist" == "ubuntu" ]]; then
+      ssh -x -o StrictHostKeyChecking=no $USER@$node \
+        sudo apt-get install -y ceph ceph-common
+    else
+      scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+        ~/ceph_setup.sh $USER@$node:/home/$USER/.
+      ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+        $USER@$node bash /home/$USER/ceph_setup.sh
+    fi
+
+    ssh -x -o StrictHostKeyChecking=no $USER@$node <<EOG
 cat <<EOF | sudo tee /etc/resolv.conf
 nameserver $kubedns
 search ceph.svc.cluster.local svc.cluster.local cluster.local
 options ndots:5
 EOF
-sudo apt install -y ceph ceph-common
 sudo ceph-disk zap /dev/$dev
 EOG
 
     log "Run ceph-osd at $node"
-    name=$(ssh -x -o StrictHostKeyChecking=no ubuntu@$node hostname)
+    name=$(ssh -x -o StrictHostKeyChecking=no $USER@$node hostname)
+    # TODO: try sudo due to error
+    # command_check_call: Running command: /usr/bin/ceph-osd --cluster ceph --mkfs -i 0 --monmap /var/lib/ceph/tmp/mnt.JKiQbp/activate.monmap --osd-data /var/lib/ceph/tmp/mnt.JKiQbp --osd-uuid 23e72c93-e5b3-48ad-b919-ef59fe92b189 --setuser ceph --setgroup disk ... -1 bluestore(/var/lib/ceph/tmp/mnt.JKiQbp) _setup_block_symlink_or_file failed to open block file: (13) Permission denied
+    # TODO: leaving out sudo resulted in "./helm-install-ceph-osd.sh: line 40: helm: command not found"
+    ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+      $USER@$node sudo chmod 777 /var/lib/ceph/tmp
     ./helm-install-ceph-osd.sh $name /dev/$dev
   done
 
   for node in $nodes; do
-    name=$(ssh -x -o StrictHostKeyChecking=no ubuntu@$node hostname)
+    name=$(ssh -x -o StrictHostKeyChecking=no $USER@$node hostname)
     pod=$(kubectl get pods --namespace ceph | awk "/$name/{print \$1}")
-    log "verify ceph-osd is Running at node $name"
+    while [[ "$pod" == "" ]]; do
+      log "ceph-osd pod not yet created at node $name, waiting 10 seconds"
+      kubectl get pods --namespace ceph
+      sleep 10
+      pod=$(kubectl get pods --namespace ceph | awk "/$name/{print \$1}")
+    done
+
+    log "wait till ceph-osd pod $pod is Running at node $name"
     status=$(kubectl get pods --namespace ceph $pod | awk "/$pod/ {print \$3}")
     while [[ "x$status" != "xRunning" ]]; do
       log "$pod status is $status. Waiting 10 seconds for it to be Running."
diff --git a/tools/kubernetes/demo_deploy.sh b/tools/kubernetes/demo_deploy.sh
index dba500b..61adaa2 100644
--- a/tools/kubernetes/demo_deploy.sh
+++ b/tools/kubernetes/demo_deploy.sh
@@ -22,16 +22,16 @@
 #. - OPNFV VES as an ONAP-compatible monitoring platform
 #.
 #. Prerequisites:
-#. - Ubuntu server for kubernetes cluster nodes (master and worker nodes)
-#. - MAAS server as cluster admin for kubernetes master/worker nodes
+#. - MAAS server as cluster admin for k8s master/worker nodes
 #. - Password-less ssh key provided for node setup
 #. - hostname of kubernetes master setup in DNS or /etc/hosts
 #. Usage: on the MAAS server
 #. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
-#. $ bash ~/models/tools/kubernetes/demo_deploy.sh <key> "<hosts>" <master>
-#.   "<workers>" <pub-net> <priv-net> <ceph-mode> <ceph-dev> [<extras>]
-#. <key>: name of private key for cluster node ssh (in current folder)
+#. $ bash ~/models/tools/kubernetes/demo_deploy.sh "<hosts>" <os> <key>
+#.   <master> "<workers>" <pub-net> <priv-net> <ceph-mode> <ceph-dev> [<extras>]
 #. <hosts>: space separated list of hostnames managed by MAAS
+#. <os>: OS to deploy, one of "ubuntu" (Xenial) or "centos" (CentOS 7)
+#. <key>: name of private key for cluster node ssh (in current folder)
 #. <master>: IP of cluster master node
 #. <workers>: space separated list of worker node IPs
 #. <pub-net>: CIDR formatted public network
@@ -58,7 +58,7 @@ function step_end() {
 function run_master() {
   start=$((`date +%s`/60))
   ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
-    ubuntu@$k8s_master <<EOF
+    $k8s_user@$k8s_master <<EOF
 exec ssh-agent bash
 ssh-add $k8s_key
 $1
@@ -66,19 +66,22 @@ EOF
   step_end "$1"
 }
 
-extras=$9
+extras=${10}
 
+# Note: MAAS deploys OSes with the default user named after the OS
 cat <<EOF >~/k8s_env.sh
-k8s_key=$1
-k8s_nodes="$2"
-k8s_master=$3
-k8s_workers="$4"
-k8s_priv_net=$5
-k8s_pub_net=$6
-k8s_ceph_mode=$7
-k8s_ceph_dev=$8
-export k8s_key
+k8s_nodes="$1"
+k8s_user=$2
+k8s_key=$3
+k8s_master=$4
+k8s_workers="$5"
+k8s_priv_net=$6
+k8s_pub_net=$7
+k8s_ceph_mode=$8
+k8s_ceph_dev=$9
 export k8s_nodes
+export k8s_user
+export k8s_key
 export k8s_master
 export k8s_workers
 export k8s_priv_net
@@ -89,17 +92,17 @@ EOF
 source ~/k8s_env.sh
 env | grep k8s_
 
-source ~/models/tools/maas/deploy.sh $k8s_key "$k8s_nodes" $extras
+source ~/models/tools/maas/deploy.sh $k8s_user $k8s_key "$k8s_nodes" $extras
 eval `ssh-agent`
 ssh-add $k8s_key
 scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $k8s_key \
-  ubuntu@$k8s_master:/home/ubuntu/$k8s_key
+  $k8s_user@$k8s_master:/home/$k8s_user/$k8s_key
 scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ~/k8s_env.sh \
-  ubuntu@$k8s_master:/home/ubuntu/.
+  $k8s_user@$k8s_master:/home/$k8s_user/.
 
 echo; echo "$0 $(date): Setting up kubernetes master..."
 scp -r -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
-  ~/models/tools/kubernetes/* ubuntu@$k8s_master:/home/ubuntu/.
+  ~/models/tools/kubernetes/* $k8s_user@$k8s_master:/home/$k8s_user/.
 run_master "bash k8s-cluster.sh master"
 
 echo; echo "$0 $(date): Setting up kubernetes workers..."
@@ -120,12 +123,12 @@ run_master "bash k8s-cluster.sh demo start dokuwiki"
 
 echo; echo "Setting up Prometheus..."
 scp -r -o StrictHostKeyChecking=no ~/models/tools/prometheus/* \
-  ubuntu@$k8s_master:/home/ubuntu/.
+  $k8s_user@$k8s_master:/home/$k8s_user/.
 run_master "bash prometheus-tools.sh all \"$k8s_workers\""
 
 echo; echo "$0 $(date): Setting up cloudify..."
 scp -r -o StrictHostKeyChecking=no ~/models/tools/cloudify \
-  ubuntu@$k8s_master:/home/ubuntu/.
+  $k8s_user@$k8s_master:/home/$k8s_user/.
 run_master "bash cloudify/k8s-cloudify.sh prereqs"
 run_master "bash cloudify/k8s-cloudify.sh setup"
 
@@ -135,6 +138,7 @@ run "bash $HOME/models/tools/cloudify/k8s-cloudify.sh demo start"
 echo; echo "$0 $(date): Setting up VES"
 # not re-cloned if existing - allows patch testing locally
 if [[ ! -d ~/ves ]]; then
+  echo; echo "$0 $(date): Cloning VES"
   git clone https://gerrit.opnfv.org/gerrit/ves ~/ves
 fi
 ves_influxdb_host=$k8s_master:8086
@@ -143,18 +147,18 @@ ves_grafana_host=$k8s_master:3000
 export ves_grafana_host
 ves_grafana_auth=admin:admin
 export ves_grafana_auth
-ves_kafka_hostname=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$k8s_master hostname)
+ves_kafka_hostname=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $k8s_user@$k8s_master hostname)
 export ves_kafka_hostname
 ves_loglevel=$ves_loglevel
 export ves_loglevel
 # Can't pass quoted strings in commands
 start=$((`date +%s`/60))
-bash $HOME/ves/tools/demo_deploy.sh $k8s_key $k8s_master "$k8s_workers"
-step_end "bash $HOME/ves/tools/demo_deploy.sh $k8s_key $k8s_master \"$k8s_workers\""
+bash $HOME/ves/tools/demo_deploy.sh $k8s_key $k8s_user $k8s_master "$k8s_workers"
+step_end "bash $HOME/ves/tools/demo_deploy.sh $k8s_key $k8s_user $k8s_master \"$k8s_workers\""
 
 echo; echo "$0 $(date): All done!"
-export NODE_PORT=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$k8s_master kubectl get --namespace default -o jsonpath="{.spec.ports[0].nodePort}" services dw-dokuwiki)
-export NODE_IP=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$k8s_master kubectl get nodes --namespace default -o jsonpath="{.items[0].status.addresses[0].address}")
+export NODE_PORT=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $k8s_user@$k8s_master kubectl get --namespace default -o jsonpath="{.spec.ports[0].nodePort}" services dw-dokuwiki)
+export NODE_IP=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $k8s_user@$k8s_master kubectl get nodes --namespace default -o jsonpath="{.items[0].status.addresses[0].address}")
 echo "Helm chart demo app dokuwiki is available at http://$NODE_IP:$NODE_PORT/"
 # TODO update Cloudify demo app to have public exposed service address
 port=$(bash ~/models/tools/cloudify/k8s-cloudify.sh port nginx $k8s_master)
diff --git a/tools/kubernetes/k8s-cluster.sh b/tools/kubernetes/k8s-cluster.sh
index 1700a6a..cf1e840 100644
--- a/tools/kubernetes/k8s-cluster.sh
+++ b/tools/kubernetes/k8s-cluster.sh
@@ -15,7 +15,7 @@
 #
 #. What this is: script to setup a kubernetes cluster with calico as CNI
 #. Prerequisites:
-#. - Ubuntu xenial server for master and worker nodes
+#. - Ubuntu Xenial or CentOS 7 server for master and worker nodes
 #. - key-based auth setup for ssh/scp between master and worker nodes
 #. - 192.168.0.0/16 should not be used on your server network interface subnets
 #. Usage:
@@ -54,64 +54,103 @@ function log() {
   f=$(caller 0 | awk '{print $2}')
   l=$(caller 0 | awk '{print $1}')
   echo; echo "$f:$l ($(date)) $1"
+  kubectl get pods --all-namespaces
 }
 
 function setup_prereqs() {
   log "Create prerequisite setup script"
-  cat <<'EOG' >/tmp/prereqs.sh
+  cat <<'EOG' >~/prereqs.sh
 #!/bin/bash
 # Basic server pre-reqs
-echo; echo "prereqs.sh: ($(date)) Basic prerequisites"
-sudo apt-get update
-sudo apt-get upgrade -y
+dist=$(grep -m 1 ID /etc/os-release | awk -F '=' '{print $2}' | sed 's/"//g')
 if [[ $(grep -c $HOSTNAME /etc/hosts) -eq 0 ]]; then
   echo; echo "prereqs.sh: ($(date)) Add $HOSTNAME to /etc/hosts"
-  echo "$(ip route get 8.8.8.8 | awk '{print $NF; exit}') $HOSTNAME" \
+  # have to add "/sbin" to the path of the ip command for centos
+  echo "$(/sbin/ip route get 8.8.8.8 | awk '{print $NF; exit}') $HOSTNAME" \
     | sudo tee -a /etc/hosts
 fi
-echo; echo "prereqs.sh: ($(date)) Install latest docker"
-sudo apt-get install -y docker.io
-# Alternate for 1.12.6
-#sudo apt-get install -y libltdl7
-#wget https://packages.docker.com/1.12/apt/repo/pool/main/d/docker-engine/docker-engine_1.12.6~cs8-0~ubuntu-xenial_amd64.deb
-#sudo dpkg -i docker-engine_1.12.6~cs8-0~ubuntu-xenial_amd64.deb
-sudo service docker restart
-echo; echo "prereqs.sh: ($(date)) Get k8s packages"
-export KUBE_VERSION=1.7.5
-# per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
-# Install kubelet, kubeadm, kubectl per https://kubernetes.io/docs/setup/independent/install-kubeadm/
-sudo apt-get update && sudo apt-get install -y apt-transport-https
-curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
-cat <<EOF | sudo tee /etc/apt/sources.list.d/kubernetes.list
+if [[ "$dist" == "ubuntu" ]]; then
+  # Per https://kubernetes.io/docs/setup/independent/install-kubeadm/
+  echo; echo "prereqs.sh: ($(date)) Basic prerequisites"
+  sudo apt-get update
+  sudo apt-get upgrade -y
+  echo; echo "prereqs.sh: ($(date)) Install latest docker"
+  sudo apt-get install -y docker.io
+  # Alternate for 1.12.6
+  #sudo apt-get install -y libltdl7
+  #wget https://packages.docker.com/1.12/apt/repo/pool/main/d/docker-engine/docker-engine_1.12.6~cs8-0~ubuntu-xenial_amd64.deb
+  #sudo dpkg -i docker-engine_1.12.6~cs8-0~ubuntu-xenial_amd64.deb
+  sudo service docker restart
+  echo; echo "prereqs.sh: ($(date)) Get k8s packages"
+  export KUBE_VERSION=1.7.5
+  # per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
+  # Install kubelet, kubeadm, kubectl per https://kubernetes.io/docs/setup/independent/install-kubeadm/
+  sudo apt-get update && sudo apt-get install -y apt-transport-https
+  curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
+  cat <<EOF | sudo tee /etc/apt/sources.list.d/kubernetes.list
 deb http://apt.kubernetes.io/ kubernetes-xenial main
 EOF
-sudo apt-get update
-echo; echo "prereqs.sh: ($(date)) Install kubectl, kubelet, kubeadm"
-sudo apt-get -y install --allow-downgrades kubectl=${KUBE_VERSION}-00 \
-  kubelet=${KUBE_VERSION}-00 kubeadm=${KUBE_VERSION}-00
-echo; echo "prereqs.sh: ($(date)) Install jq for API output parsing"
-sudo apt-get -y install jq
-echo; echo "prereqs.sh: ($(date)) Set firewall rules"
-# Per https://kubernetes.io/docs/setup/independent/install-kubeadm/
-if [[ "$(sudo ufw status)" == "Status: active" ]]; then
-  if [[ "$1" == "master" ]]; then
-    sudo ufw allow 6443/tcp
-    sudo ufw allow 2379:2380/tcp
-    sudo ufw allow 10250/tcp
-    sudo ufw allow 10251/tcp
-    sudo ufw allow 10252/tcp
-    sudo ufw allow 10255/tcp
-  else
-    sudo ufw allow 10250/tcp
-    sudo ufw allow 10255/tcp
-    sudo ufw allow 30000:32767/tcp
+  sudo apt-get update
+  echo; echo "prereqs.sh: ($(date)) Install kubectl, kubelet, kubeadm"
+  sudo apt-get -y install --allow-downgrades kubectl=${KUBE_VERSION}-00 \
+    kubelet=${KUBE_VERSION}-00 kubeadm=${KUBE_VERSION}-00
+  echo; echo "prereqs.sh: ($(date)) Install jq for API output parsing"
+  sudo apt-get install -y jq
+  if [[ "$(sudo ufw status)" == "Status: active" ]]; then
+    echo; echo "prereqs.sh: ($(date)) Set firewall rules"
+    if [[ "$1" == "master" ]]; then
+      sudo ufw allow 6443/tcp
+      sudo ufw allow 2379:2380/tcp
+      sudo ufw allow 10250/tcp
+      sudo ufw allow 10251/tcp
+      sudo ufw allow 10252/tcp
+      sudo ufw allow 10255/tcp
+    else
+      sudo ufw allow 10250/tcp
+      sudo ufw allow 10255/tcp
+      sudo ufw allow 30000:32767/tcp
+    fi
   fi
+  # TODO: fix need for this workaround: disable firewall since the commands
+  # above do not appear to open the needed ports, even if ufw is inactive
+  # (symptom: nodeport requests fail unless sent from within the cluster or
+  # to the node IP where the pod is assigned) issue discovered ~11/16/17
+  sudo ufw disable
+else
+  echo; echo "prereqs.sh: ($(date)) Basic prerequisites"
+  sudo yum update -y
+  sudo yum install -y wget git
+  echo; echo "prereqs.sh: ($(date)) Install latest docker"
+  # per https://docs.docker.com/engine/installation/linux/docker-ce/centos/#install-from-a-package
+  sudo yum install -y docker
+  sudo systemctl enable docker
+  sudo systemctl start docker
+# wget https://download.docker.com/linux/centos/7/x86_64/stable/Packages/docker-ce-17.09.0.ce-1.el7.centos.x86_64.rpm
+# sudo yum install -y docker-ce-17.09.0.ce-1.el7.centos.x86_64.rpm
+# sudo systemctl start docker
+  echo; echo "prereqs.sh: ($(date)) Install kubectl, kubelet, kubeadm"
+  cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
+[kubernetes]
+name=Kubernetes
+baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+enabled=1
+gpgcheck=1
+repo_gpgcheck=1
+gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
+EOF
+  sudo setenforce 0
+  sudo yum install -y kubelet kubeadm kubectl
+  sudo systemctl enable kubelet
+  sudo systemctl start kubelet
+  echo; echo "prereqs.sh: ($(date)) Install jq for API output parsing"
+  sudo yum install -y jq
+  echo; echo "prereqs.sh: ($(date)) Set firewall rules"
+  cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
+net.bridge.bridge-nf-call-ip6tables = 1
+net.bridge.bridge-nf-call-iptables = 1
+EOF
+  sudo sysctl --system
 fi
-# TODO: fix need for this workaround: disable firewall since the commands
-# above do not appear to open the needed ports, even if ufw is inactive
-# (symptom: nodeport requests fail unless sent from within the cluster or
-# to the node IP where the pod is assigned) issue discovered ~11/16/17
-sudo ufw disable
 EOG
 }
 
@@ -120,33 +159,58 @@ function setup_k8s_master() {
   setup_prereqs
 
   # Install master
-  bash /tmp/prereqs.sh master
+  bash ~/prereqs.sh master
   # per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
   # If the following command fails, run "kubeadm reset" before trying again
   # --pod-network-cidr=192.168.0.0/16 is required for calico; this should not
   # conflict with your server network interface subnets
+  log "Reset kubeadm in case of a pre-existing cluster"
+  sudo kubeadm reset
+  # Start cluster
+  log "Start the cluster"
   sudo kubeadm init --pod-network-cidr=192.168.0.0/16 >>/tmp/kubeadm.out
   cat /tmp/kubeadm.out
   export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out)
/tmp/kubeadm.out) log "Cluster join command for manual use if needed: $k8s_joincmd" - - # Start cluster - log "Start the cluster" mkdir -p $HOME/.kube sudo cp -f /etc/kubernetes/admin.conf $HOME/.kube/config sudo chown $(id -u):$(id -g) $HOME/.kube/config # Deploy pod network log "Deploy calico as CNI" # Updated to deploy Calico 2.6 per the create-cluster-kubeadm guide above - # sudo kubectl apply -f http://docs.projectcalico.org/v2.4/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml - sudo kubectl apply -f https://docs.projectcalico.org/v2.6/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml + # kubectl apply -f http://docs.projectcalico.org/v2.4/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml + kubectl apply -f https://docs.projectcalico.org/v2.6/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml + + # TODO: document process dependency + # Failure to wait for all calico pods to be running can cause the first worker + # to be incompletely setup. Symptom is that node_ports cannot be routed + # via that node (no response - incoming SYN packets are dropped). + log "Wait for calico pods to be Running" + # calico-etcd, calico-kube-controllers, calico-node + pods=$(kubectl get pods --namespace kube-system | grep -c calico) + while [[ $pods -lt 3 ]]; do + log "all calico pods are not yet created. Waiting 10 seconds" + sleep 10 + pods=$(kubectl get pods --namespace kube-system | grep -c calico) + done + + pods=$(kubectl get pods --all-namespaces | awk '/calico/ {print $2}') + for pod in $pods; do + status=$(kubectl get pods --all-namespaces | awk "/$pod/ {print \$4}") + while [[ "$status" != "Running" ]]; do + log "$pod status is $status. Waiting 10 seconds" + sleep 10 + status=$(kubectl get pods --all-namespaces | awk "/$pod/ {print \$4}") + done + log "$pod status is $status" + done log "Wait for kubedns to be Running" - kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}') + kubedns=$(kubectl get pods --all-namespaces | awk '/kube-dns/ {print $4}') while [[ "$kubedns" != "Running" ]]; do - log "kube-dns status is $kubedns. Waiting 60 seconds for it to be 'Running'" + log "kube-dns status is $kubedns. Waiting 60 seconds" sleep 60 - kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}') + kubedns=$(kubectl get pods --all-namespaces | awk '/kube-dns/ {print $4}') done log "kube-dns status is $kubedns" } @@ -156,35 +220,61 @@ function setup_k8s_workers() { export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out) log "Installing workers at $1 with joincmd: $k8s_joincmd" +# TODO: kubeadm reset below is workaround for +# Ubuntu: "[preflight] Some fatal errors occurred: /var/lib/kubelet is not empty" +# per https://github.com/kubernetes/kubeadm/issues/1 +# Centos: "Failed to start ContainerManager failed to initialize top +# level QOS containers: root container /kubepods doesn't exist" + tee start_worker.sh <<EOF +sudo kubeadm reset +sudo $k8s_joincmd +EOF + for worker in $workers; do - log "Install worker at $worker" + host=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $USER@$worker hostname) + log "Install worker at $worker hostname $host" if ! scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ - /tmp/prereqs.sh ubuntu@$worker:/tmp/prereqs.sh ; then + ~/prereqs.sh $USER@$worker:/home/$USER/. 
; then fail "Failed copying setup files to $worker" fi + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + $USER@$worker bash prereqs.sh scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ~/k8s_env.sh \ - ubuntu@$worker:/home/ubuntu/. + $USER@$worker:/home/$USER/. + scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + start_worker.sh $USER@$worker:/home/$USER/. ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ - ubuntu@$worker <<EOF > /dev/null 2>&1 & -bash /tmp/prereqs.sh worker -# Workaround for "[preflight] Some fatal errors occurred: /var/lib/kubelet -# is not empty" per https://github.com/kubernetes/kubeadm/issues/1 -sudo kubeadm reset -sudo $k8s_joincmd -EOF + $USER@$worker bash start_worker.sh done for worker in $workers; do - host=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$worker hostname) - log "checking node $host" + host=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $USER@$worker hostname) + log "checking that node $host is 'Ready'" status=$(kubectl get nodes | awk "/$host/ {print \$2}") while [[ "$status" != "Ready" ]]; do - log "node $host is \"$status\", waiting 10 seconds for it to be 'Ready'." + log "node $host is \"$status\", waiting 10 seconds" status=$(kubectl get nodes | awk "/$host/ {print \$2}") + ((tries++)) + if [[ tries -gt 18 ]]; then + log "node $host is \"$status\" after 3 minutes; resetting kubeadm" + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + $USER@$worker bash start_worker.sh + tries=1 + fi sleep 10 done log "node $host is 'Ready'." done + + log "***** kube proxy pods *****" + pods=$(kubectl get pods --all-namespaces | awk '/kube-proxy/ {print $2}') + for pod in $pods; do + echo; echo "**** $pod ****" + kubectl describe pods --namespace kube-system $pod + echo; echo "**** $pod logs ****" + kubectl logs --namespace kube-system $pod + done + log "Cluster is ready (all nodes in 'kubectl get nodes' show as 'Ready')." } @@ -196,6 +286,8 @@ function setup_ceph() { fi } +dist=$(grep --m 1 ID /etc/os-release | awk -F '=' '{print $2}' | sed 's/"//g') + workers="$2" privnet=$3 pubnet=$4 diff --git a/tools/maas/deploy.sh b/tools/maas/deploy.sh index 55984da..484a389 100644 --- a/tools/maas/deploy.sh +++ b/tools/maas/deploy.sh @@ -18,13 +18,15 @@ #. #. Prerequisites: #. - MAAS server configured to admin a set of servers +#. - User is logged into the MAAS server e.g. via maas login opnfv <url> #. - Password-less ssh key provided for node setup #. Usage: on the MAAS server #. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models -#. $ source ~/models/tools/maas/demo_deploy.sh <key> "<hosts>" [<extras>] +#. $ source ~/models/tools/maas/demo_deploy.sh <os> <key> "<hosts>" [<extras>] +#. <os>: "xenial" (Ubtuntu Xenial) or "centos" (Centos 7) #. <key>: name of private key for cluster node ssh (in current folder) #. <hosts>: space separated list of hostnames managed by MAAS -#. <extras>: optional name of script for extra setup functions as needed +#. 
+#. <extras>: optional name and parameters of script for extra setup functions
 
 function log() {
   f=$(caller 0 | awk '{print $2}')
@@ -57,7 +59,11 @@ function deploy_nodes() {
     log "Deploying node $node"
     id=$(maas opnfv machines read hostname=$node | jq -r '.[0].system_id')
     maas opnfv machines allocate system_id=$id
-    maas opnfv machine deploy $id
+    if [[ "$os" == "ubuntu" ]]; then
+      maas opnfv machine deploy $id
+    else
+      maas opnfv machine deploy $id distro_series=$os hwe_kernel=generic
+    fi
   done
 }
 
@@ -68,9 +74,10 @@ function wait_nodes_status() {
   done
 }
 
-key=$1
-nodes="$2"
-extras=$3
+os=$1
+key=$2
+nodes="$3"
+extras="$4"
 
 release_nodes "$nodes"
 wait_nodes_status "$nodes" Ready
@@ -78,4 +85,4 @@ deploy_nodes "$nodes"
 wait_nodes_status "$nodes" Deployed
 eval `ssh-agent`
 ssh-add $key
-if [[ "x$extras" != "x" ]]; then source $extras; fi
+if [[ "x$extras" != "x" ]]; then source $extras $5 $6 $7 $8 $9; fi
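
Several hunks above repeat the same poll-until-Running idiom (the calico pods, kube-dns, the per-node ceph-osd pod, and the worker Ready check). A generic sketch of that pattern is below; wait_running is a hypothetical helper, not part of this patch, and it assumes "kubectl get pods --namespace <ns>" output where column 1 is the pod name and column 3 its STATUS:

    #!/bin/bash
    # Hypothetical helper distilling the wait loops added in this commit.
    function wait_running() {
      local pattern=$1 namespace=$2
      # Column 3 of namespaced "kubectl get pods" output is the pod STATUS
      local status=$(kubectl get pods --namespace "$namespace" \
        | awk "/$pattern/ {print \$3; exit}")
      while [[ "$status" != "Running" ]]; do
        echo "$pattern status is '${status:-not found}'; waiting 10 seconds"
        sleep 10
        status=$(kubectl get pods --namespace "$namespace" \
          | awk "/$pattern/ {print \$3; exit}")
      done
      echo "$pattern is Running"
    }
    # e.g. the per-node ceph-osd wait in ceph-helm.sh reduces to:
    # wait_running ceph-osd ceph

Polling with a fixed sleep matches the style the scripts already use; the only refinement here is folding the "pod not yet created" and "pod not yet Running" loops into one, since awk returns an empty string in both cases.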