From 0ed35881347dc09ede49d1520f4870a326bf640e Mon Sep 17 00:00:00 2001 From: Bryan Sullivan Date: Mon, 4 Dec 2017 16:19:31 -0800 Subject: Add centos support JIRA: MODELS-2 Reverified ubuntu still works Change-Id: I388238b70306cd9c6989d11c09dddcf809d081a1 Signed-off-by: Bryan Sullivan --- tools/kubernetes/k8s-cluster.sh | 226 ++++++++++++++++++++++++++++------------ 1 file changed, 159 insertions(+), 67 deletions(-) (limited to 'tools/kubernetes/k8s-cluster.sh') diff --git a/tools/kubernetes/k8s-cluster.sh b/tools/kubernetes/k8s-cluster.sh index 1700a6a..cf1e840 100644 --- a/tools/kubernetes/k8s-cluster.sh +++ b/tools/kubernetes/k8s-cluster.sh @@ -15,7 +15,7 @@ # #. What this is: script to setup a kubernetes cluster with calico as sni #. Prerequisites: -#. - Ubuntu xenial server for master and worker nodes +#. - Ubuntu Xenial or Centos 7 server for master and worker nodes #. - key-based auth setup for ssh/scp between master and worker nodes #. - 192.168.0.0/16 should not be used on your server network interface subnets #. Usage: @@ -54,64 +54,103 @@ function log() { f=$(caller 0 | awk '{print $2}') l=$(caller 0 | awk '{print $1}') echo; echo "$f:$l ($(date)) $1" + kubectl get pods --all-namespaces } function setup_prereqs() { log "Create prerequisite setup script" - cat <<'EOG' >/tmp/prereqs.sh + cat <<'EOG' >~/prereqs.sh #!/bin/bash # Basic server pre-reqs -echo; echo "prereqs.sh: ($(date)) Basic prerequisites" -sudo apt-get update -sudo apt-get upgrade -y +dist=$(grep --m 1 ID /etc/os-release | awk -F '=' '{print $2}' | sed 's/"//g') if [[ $(grep -c $HOSTNAME /etc/hosts) -eq 0 ]]; then echo; echo "prereqs.sh: ($(date)) Add $HOSTNAME to /etc/hosts" - echo "$(ip route get 8.8.8.8 | awk '{print $NF; exit}') $HOSTNAME" \ + # have to add "/sbin" to path of IP command for centos + echo "$(/sbin/ip route get 8.8.8.8 | awk '{print $NF; exit}') $HOSTNAME" \ | sudo tee -a /etc/hosts fi -echo; echo "prereqs.sh: ($(date)) Install latest docker" -sudo apt-get install -y docker.io -# Alternate for 1.12.6 -#sudo apt-get install -y libltdl7 -#wget https://packages.docker.com/1.12/apt/repo/pool/main/d/docker-engine/docker-engine_1.12.6~cs8-0~ubuntu-xenial_amd64.deb -#sudo dpkg -i docker-engine_1.12.6~cs8-0~ubuntu-xenial_amd64.deb -sudo service docker restart -echo; echo "prereqs.sh: ($(date)) Get k8s packages" -export KUBE_VERSION=1.7.5 -# per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/ -# Install kubelet, kubeadm, kubectl per https://kubernetes.io/docs/setup/independent/install-kubeadm/ -sudo apt-get update && sudo apt-get install -y apt-transport-https -curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - -cat <>/tmp/kubeadm.out cat /tmp/kubeadm.out export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out) log "Cluster join command for manual use if needed: $k8s_joincmd" - - # Start cluster - log "Start the cluster" mkdir -p $HOME/.kube sudo cp -f /etc/kubernetes/admin.conf $HOME/.kube/config sudo chown $(id -u):$(id -g) $HOME/.kube/config # Deploy pod network log "Deploy calico as CNI" # Updated to deploy Calico 2.6 per the create-cluster-kubeadm guide above - # sudo kubectl apply -f http://docs.projectcalico.org/v2.4/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml - sudo kubectl apply -f https://docs.projectcalico.org/v2.6/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml + # kubectl apply -f http://docs.projectcalico.org/v2.4/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml + kubectl apply -f https://docs.projectcalico.org/v2.6/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml + + # TODO: document process dependency + # Failure to wait for all calico pods to be running can cause the first worker + # to be incompletely setup. Symptom is that node_ports cannot be routed + # via that node (no response - incoming SYN packets are dropped). + log "Wait for calico pods to be Running" + # calico-etcd, calico-kube-controllers, calico-node + pods=$(kubectl get pods --namespace kube-system | grep -c calico) + while [[ $pods -lt 3 ]]; do + log "all calico pods are not yet created. Waiting 10 seconds" + sleep 10 + pods=$(kubectl get pods --namespace kube-system | grep -c calico) + done + + pods=$(kubectl get pods --all-namespaces | awk '/calico/ {print $2}') + for pod in $pods; do + status=$(kubectl get pods --all-namespaces | awk "/$pod/ {print \$4}") + while [[ "$status" != "Running" ]]; do + log "$pod status is $status. Waiting 10 seconds" + sleep 10 + status=$(kubectl get pods --all-namespaces | awk "/$pod/ {print \$4}") + done + log "$pod status is $status" + done log "Wait for kubedns to be Running" - kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}') + kubedns=$(kubectl get pods --all-namespaces | awk '/kube-dns/ {print $4}') while [[ "$kubedns" != "Running" ]]; do - log "kube-dns status is $kubedns. Waiting 60 seconds for it to be 'Running'" + log "kube-dns status is $kubedns. Waiting 60 seconds" sleep 60 - kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}') + kubedns=$(kubectl get pods --all-namespaces | awk '/kube-dns/ {print $4}') done log "kube-dns status is $kubedns" } @@ -156,35 +220,61 @@ function setup_k8s_workers() { export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out) log "Installing workers at $1 with joincmd: $k8s_joincmd" +# TODO: kubeadm reset below is workaround for +# Ubuntu: "[preflight] Some fatal errors occurred: /var/lib/kubelet is not empty" +# per https://github.com/kubernetes/kubeadm/issues/1 +# Centos: "Failed to start ContainerManager failed to initialize top +# level QOS containers: root container /kubepods doesn't exist" + tee start_worker.sh < /dev/null 2>&1 & -bash /tmp/prereqs.sh worker -# Workaround for "[preflight] Some fatal errors occurred: /var/lib/kubelet -# is not empty" per https://github.com/kubernetes/kubeadm/issues/1 -sudo kubeadm reset -sudo $k8s_joincmd -EOF + $USER@$worker bash start_worker.sh done for worker in $workers; do - host=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$worker hostname) - log "checking node $host" + host=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $USER@$worker hostname) + log "checking that node $host is 'Ready'" status=$(kubectl get nodes | awk "/$host/ {print \$2}") while [[ "$status" != "Ready" ]]; do - log "node $host is \"$status\", waiting 10 seconds for it to be 'Ready'." + log "node $host is \"$status\", waiting 10 seconds" status=$(kubectl get nodes | awk "/$host/ {print \$2}") + ((tries++)) + if [[ tries -gt 18 ]]; then + log "node $host is \"$status\" after 3 minutes; resetting kubeadm" + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + $USER@$worker bash start_worker.sh + tries=1 + fi sleep 10 done log "node $host is 'Ready'." done + + log "***** kube proxy pods *****" + pods=$(kubectl get pods --all-namespaces | awk '/kube-proxy/ {print $2}') + for pod in $pods; do + echo; echo "**** $pod ****" + kubectl describe pods --namespace kube-system $pod + echo; echo "**** $pod logs ****" + kubectl logs --namespace kube-system $pod + done + log "Cluster is ready (all nodes in 'kubectl get nodes' show as 'Ready')." } @@ -196,6 +286,8 @@ function setup_ceph() { fi } +dist=$(grep --m 1 ID /etc/os-release | awk -F '=' '{print $2}' | sed 's/"//g') + workers="$2" privnet=$3 pubnet=$4 -- cgit 1.2.3-korg