summaryrefslogtreecommitdiffstats
path: root/tools/kubernetes/k8s-cluster.sh
diff options
context:
space:
mode:
Diffstat (limited to 'tools/kubernetes/k8s-cluster.sh')
-rw-r--r--tools/kubernetes/k8s-cluster.sh226
1 files changed, 159 insertions, 67 deletions
diff --git a/tools/kubernetes/k8s-cluster.sh b/tools/kubernetes/k8s-cluster.sh
index 1700a6a..cf1e840 100644
--- a/tools/kubernetes/k8s-cluster.sh
+++ b/tools/kubernetes/k8s-cluster.sh
@@ -15,7 +15,7 @@
#
#. What this is: script to setup a kubernetes cluster with calico as cni
#. Prerequisites:
-#. - Ubuntu xenial server for master and worker nodes
+#. - Ubuntu Xenial or Centos 7 server for master and worker nodes
#. - key-based auth setup for ssh/scp between master and worker nodes
#. - 192.168.0.0/16 should not be used on your server network interface subnets
#. Usage:
@@ -54,64 +54,103 @@ function log() {
f=$(caller 0 | awk '{print $2}')
l=$(caller 0 | awk '{print $1}')
echo; echo "$f:$l ($(date)) $1"
+ kubectl get pods --all-namespaces
}
function setup_prereqs() {
log "Create prerequisite setup script"
- cat <<'EOG' >/tmp/prereqs.sh
+ cat <<'EOG' >~/prereqs.sh
#!/bin/bash
# Basic server pre-reqs
-echo; echo "prereqs.sh: ($(date)) Basic prerequisites"
-sudo apt-get update
-sudo apt-get upgrade -y
+dist=$(grep -m 1 '^ID=' /etc/os-release | awk -F '=' '{print $2}' | sed 's/"//g')
if [[ $(grep -c $HOSTNAME /etc/hosts) -eq 0 ]]; then
echo; echo "prereqs.sh: ($(date)) Add $HOSTNAME to /etc/hosts"
- echo "$(ip route get 8.8.8.8 | awk '{print $NF; exit}') $HOSTNAME" \
+ # have to add "/sbin" to path of IP command for centos
+ echo "$(/sbin/ip route get 8.8.8.8 | awk '{print $NF; exit}') $HOSTNAME" \
| sudo tee -a /etc/hosts
fi
-echo; echo "prereqs.sh: ($(date)) Install latest docker"
-sudo apt-get install -y docker.io
-# Alternate for 1.12.6
-#sudo apt-get install -y libltdl7
-#wget https://packages.docker.com/1.12/apt/repo/pool/main/d/docker-engine/docker-engine_1.12.6~cs8-0~ubuntu-xenial_amd64.deb
-#sudo dpkg -i docker-engine_1.12.6~cs8-0~ubuntu-xenial_amd64.deb
-sudo service docker restart
-echo; echo "prereqs.sh: ($(date)) Get k8s packages"
-export KUBE_VERSION=1.7.5
-# per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
-# Install kubelet, kubeadm, kubectl per https://kubernetes.io/docs/setup/independent/install-kubeadm/
-sudo apt-get update && sudo apt-get install -y apt-transport-https
-curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
-cat <<EOF | sudo tee /etc/apt/sources.list.d/kubernetes.list
+if [[ "$dist" == "ubuntu" ]]; then
+ # Per https://kubernetes.io/docs/setup/independent/install-kubeadm/
+ echo; echo "prereqs.sh: ($(date)) Basic prerequisites"
+ sudo apt-get update
+ sudo apt-get upgrade -y
+ echo; echo "prereqs.sh: ($(date)) Install latest docker"
+ sudo apt-get install -y docker.io
+ # Alternate for 1.12.6
+ #sudo apt-get install -y libltdl7
+ #wget https://packages.docker.com/1.12/apt/repo/pool/main/d/docker-engine/docker-engine_1.12.6~cs8-0~ubuntu-xenial_amd64.deb
+ #sudo dpkg -i docker-engine_1.12.6~cs8-0~ubuntu-xenial_amd64.deb
+ sudo service docker restart
+ echo; echo "prereqs.sh: ($(date)) Get k8s packages"
+ export KUBE_VERSION=1.7.5
+ # per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
+ # Install kubelet, kubeadm, kubectl per https://kubernetes.io/docs/setup/independent/install-kubeadm/
+ sudo apt-get update && sudo apt-get install -y apt-transport-https
+ curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
+ cat <<EOF | sudo tee /etc/apt/sources.list.d/kubernetes.list
deb http://apt.kubernetes.io/ kubernetes-xenial main
EOF
-sudo apt-get update
-echo; echo "prereqs.sh: ($(date)) Install kubectl, kubelet, kubeadm"
-sudo apt-get -y install --allow-downgrades kubectl=${KUBE_VERSION}-00 \
- kubelet=${KUBE_VERSION}-00 kubeadm=${KUBE_VERSION}-00
-echo; echo "prereqs.sh: ($(date)) Install jq for API output parsing"
-sudo apt-get -y install jq
-echo; echo "prereqs.sh: ($(date)) Set firewall rules"
-# Per https://kubernetes.io/docs/setup/independent/install-kubeadm/
-if [[ "$(sudo ufw status)" == "Status: active" ]]; then
- if [[ "$1" == "master" ]]; then
- sudo ufw allow 6443/tcp
- sudo ufw allow 2379:2380/tcp
- sudo ufw allow 10250/tcp
- sudo ufw allow 10251/tcp
- sudo ufw allow 10252/tcp
- sudo ufw allow 10255/tcp
- else
- sudo ufw allow 10250/tcp
- sudo ufw allow 10255/tcp
- sudo ufw allow 30000:32767/tcp
+ sudo apt-get update
+ echo; echo "prereqs.sh: ($(date)) Install kubectl, kubelet, kubeadm"
+ sudo apt-get -y install --allow-downgrades kubectl=${KUBE_VERSION}-00 \
+ kubelet=${KUBE_VERSION}-00 kubeadm=${KUBE_VERSION}-00
+ echo; echo "prereqs.sh: ($(date)) Install jq for API output parsing"
+ sudo apt-get install -y jq
+ if [[ "$(sudo ufw status)" == "Status: active" ]]; then
+ echo; echo "prereqs.sh: ($(date)) Set firewall rules"
+ if [[ "$1" == "master" ]]; then
+ sudo ufw allow 6443/tcp
+ sudo ufw allow 2379:2380/tcp
+ sudo ufw allow 10250/tcp
+ sudo ufw allow 10251/tcp
+ sudo ufw allow 10252/tcp
+ sudo ufw allow 10255/tcp
+ else
+ sudo ufw allow 10250/tcp
+ sudo ufw allow 10255/tcp
+ sudo ufw allow 30000:32767/tcp
+ fi
fi
+ # TODO: fix need for this workaround: disable firewall since the commands
+ # above do not appear to open the needed ports, even if ufw is inactive
+ # (symptom: nodeport requests fail unless sent from within the cluster or
+ # to the node IP where the pod is assigned) issue discovered ~11/16/17
+ sudo ufw disable
+else
+ echo; echo "prereqs.sh: ($(date)) Basic prerequisites"
+ sudo yum update -y
+ sudo yum install -y wget git
+ echo; echo "prereqs.sh: ($(date)) Install latest docker"
+ # per https://docs.docker.com/engine/installation/linux/docker-ce/centos/#install-from-a-package
+ sudo yum install -y docker
+ sudo systemctl enable docker
+ sudo systemctl start docker
+# wget https://download.docker.com/linux/centos/7/x86_64/stable/Packages/docker-ce-17.09.0.ce-1.el7.centos.x86_64.rpm
+# sudo yum install -y docker-ce-17.09.0.ce-1.el7.centos.x86_64.rpm
+# sudo systemctl start docker
+ echo; echo "prereqs.sh: ($(date)) Install kubectl, kubelet, kubeadm"
+ cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
+[kubernetes]
+name=Kubernetes
+baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+enabled=1
+gpgcheck=1
+repo_gpgcheck=1
+gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
+EOF
+ sudo setenforce 0
+ sudo yum install -y kubelet kubeadm kubectl
+ sudo systemctl enable kubelet
+ sudo systemctl start kubelet
+ echo; echo "prereqs.sh: ($(date)) Install jq for API output parsing"
+ sudo yum install -y jq
+ echo; echo "prereqs.sh: ($(date)) Set firewall rules"
+ cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
+net.bridge.bridge-nf-call-ip6tables = 1
+net.bridge.bridge-nf-call-iptables = 1
+EOF
+ sudo sysctl --system
fi
-# TODO: fix need for this workaround: disable firewall since the commands
-# above do not appear to open the needed ports, even if ufw is inactive
-# (symptom: nodeport requests fail unless sent from within the cluster or
-# to the node IP where the pod is assigned) issue discovered ~11/16/17
-sudo ufw disable
EOG
}
@@ -120,33 +159,58 @@ function setup_k8s_master() {
setup_prereqs
# Install master
- bash /tmp/prereqs.sh master
+ bash ~/prereqs.sh master
# per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
# If the following command fails, run "kubeadm reset" before trying again
# --pod-network-cidr=192.168.0.0/16 is required for calico; this should not
# conflict with your server network interface subnets
+ log "Reset kubeadm in case pre-existing cluster"
+ sudo kubeadm reset
+ # Start cluster
+ log "Start the cluster"
sudo kubeadm init --pod-network-cidr=192.168.0.0/16 >>/tmp/kubeadm.out
cat /tmp/kubeadm.out
export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out)
log "Cluster join command for manual use if needed: $k8s_joincmd"
-
- # Start cluster
- log "Start the cluster"
mkdir -p $HOME/.kube
sudo cp -f /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# Deploy pod network
log "Deploy calico as CNI"
# Updated to deploy Calico 2.6 per the create-cluster-kubeadm guide above
- # sudo kubectl apply -f http://docs.projectcalico.org/v2.4/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml
- sudo kubectl apply -f https://docs.projectcalico.org/v2.6/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml
+ # kubectl apply -f http://docs.projectcalico.org/v2.4/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml
+ kubectl apply -f https://docs.projectcalico.org/v2.6/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml
+
+ # TODO: document process dependency
+ # Failure to wait for all calico pods to be running can cause the first worker
+ # to be incompletely setup. Symptom is that node_ports cannot be routed
+ # via that node (no response - incoming SYN packets are dropped).
+ log "Wait for calico pods to be Running"
+ # calico-etcd, calico-kube-controllers, calico-node
+ pods=$(kubectl get pods --namespace kube-system | grep -c calico)
+ while [[ $pods -lt 3 ]]; do
+ log "all calico pods are not yet created. Waiting 10 seconds"
+ sleep 10
+ pods=$(kubectl get pods --namespace kube-system | grep -c calico)
+ done
+
+ pods=$(kubectl get pods --all-namespaces | awk '/calico/ {print $2}')
+ for pod in $pods; do
+ status=$(kubectl get pods --all-namespaces | awk "/$pod/ {print \$4}")
+ while [[ "$status" != "Running" ]]; do
+ log "$pod status is $status. Waiting 10 seconds"
+ sleep 10
+ status=$(kubectl get pods --all-namespaces | awk "/$pod/ {print \$4}")
+ done
+ log "$pod status is $status"
+ done
log "Wait for kubedns to be Running"
- kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}')
+ kubedns=$(kubectl get pods --all-namespaces | awk '/kube-dns/ {print $4}')
while [[ "$kubedns" != "Running" ]]; do
- log "kube-dns status is $kubedns. Waiting 60 seconds for it to be 'Running'"
+ log "kube-dns status is $kubedns. Waiting 60 seconds"
sleep 60
- kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}')
+ kubedns=$(kubectl get pods --all-namespaces | awk '/kube-dns/ {print $4}')
done
log "kube-dns status is $kubedns"
}
@@ -156,35 +220,61 @@ function setup_k8s_workers() {
export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out)
log "Installing workers at $1 with joincmd: $k8s_joincmd"
+# TODO: kubeadm reset below is workaround for
+# Ubuntu: "[preflight] Some fatal errors occurred: /var/lib/kubelet is not empty"
+# per https://github.com/kubernetes/kubeadm/issues/1
+# Centos: "Failed to start ContainerManager failed to initialize top
+# level QOS containers: root container /kubepods doesn't exist"
+ tee start_worker.sh <<EOF
+sudo kubeadm reset
+sudo $k8s_joincmd
+EOF
+
for worker in $workers; do
- log "Install worker at $worker"
+ host=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $USER@$worker hostname)
+ log "Install worker at $worker hostname $host"
if ! scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
- /tmp/prereqs.sh ubuntu@$worker:/tmp/prereqs.sh ; then
+ ~/prereqs.sh $USER@$worker:/home/$USER/. ; then
fail "Failed copying setup files to $worker"
fi
+ ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+ $USER@$worker bash prereqs.sh
scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ~/k8s_env.sh \
- ubuntu@$worker:/home/ubuntu/.
+ $USER@$worker:/home/$USER/.
+ scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+ start_worker.sh $USER@$worker:/home/$USER/.
ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
- ubuntu@$worker <<EOF > /dev/null 2>&1 &
-bash /tmp/prereqs.sh worker
-# Workaround for "[preflight] Some fatal errors occurred: /var/lib/kubelet
-# is not empty" per https://github.com/kubernetes/kubeadm/issues/1
-sudo kubeadm reset
-sudo $k8s_joincmd
-EOF
+ $USER@$worker bash start_worker.sh
done
for worker in $workers; do
- host=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$worker hostname)
- log "checking node $host"
+ host=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $USER@$worker hostname)
+ log "checking that node $host is 'Ready'"
status=$(kubectl get nodes | awk "/$host/ {print \$2}")
while [[ "$status" != "Ready" ]]; do
- log "node $host is \"$status\", waiting 10 seconds for it to be 'Ready'."
+ log "node $host is \"$status\", waiting 10 seconds"
status=$(kubectl get nodes | awk "/$host/ {print \$2}")
+ ((tries++))
+      if [[ $tries -gt 18 ]]; then
+ log "node $host is \"$status\" after 3 minutes; resetting kubeadm"
+ ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+ $USER@$worker bash start_worker.sh
+ tries=1
+ fi
sleep 10
done
log "node $host is 'Ready'."
done
+
+ log "***** kube proxy pods *****"
+ pods=$(kubectl get pods --all-namespaces | awk '/kube-proxy/ {print $2}')
+ for pod in $pods; do
+ echo; echo "**** $pod ****"
+ kubectl describe pods --namespace kube-system $pod
+ echo; echo "**** $pod logs ****"
+ kubectl logs --namespace kube-system $pod
+ done
+
log "Cluster is ready (all nodes in 'kubectl get nodes' show as 'Ready')."
}
@@ -196,6 +286,8 @@ function setup_ceph() {
fi
}
+dist=$(grep -m 1 '^ID=' /etc/os-release | awk -F '=' '{print $2}' | sed 's/"//g')
+
workers="$2"
privnet=$3
pubnet=$4