author     2017-11-01 22:28:15 -0700
committer  2017-11-01 22:28:15 -0700
commit     5370e971211cf35c844988646404acbca2e33201 (patch)
tree       6a31ceea780f8619b2ce6fa1ce3c0869866c0aca /tools/kubernetes
parent     14cad79fc39fafa942f07f8b4c5c671c33b7a427 (diff)
Improve logging. Remove extra ceph test step.
JIRA: MODELS-23
Change-Id: Idd377ee35ae7b90e10c95b4b41e13bfd533b30e0
Signed-off-by: Bryan Sullivan <bryan.sullivan@att.com>
Diffstat (limited to 'tools/kubernetes')
-rw-r--r--  tools/kubernetes/ceph-baremetal.sh | 54
-rw-r--r--  tools/kubernetes/ceph-helm.sh      | 18
-rw-r--r--  tools/kubernetes/k8s-cluster.sh    | 51
3 files changed, 67 insertions, 56 deletions
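
The change common to all three scripts is a shared log() helper that replaces the repeated echo "${FUNCNAME[0]}: ..." calls; it uses bash's caller builtin so each message is prefixed with the calling function and line number. A minimal standalone sketch of the pattern as it appears in the patch (the demo() caller and the sample output are illustrative only):

#!/bin/bash
# Sketch of the log() helper added by this patch.
# Inside log(), `caller 0` prints "<line> <function> <file>" for the frame that called log().
function log() {
  f=$(caller 0 | awk '{print $2}')
  l=$(caller 0 | awk '{print $1}')
  echo "$f:$l ($(date)) $1"
}

# Hypothetical caller, for illustration only.
function demo() {
  log "Deploying ceph-mon on localhost $HOSTNAME"
}

demo
# Example output (line number, timestamp and hostname depend on the call site):
#   demo:12 (Wed Nov  1 22:28:15 PDT 2017) Deploying ceph-mon on localhost k8s-master
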
diff --git a/tools/kubernetes/ceph-baremetal.sh b/tools/kubernetes/ceph-baremetal.sh
index dcad340..d806178 100644
--- a/tools/kubernetes/ceph-baremetal.sh
+++ b/tools/kubernetes/ceph-baremetal.sh
@@ -31,20 +31,26 @@
 #. Status: work in progress, incomplete
 #

+function log() {
+  f=$(caller 0 | awk '{print $2}')
+  l=$(caller 0 | awk '{print $1}')
+  echo "$f:$l ($(date)) $1"
+}
+
 function setup_ceph() {
   node_ips=$1
   cluster_net=$2
   public_net=$3
   ceph_dev=$4
-  echo "${FUNCNAME[0]}: Deploying ceph-mon on localhost $HOSTNAME"
-  echo "${FUNCNAME[0]}: Deploying ceph-osd on nodes $node_ips"
-  echo "${FUNCNAME[0]}: Setting cluster-network=$cluster_net and public-network=$public_net"
+  log "Deploying ceph-mon on localhost $HOSTNAME"
+  log "Deploying ceph-osd on nodes $node_ips"
+  log "Setting cluster-network=$cluster_net and public-network=$public_net"
   mon_ip=$(ip route get 8.8.8.8 | awk '{print $NF; exit}')
   all_nodes="$mon_ip $node_ips"
   # Also caches the server fingerprints so ceph-deploy does not prompt the user
   # Note this loop may be partially redundant with the ceph-deploy steps below
   for node_ip in $all_nodes; do
-    echo "${FUNCNAME[0]}: Install ntp and ceph on $node_ip"
+    log "Install ntp and ceph on $node_ip"
     ssh -x -o StrictHostKeyChecking=no ubuntu@$node_ip <<EOF
 sudo timedatectl set-ntp no
 wget -q -O- 'https://download.ceph.com/keys/release.asc' | sudo apt-key add -
@@ -56,11 +62,11 @@ EOF

   # per http://docs.ceph.com/docs/master/start/quick-ceph-deploy/
   # also https://upcommons.upc.edu/bitstream/handle/2117/101816/Degree_Thesis_Nabil_El_Alami.pdf#vote +1
-  echo "${FUNCNAME[0]}: Create ceph config folder ~/ceph-cluster"
+  log "Create ceph config folder ~/ceph-cluster"
   mkdir ~/ceph-cluster
   cd ~/ceph-cluster

-  echo "${FUNCNAME[0]}: Create new cluster with $HOSTNAME as initial ceph-mon node"
+  log "Create new cluster with $HOSTNAME as initial ceph-mon node"
   ceph-deploy new --cluster-network $cluster_net --public-network $public_net --no-ssh-copykey $HOSTNAME
   # Update conf per recommendations of http://docs.ceph.com/docs/jewel/rados/configuration/filesystem-recommendations/
   cat <<EOF >>ceph.conf
@@ -69,16 +75,16 @@ osd max object namespace len = 64
 EOF
   cat ceph.conf

-  echo "${FUNCNAME[0]}: Deploy ceph packages on other nodes"
+  log "Deploy ceph packages on other nodes"
   ceph-deploy install $mon_ip $node_ips

-  echo "${FUNCNAME[0]}: Deploy the initial monitor and gather the keys"
+  log "Deploy the initial monitor and gather the keys"
   ceph-deploy mon create-initial

   if [[ "x$ceph_dev" == "x" ]]; then
     n=1
     for node_ip in $node_ips; do
-      echo "${FUNCNAME[0]}: Prepare ceph OSD on node $node_ip"
+      log "Prepare ceph OSD on node $node_ip"
       echo "$node_ip ceph-osd$n" | sudo tee -a /etc/hosts
       # Using ceph-osd$n here avoids need for manual acceptance of the new server hash
       ssh -x -o StrictHostKeyChecking=no ubuntu@ceph-osd$n <<EOF
@@ -90,17 +96,17 @@ EOF
       ((n++))
     done
   else
-    echo "${FUNCNAME[0]}: Deploy OSDs"
+    log "Deploy OSDs"
     for node_ip in $node_ips; do
-      echo "${FUNCNAME[0]}: Create ceph osd on $node_ip using $ceph_dev"
+      log "Create ceph osd on $node_ip using $ceph_dev"
       ceph-deploy osd create $node_ip:$ceph_dev
     done
   fi

-  echo "${FUNCNAME[0]}: Copy the config file and admin key to the admin node and OSD nodes"
+  log "Copy the config file and admin key to the admin node and OSD nodes"
   ceph-deploy admin $mon_ip $node_ips

-  echo "${FUNCNAME[0]}: Check the cluster health"
+  log "Check the cluster health"
   sudo ceph health
   sudo ceph -s

@@ -108,22 +114,22 @@ EOF
   # rbd is not included in default kube-controller-manager... use attcomdev version
   sudo sed -i -- 's~gcr.io/google_containers/kube-controller-manager-amd64:.*~quay.io/attcomdev/kube-controller-manager:v1.7.3~' /etc/kubernetes/manifests/kube-controller-manager.yaml
   if [[ $(sudo grep -c attcomdev/kube-controller-manager /etc/kubernetes/manifests/kube-controller-manager.yaml) == 0 ]]; then
-    echo "${FUNCNAME[0]}: Problem patching /etc/kubernetes/manifests/kube-controller-manager.yaml... script update needed"
+    log "Problem patching /etc/kubernetes/manifests/kube-controller-manager.yaml... script update needed"
     exit 1
   fi
   mgr=$(kubectl get pods --all-namespaces | grep kube-controller-manager | awk '{print $4}')
   while [[ "$mgr" != "Running" ]]; do
-    echo "${FUNCNAME[0]}: kube-controller-manager status is $mgr. Waiting 60 seconds for it to be 'Running'"
+    log "kube-controller-manager status is $mgr. Waiting 60 seconds for it to be 'Running'"
     sleep 60
     mgr=$(kubectl get pods --all-namespaces | grep kube-controller-manager | awk '{print $4}')
   done
-  echo "${FUNCNAME[0]}: kube-controller-manager status is $mgr"
+  log "kube-controller-manager status is $mgr"

-  echo "${FUNCNAME[0]}: Create Ceph admin secret"
+  log "Create Ceph admin secret"
   admin_key=$(sudo ceph auth get-key client.admin)
   kubectl create secret generic ceph-secret-admin --from-literal=key="$admin_key" --namespace=kube-system --type=kubernetes.io/rbd

-  echo "${FUNCNAME[0]}: Create rdb storageClass 'general'"
+  log "Create rdb storageClass 'general'"
   cat <<EOF >/tmp/ceph-sc.yaml
 apiVersion: storage.k8s.io/v1
 kind: StorageClass
@@ -143,21 +149,21 @@ EOF
   sudo chown -R ubuntu:ubuntu ~/.kube/*
   kubectl create -f /tmp/ceph-sc.yaml

-  echo "${FUNCNAME[0]}: Create storage pool 'kube'"
+  log "Create storage pool 'kube'"
   # https://github.com/kubernetes/examples/blob/master/staging/persistent-volume-provisioning/README.md method
   sudo ceph osd pool create kube 32 32

-  echo "${FUNCNAME[0]}: Authorize client 'kube' access to pool 'kube'"
+  log "Authorize client 'kube' access to pool 'kube'"
   sudo ceph auth get-or-create client.kube mon 'allow r' osd 'allow rwx pool=kube'

-  echo "${FUNCNAME[0]}: Create ceph-secret-user secret in namespace 'default'"
+  log "Create ceph-secret-user secret in namespace 'default'"
   kube_key=$(sudo ceph auth get-key client.kube)
   kubectl create secret generic ceph-secret-user --from-literal=key="$kube_key" --namespace=default --type=kubernetes.io/rbd
   # A similar secret must be created in other namespaces that intend to access the ceph pool

   # Per https://github.com/kubernetes/examples/blob/master/staging/persistent-volume-provisioning/README.md
-  echo "${FUNCNAME[0]}: Create andtest a persistentVolumeClaim"
+  log "Create andtest a persistentVolumeClaim"
   cat <<EOF >/tmp/ceph-pvc.yaml
 {
   "kind": "PersistentVolumeClaim",
@@ -182,11 +188,11 @@ EOF
 EOF
   kubectl create -f /tmp/ceph-pvc.yaml
   while [[ "x$(kubectl get pvc -o jsonpath='{.status.phase}' claim1)" != "xBound" ]]; do
-    echo "${FUNCNAME[0]}: Waiting for pvc claim1 to be 'Bound'"
+    log "Waiting for pvc claim1 to be 'Bound'"
     kubectl describe pvc
     sleep 10
   done
-  echo "${FUNCNAME[0]}: pvc claim1 successfully bound to $(kubectl get pvc -o jsonpath='{.spec.volumeName}' claim1)"
+  log "pvc claim1 successfully bound to $(kubectl get pvc -o jsonpath='{.spec.volumeName}' claim1)"
   kubectl get pvc
   kubectl delete pvc claim1
   kubectl describe pods
diff --git a/tools/kubernetes/ceph-helm.sh b/tools/kubernetes/ceph-helm.sh
index 4660881..280c045 100644
--- a/tools/kubernetes/ceph-helm.sh
+++ b/tools/kubernetes/ceph-helm.sh
@@ -32,7 +32,9 @@
 #

 function log() {
-  echo "${FUNCNAME[0]} $(date): $1"
+  f=$(caller 0 | awk '{print $2}')
+  l=$(caller 0 | awk '{print $1}')
+  echo "$f:$l ($(date)) $1"
 }

 function setup_ceph() {
@@ -40,6 +42,10 @@ function setup_ceph() {
   private_net=$2
   public_net=$3
   dev=$4
+
+  log "Install ceph prerequisites"
+  sudo apt-get -y install ceph ceph-common
+
   # per https://github.com/att/netarbiter/tree/master/sds/ceph-docker/examples/helm
   log "Clone netarbiter"
   git clone https://github.com/att/netarbiter.git
@@ -94,7 +100,7 @@ nameserver $kubedns
 search ceph.svc.cluster.local svc.cluster.local cluster.local
 options ndots:5
 EOF
-sudo apt install -y ceph
+sudo apt install -y ceph ceph-common
 sudo ceph-disk zap /dev/$dev
 EOG
   log "Run ceph-osd at $node"
@@ -144,14 +150,8 @@ EOG
   log "pvc ceph-test successfully bound to $(kubectl get pvc -o jsonpath='{.spec.volumeName}' ceph-test)"
   kubectl describe pvc

-  log "Attach the pvc to a job and check if the job is successful (i.e., 1)"
+  log "Attach the pvc to a job"
   kubectl create -f tests/ceph/job.yaml
-  status=$(kubectl get jobs ceph-test-job -n default -o json | jq -r '.status.succeeded')
-  if [[ "$status" != "1" ]]; then
-    log "pvc attachment was not successful:"
-    kubectl get jobs ceph-test-job -n default -o json
-    exit 1
-  fi

   log "Verify that the test job was successful"
   pod=$(kubectl get pods --namespace default | awk "/ceph-test/{print \$1}")
diff --git a/tools/kubernetes/k8s-cluster.sh b/tools/kubernetes/k8s-cluster.sh
index 1ef17e2..9072442 100644
--- a/tools/kubernetes/k8s-cluster.sh
+++ b/tools/kubernetes/k8s-cluster.sh
@@ -44,8 +44,14 @@
 #. Status: work in progress, incomplete
 #

+function log() {
+  f=$(caller 0 | awk '{print $2}')
+  l=$(caller 0 | awk '{print $1}')
+  echo "$f:$l ($(date)) $1"
+}
+
 function setup_prereqs() {
-  echo "${FUNCNAME[0]}: Create prerequisite setup script"
+  log "Create prerequisite setup script"
   cat <<'EOG' >/tmp/prereqs.sh
 #!/bin/bash
 # Basic server pre-reqs
@@ -70,15 +76,14 @@ EOF
 sudo apt-get update
 # Next command is to workaround bug resulting in "PersistentVolumeClaim is not bound" for pod startup (remain in Pending)
 # TODO: reverify if this is still an issue in the final working script
-sudo apt-get -y install ceph ceph-common
 sudo apt-get -y install --allow-downgrades kubectl=${KUBE_VERSION}-00 kubelet=${KUBE_VERSION}-00 kubeadm=${KUBE_VERSION}-00
-# Needed for ceph setup etc
+# Needed for API output parsing
 sudo apt-get -y install jq
 EOG
 }

 function setup_k8s_master() {
-  echo "${FUNCNAME[0]}: Setting up kubernetes master"
+  log "Setting up kubernetes master"
   setup_prereqs

   # Install master
@@ -89,35 +94,35 @@ function setup_k8s_master() {
   sudo kubeadm init --pod-network-cidr=192.168.0.0/16 >>/tmp/kubeadm.out
   cat /tmp/kubeadm.out
   export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out)
-  echo "${FUNCNAME[0]}: Cluster join command for manual use if needed: $k8s_joincmd"
+  log "Cluster join command for manual use if needed: $k8s_joincmd"

   # Start cluster
-  echo "${FUNCNAME[0]}: Start the cluster"
+  log "Start the cluster"
   mkdir -p $HOME/.kube
   sudo cp -f /etc/kubernetes/admin.conf $HOME/.kube/config
   sudo chown $(id -u):$(id -g) $HOME/.kube/config
   # Deploy pod network
-  echo "${FUNCNAME[0]}: Deploy calico as CNI"
+  log "Deploy calico as CNI"
   sudo kubectl apply -f http://docs.projectcalico.org/v2.4/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml
 }

 function setup_k8s_agents() {
   agents="$1"
   export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out)
-  echo "${FUNCNAME[0]}: Installing agents at $1 with joincmd: $k8s_joincmd"
+  log "Installing agents at $1 with joincmd: $k8s_joincmd"

   setup_prereqs

   kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}')
   while [[ "$kubedns" != "Running" ]]; do
-    echo "${FUNCNAME[0]}: kube-dns status is $kubedns. Waiting 60 seconds for it to be 'Running'"
+    log "kube-dns status is $kubedns. Waiting 60 seconds for it to be 'Running'"
     sleep 60
     kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}')
   done
-  echo "${FUNCNAME[0]}: kube-dns status is $kubedns"
+  log "kube-dns status is $kubedns"

   for agent in $agents; do
-    echo "${FUNCNAME[0]}: Install agent at $agent"
+    log "Install agent at $agent"
     scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no /tmp/prereqs.sh ubuntu@$agent:/tmp/prereqs.sh
     ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent bash /tmp/prereqs.sh agent
     # Workaround for "[preflight] Some fatal errors occurred: /var/lib/kubelet is not empty" per https://github.com/kubernetes/kubeadm/issues/1
@@ -125,30 +130,30 @@ function setup_k8s_agents() {
     ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent sudo $k8s_joincmd
   done
-  echo "${FUNCNAME[0]}: Cluster is ready when all nodes in the output of 'kubectl get nodes' show as 'Ready'."
+  log "Cluster is ready when all nodes in the output of 'kubectl get nodes' show as 'Ready'."
 }

 function wait_for_service() {
-  echo "${FUNCNAME[0]}: Waiting for service $1 to be available"
+  log "Waiting for service $1 to be available"
   pod=$(kubectl get pods --namespace default | awk "/$1/ { print \$1 }")
-  echo "${FUNCNAME[0]}: Service $1 is at pod $pod"
+  log "Service $1 is at pod $pod"
   ready=$(kubectl get pods --namespace default -o jsonpath='{.status.containerStatuses[0].ready}' $pod)
   while [[ "$ready" != "true" ]]; do
-    echo "${FUNCNAME[0]}: $1 container is not yet ready... waiting 10 seconds"
+    log "$1 container is not yet ready... waiting 10 seconds"
     sleep 10
     # TODO: figure out why transient pods sometimes mess up this logic, thus need to re-get the pods
     pod=$(kubectl get pods --namespace default | awk "/$1/ { print \$1 }")
     ready=$(kubectl get pods --namespace default -o jsonpath='{.status.containerStatuses[0].ready}' $pod)
   done
-  echo "${FUNCNAME[0]}: pod $pod container status is $ready"
+  log "pod $pod container status is $ready"

   host_ip=$(kubectl get pods --namespace default -o jsonpath='{.status.hostIP}' $pod)
   port=$(kubectl get --namespace default -o jsonpath="{.spec.ports[0].nodePort}" services $1)
-  echo "${FUNCNAME[0]}: pod $pod container is at host $host_ip and port $port"
+  log "pod $pod container is at host $host_ip and port $port"
   while ! curl http://$host_ip:$port ; do
-    echo "${FUNCNAME[0]}: $1 service is not yet responding... waiting 10 seconds"
+    log "$1 service is not yet responding... waiting 10 seconds"
     sleep 10
   done
-  echo "${FUNCNAME[0]}: $1 is available at http://$host_ip:$port"
+  log "$1 is available at http://$host_ip:$port"
 }

 function demo_chart() {
@@ -211,7 +216,7 @@ function demo_chart() {
       wait_for_service oc-owncloud
       ;;
     *)
-      echo "${FUNCNAME[0]}: demo not implemented for $1"
+      log "demo not implemented for $1"
   esac
   # extra useful commands
   # kubectl describe pvc
@@ -225,7 +230,7 @@ function demo_chart() {
 }

 function setup_helm() {
-  echo "${FUNCNAME[0]}: Setup helm"
+  log "Setup helm"
   # Install Helm
   cd ~
   curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get > get_helm.sh
@@ -242,11 +247,11 @@ function setup_helm() {
   # Wait till tiller is running
   tiller_deploy=$(kubectl get pods --all-namespaces | grep tiller-deploy | awk '{print $4}')
   while [[ "$tiller_deploy" != "Running" ]]; do
-    echo "${FUNCNAME[0]}: tiller-deploy status is $tiller_deploy. Waiting 60 seconds for it to be 'Running'"
+    log "tiller-deploy status is $tiller_deploy. Waiting 60 seconds for it to be 'Running'"
     sleep 60
     tiller_deploy=$(kubectl get pods --all-namespaces | grep tiller-deploy | awk '{print $4}')
   done
-  echo "${FUNCNAME[0]}: tiller-deploy status is $tiller_deploy"
+  log "tiller-deploy status is $tiller_deploy"

   # Install services via helm charts from https://kubeapps.com/charts
   # e.g. helm install stable/dokuwiki
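
A second pattern that recurs in the diff is the polling loop that checks the STATUS column of `kubectl get pods --all-namespaces` every 60 seconds until a component (kube-controller-manager, kube-dns, tiller-deploy) reports Running. A condensed sketch of that idiom using the new logger; the wait_for_running wrapper name is hypothetical, the scripts inline this loop per component:

#!/bin/bash
# Condensed sketch of the "wait until Running" loop used in these scripts.
function log() {
  f=$(caller 0 | awk '{print $2}')
  l=$(caller 0 | awk '{print $1}')
  echo "$f:$l ($(date)) $1"
}

# Hypothetical wrapper; not part of the patch itself.
function wait_for_running() {
  name=$1
  # $4 is the STATUS column of 'kubectl get pods --all-namespaces'
  status=$(kubectl get pods --all-namespaces | grep $name | awk '{print $4}')
  while [[ "$status" != "Running" ]]; do
    log "$name status is $status. Waiting 60 seconds for it to be 'Running'"
    sleep 60
    status=$(kubectl get pods --all-namespaces | grep $name | awk '{print $4}')
  done
  log "$name status is $status"
}

wait_for_running tiller-deploy
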