Diffstat (limited to 'tools/kubernetes/ceph-helm.sh')
-rw-r--r--  tools/kubernetes/ceph-helm.sh  156
1 file changed, 84 insertions(+), 72 deletions(-)
diff --git a/tools/kubernetes/ceph-helm.sh b/tools/kubernetes/ceph-helm.sh
index 3635c83..534fd86 100644
--- a/tools/kubernetes/ceph-helm.sh
+++ b/tools/kubernetes/ceph-helm.sh
@@ -39,13 +39,13 @@ function setup_ceph() {
# per https://github.com/att/netarbiter/tree/master/sds/ceph-docker/examples/helm
echo "${FUNCNAME[0]}: Clone netarbiter"
git clone https://github.com/att/netarbiter.git
-
- echo "${FUNCNAME[0]}: Create a .kube/config secret so that a K8s job could run kubectl inside the container"
cd netarbiter/sds/ceph-docker/examples/helm
- kubectl create namespace ceph
- ./create-secret-kube-config.sh ceph
- ./helm-install-ceph.sh cephtest $private_net $public_net
+ echo "${FUNCNAME[0]}: Prepare a ceph namespace in your K8s cluster"
+ ./prep-ceph-ns.sh
+
+ echo "${FUNCNAME[0]}: Run ceph-mon, ceph-mgr, ceph-mon-check, and rbd-provisioner"
+ # Pre-req per https://github.com/att/netarbiter/tree/master/sds/ceph-docker/examples/helm#notes
kubedns=$(kubectl get service -o json --namespace kube-system kube-dns | \
jq -r '.spec.clusterIP')
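For reference, the same cluster IP can be read with kubectl's built-in jsonpath output instead of piping through jq; a minimal equivalent sketch, not part of this patch:

    # sketch: equivalent kube-dns lookup using jsonpath instead of jq
    kubedns=$(kubectl get service kube-dns --namespace kube-system \
      -o jsonpath='{.spec.clusterIP}')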
@@ -55,13 +55,39 @@ search ceph.svc.cluster.local svc.cluster.local cluster.local
options ndots:5
EOF
+ ./helm-install-ceph.sh cephtest $private_net $public_net
+
+ echo "${FUNCNAME[0]}: Check the pod status of ceph-mon, ceph-mgr, ceph-mon-check, and rbd-provisioner"
+ services="rbd-provisioner ceph-mon-0 ceph-mgr ceph-mon-check"
+ for service in $services; do
+ pod=$(kubectl get pods --namespace ceph | awk "/$service/{print \$1}")
+ status=$(kubectl get pods --namespace ceph $pod -o json | jq -r '.status.phase')
+ while [[ "x$status" != "xRunning" ]]; do
+ echo "${FUNCNAME[0]}: $pod status is \"$status\". Waiting 10 seconds for it to be 'Running'"
+ sleep 10
+ status=$(kubectl get pods --namespace ceph $pod -o json | jq -r '.status.phase')
+ done
+ done
+ kubectl get pods --namespace ceph
+
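On kubectl 1.11 or newer, the polling loop above could also be expressed with kubectl wait; a sketch under that assumption (note that the Ready condition it checks is stricter than the Running phase polled above, and this cluster targets Kubernetes 1.7, so treat it only as an alternative):

    # sketch, assuming kubectl >= 1.11: block until each pod reports Ready
    for service in rbd-provisioner ceph-mon-0 ceph-mgr ceph-mon-check; do
      pod=$(kubectl get pods --namespace ceph | awk "/$service/{print \$1}")
      kubectl wait --namespace ceph --for=condition=Ready "pod/$pod" --timeout=300s
    done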
+ echo "${FUNCNAME[0]}: Check ceph health status"
+ status=$(kubectl -n ceph exec -it ceph-mon-0 -- ceph -s | awk "/health:/{print \$2}")
+ while [[ "x$status" != "xHEALTH_OK" ]]; do
+ echo "${FUNCNAME[0]}: ceph status is \"$status\". Waiting 10 seconds for it to be 'HEALTH_OK'"
+ kubectl -n ceph exec -it ceph-mon-0 -- ceph -s
+ sleep 10
+ status=$(kubectl -n ceph exec -it ceph-mon-0 -- ceph -s | awk "/health:/{print \$2}")
+ done
+ echo "${FUNCNAME[0]}: ceph status is 'HEALTH_OK'"
+ kubectl -n ceph exec -it ceph-mon-0 -- ceph -s
+
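Ceph also provides a terser health subcommand that prints only the status token, which avoids scraping the full ceph -s report; a sketch of the same check:

    # sketch: 'ceph health' prints the status first (e.g. HEALTH_OK, HEALTH_WARN ...)
    status=$(kubectl -n ceph exec -it ceph-mon-0 -- ceph health | awk '{print $1}')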
for node in $nodes; do
echo "${FUNCNAME[0]}: setup resolv.conf for $node"
ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
ubuntu@$node <<EOG
-cat <<EOF | sudo tee -a /etc/resolv.conf
+cat <<EOF | sudo tee /etc/resolv.conf
nameserver $kubedns
-search ceph.svc.cluster.local svc.cluster.local cluster.local
+search ceph.svc.cluster.local svc.cluster.local cluster.local
options ndots:5
EOF
EOG
@@ -74,6 +100,20 @@ EOG
./helm-install-ceph-osd.sh $name /dev/$dev
done
+ for node in $nodes; do
+ name=$(ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+ ubuntu@$node hostname)
+ pod=$(kubectl get pods --namespace ceph | awk "/$name/{print \$1}")
+ echo "${FUNCNAME[0]}: verify ceph-osd is Running at node $name"
+ status=$(kubectl get pods --namespace ceph $pod | awk "/$pod/ {print \$3}")
+ while [[ "x$status" != "xRunning" ]]; do
+ echo "${FUNCNAME[0]}: $pod status is $status. Waiting 10 seconds for it to be Running."
+ sleep 10
+ status=$(kubectl get pods --namespace ceph $pod | awk "/$pod/ {print \$3}")
+ kubectl get pods --namespace ceph
+ done
+ done
+
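Once every ceph-osd pod is Running, the OSDs should also be registered as up and in with the monitors; a quick sanity check from a mon pod (sketch, not part of this patch):

    # sketch: confirm the OSD count and placement seen by the monitors
    kubectl -n ceph exec -it ceph-mon-0 -- ceph osd stat
    kubectl -n ceph exec -it ceph-mon-0 -- ceph osd tree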
echo "${FUNCNAME[0]}: WORKAROUND take ownership of .kube"
# TODO: find out why this is needed
sudo chown -R ubuntu:ubuntu ~/.kube/*
@@ -84,74 +124,46 @@ EOG
echo "${FUNCNAME[0]}: Relax access control rules"
kubectl replace -f relax-rbac-k8s1.7.yaml
- # TODO: verification tests
-
- echo "${FUNCNAME[0]}: Create rdb storageClass 'slow'"
- cat <<EOF >/tmp/ceph-sc.yaml
-apiVersion: storage.k8s.io/v1
-kind: StorageClass
-metadata:
- name: slow
-provisioner: kubernetes.io/rbd
-parameters:
- monitors: $mon_ip:6789
- adminId: admin
- adminSecretName: ceph-secret-admin
- adminSecretNamespace: "kube-system"
- pool: kube
- userId: kube
- userSecretName: ceph-secret-user
-EOF
+ echo "${FUNCNAME[0]}: Setup complete, running smoke tests"
+ echo "${FUNCNAME[0]}: Create a pool from a ceph-mon pod (e.g., ceph-mon-0)"
- kubectl create -f /tmp/ceph-sc.yaml
-
- echo "${FUNCNAME[0]}: Create storage pool 'kube'"
- # https://github.com/kubernetes/examples/blob/master/staging/persistent-volume-provisioning/README.md method
- sudo ceph osd pool create kube 32 32
-
- echo "${FUNCNAME[0]}: Authorize client 'kube' access to pool 'kube'"
- sudo ceph auth get-or-create client.kube mon 'allow r' osd 'allow rwx pool=kube'
-
- echo "${FUNCNAME[0]}: Create ceph-secret-user secret in namespace 'default'"
- kube_key=$(sudo ceph auth get-key client.kube)
- kubectl create secret generic ceph-secret-user --from-literal=key="$kube_key" --namespace=default --type=kubernetes.io/rbd
- # A similar secret must be created in other namespaces that intend to access the ceph pool
-
- # Per https://github.com/kubernetes/examples/blob/master/staging/persistent-volume-provisioning/README.md
-
- echo "${FUNCNAME[0]}: Create andtest a persistentVolumeClaim"
- cat <<EOF >/tmp/ceph-pvc.yaml
-{
- "kind": "PersistentVolumeClaim",
- "apiVersion": "v1",
- "metadata": {
- "name": "claim1",
- "annotations": {
- "volume.beta.kubernetes.io/storage-class": "slow"
- }
- },
- "spec": {
- "accessModes": [
- "ReadWriteOnce"
- ],
- "resources": {
- "requests": {
- "storage": "3Gi"
- }
- }
- }
-}
-EOF
- kubectl create -f /tmp/ceph-pvc.yaml
- while [[ "x$(kubectl get pvc -o jsonpath='{.status.phase}' claim1)" != "xBound" ]]; do
- echo "${FUNCNAME[0]}: Waiting for pvc claim1 to be 'Bound'"
- kubectl describe pvc
+ kubectl -n ceph exec -it ceph-mon-0 -- ceph osd pool create rbd 100 100
+
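The two trailing arguments to ceph osd pool create are pg_num and pgp_num. A common rule of thumb is roughly (OSD count * 100) / replica count, rounded up to a power of two, so a small cluster may warrant a different value than 100; a hedged example for a three-OSD, three-replica cluster:

    # sketch: 3 OSDs * 100 / 3 replicas = 100 -> round up to 128 placement groups
    kubectl -n ceph exec -it ceph-mon-0 -- ceph osd pool create rbd 128 128
    kubectl -n ceph exec -it ceph-mon-0 -- ceph osd pool ls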
+ echo "${FUNCNAME[0]}: Create a pvc and check if the pvc status is Bound"
+
+ kubectl create -f tests/ceph/pvc.yaml
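tests/ceph/pvc.yaml itself is not shown in this diff; the checks below only assume it creates a claim named ceph-test. A hypothetical manifest along those lines (field values are illustrative, not the repository's actual file):

    # hypothetical sketch of tests/ceph/pvc.yaml -- not the file from the repo
    kind: PersistentVolumeClaim
    apiVersion: v1
    metadata:
      name: ceph-test
    spec:
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: 1Gi
    # storageClassName would name the class created by rbd-provisioner (not shown in this diff)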
+ status=$(kubectl get pvc ceph-test -o json | jq -r '.status.phase')
+ while [[ "$status" != "Bound" ]]; do
+ echo "${FUNCNAME[0]}: pvc status is $status, waiting 10 seconds for it to be Bound"
sleep 10
+ status=$(kubectl get pvc ceph-test -o json | jq -r '.status.phase')
done
- echo "${FUNCNAME[0]}: pvc claim1 successfully bound to $(kubectl get pvc -o jsonpath='{.spec.volumeName}' claim1)"
- kubectl get pvc
- kubectl delete pvc claim1
- kubectl describe pods
+ echo "${FUNCNAME[0]}: pvc ceph-test successfully bound to $(kubectl get pvc -o jsonpath='{.spec.volumeName}' ceph-test)"
+ kubectl describe pvc
+
+ echo "${FUNCNAME[0]}: Attach the pvc to a job and check if the job is successful (i.e., 1)"
+ kubectl create -f tests/ceph/job.yaml
+ status=$(kubectl get jobs ceph-secret-generator -n ceph -o json | jq -r '.status.succeeded')
+ if [[ "$status" != "1" ]]; then
+ echo "${FUNCNAME[0]}: pvc attachment was not successful:"
+ kubectl get jobs ceph-secret-generator -n ceph -o json
+ exit 1
+ fi
+
+ echo "${FUNCNAME[0]}: Verify that the test job was successful"
+ pod=$(kubectl get pods --namespace default | awk "/ceph-test/{print \$1}")
+ active=$(kubectl get jobs --namespace default -o json ceph-test-job | jq -r '.status.active')
+ while [[ "$active" != "null" && "$active" -gt 0 ]]; do
+ echo "${FUNCNAME[0]}: test job is still running, waiting 10 seconds for it to complete"
+ kubectl describe pods --namespace default $pod | awk '/Events:/{y=1;next}y'
+ sleep 10
+ active=$(kubectl get jobs --namespace default -o json ceph-test-job | jq -r '.status.active')
+ done
+ echo "${FUNCNAME[0]}: test job succeeded"
+
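If the job's output is useful for debugging, the test pod's log can be captured before the cleanup that follows; a small sketch:

    # sketch: record the test pod's output before the job and pvc are deleted
    kubectl logs --namespace default $pod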
+ kubectl delete jobs ceph-secret-generator -n ceph
+ kubectl delete pvc ceph-test
+ echo "${FUNCNAME[0]}: Ceph setup complete!"
}
if [[ "$1" != "" ]]; then