From 793d26c27c1e24d0e15e1e882f68841446b095ac Mon Sep 17 00:00:00 2001 From: Martin Klozik Date: Thu, 6 Dec 2018 08:24:51 +0100 Subject: ONAP installation improvements Several modifications of the installation process were made to improve stability during CI runs. New features: * increased robustness * increased number of K8S slaves * even VM dispatching among computes to avoid overcommitting of compute nodes, which leads to K8S slave crashes * report installation status JIRA: AUTO-79 Change-Id: I6eca0a7203dce0256dc914028989d3fb21d532e7 Signed-off-by: Martin Klozik --- ci/deploy-onap.sh | 109 ++++++++++++++++++++++++++---------------------------- 1 file changed, 53 insertions(+), 56 deletions(-) (limited to 'ci/deploy-onap.sh') diff --git a/ci/deploy-onap.sh b/ci/deploy-onap.sh index 4d7e3b3..c34eb56 100755 --- a/ci/deploy-onap.sh +++ b/ci/deploy-onap.sh @@ -61,6 +61,19 @@ fi # # Installation # + +# use standalone K8S master if there are enough VMs available for the K8S cluster +SERVERS_COUNT=$(echo $SERVERS | wc -w) +if [ $SERVERS_COUNT -gt 2 ] ; then + RANCHER_SLAVES=$SLAVES +else + RANCHER_SLAVES=$SERVERS +fi + +echo "INSTALLATION TOPOLOGY:" +echo "Rancher Master: $MASTER" +echo "Rancher Slaves: $RANCHER_SLAVES" +echo echo "INSTALLING DOCKER ON ALL MACHINES" echo "$SERVERS" @@ -223,9 +236,9 @@ HOSTREGTOKEN=$(ssh $SSH_OPTIONS $SSH_USER@"$MASTER" cat /tmp/rancher_register_ho echo "$HOSTREGTOKEN" echo "REGISTERING HOSTS WITH RANCHER ENVIRONMENT '$ENVIRON'" -echo "$SERVERS" +echo "$RANCHER_SLAVES" -for MACHINE in $SERVERS; +for MACHINE in $RANCHER_SLAVES; do ssh $SSH_OPTIONS $SSH_USER@"$MACHINE" "bash -s" < \$TMP_POD_LIST - return \$(cat \$TMP_POD_LIST | wc -l) + kubectl get pods --namespace $ENVIRON > $TMP_POD_LIST + return \$(cat $TMP_POD_LIST | wc -l) } -FAILED_PODS_LIMIT=1 # maximal number of falied ONAP PODs -ALL_PODS_LIMIT=20 # minimum ONAP PODs to be up & running -MAX_WAIT_PERIODS=500 # over 2 hours +FAILED_PODS_LIMIT=1 # maximal number of failed ONAP PODs 
+ALL_PODS_LIMIT=20 # minimum ONAP PODs to be up & running +WAIT_PERIOD=60 # wait period in seconds +MAX_WAIT_TIME=\$((3600*3)) # max wait time in seconds +MAX_WAIT_PERIODS=\$((\$MAX_WAIT_TIME/\$WAIT_PERIOD)) COUNTER=0 get_onap_pods ALL_PODS=\$? -PENDING=\$(grep -E '0/|1/2' \$TMP_POD_LIST | wc -l) +PENDING=\$(grep -E '0/|1/2' $TMP_POD_LIST | wc -l) while [ \$PENDING -gt \$FAILED_PODS_LIMIT -o \$ALL_PODS -lt \$ALL_PODS_LIMIT ]; do - # print header every 20th lines + # print header every 20th line if [ \$COUNTER -eq \$((\$COUNTER/20*20)) ] ; then printf "%-3s %-29s %-3s/%s\n" "Nr." "Datetime of check" "Err" "Total PODs" fi COUNTER=\$((\$COUNTER+1)) printf "%3s %-29s %3s/%-3s\n" \$COUNTER "\$(date)" \$PENDING \$ALL_PODS - sleep 15 + sleep \$WAIT_PERIOD if [ "\$MAX_WAIT_PERIODS" -eq \$COUNTER ]; then FAILED_PODS_LIMIT=800 ALL_PODS_LIMIT=0 fi get_onap_pods ALL_PODS=\$? - PENDING=\$(grep -E '0/|1/2' \$TMP_POD_LIST | wc -l) + PENDING=\$(grep -E '0/|1/2' $TMP_POD_LIST | wc -l) done -echo "Report on non-running containers" get_onap_pods -grep -E '0/|1/2' \$TMP_POD_LIST +cp $TMP_POD_LIST ~/onap_all_pods.txt echo - -echo "sleep 5 min - to allow rest frameworks to finish at \$(date)" -sleep 5m -echo "run healthcheck 2 times to warm caches and frameworks"\ - "so rest endpoints report properly - see OOM-447" - -echo "curl with aai cert to cloud-region PUT" -curl -X PUT https://127.0.0.1:30233/aai/v11/cloud-infrastructure/\ -cloud-regions/cloud-region/CloudOwner/RegionOne \ ---data "@aai-cloud-region-put.json" \ --H "authorization: Basic TW9kZWxMb2FkZXI6TW9kZWxMb2FkZXI=" \ --H "X-TransactionId:jimmy-postman" \ --H "X-FromAppId:AAI" \ --H "Content-Type:application/json" \ --H "Accept:application/json" \ ---cacert aaiapisimpledemoopenecomporg_20171003.crt -k - -echo "get the cloud region back" -curl -X GET https://127.0.0.1:30233/aai/v11/cloud-infrastructure/\ -cloud-regions/ \ --H "authorization: Basic TW9kZWxMb2FkZXI6TW9kZWxMb2FkZXI=" \ --H "X-TransactionId:jimmy-postman" \ --H 
"X-FromAppId:AAI" \ --H "Content-Type:application/json" \ --H "Accept:application/json" \ ---cacert aaiapisimpledemoopenecomporg_20171003.crt -k - -# OOM-484 - robot scripts moved +echo "========================" +echo "ONAP INSTALLATION REPORT" +echo "========================" +echo +echo "List of Failed PODs" +echo "-------------------" +grep -E '0/|1/2' $TMP_POD_LIST | tee ~/onap_failed_pods.txt +echo +echo "Summary:" +echo "--------" +echo " PODs Failed: \$(cat ~/onap_failed_pods.txt | wc -l)" +echo " PODs Total: \$(cat ~/onap_all_pods.txt | wc -l)" +echo +echo "ONAP health TC results" +echo "----------------------" cd oom/kubernetes/robot -echo -e "\nrun healthcheck prep 1" -# OOM-722 adds namespace parameter -./ete-k8s.sh $ENVIRON health > ~/health1.out -echo "sleep 5 min at \$(date)" -sleep 5m - -echo "run healthcheck prep 2" -./ete-k8s.sh $ENVIRON health > ~/health2.out - -echo "run healthcheck for real - wait a further 5 min at \$(date)" -sleep 5m -./ete-k8s.sh $ENVIRON health +./ete-k8s.sh $ENVIRON health | tee ~/onap_health.txt +echo "===============================" +echo "END OF ONAP INSTALLATION REPORT" +echo "===============================" OOMDEPLOY echo "Finished install, ruturned from Master at $(date)" -- cgit 1.2.3-korg