From 85856375b27a98d9173d845ab509a16b6f4705fb Mon Sep 17 00:00:00 2001 From: Zhijiang Hu Date: Sun, 27 Aug 2017 23:37:49 -0400 Subject: Introduce deployment error recovery level Change-Id: I023a637e793bae845feabed0d7b80072d3f9f0b7 Signed-off-by: Zhijiang Hu --- ci/deploy/deploy.sh | 18 +++++--- docs/release/installation/index.rst | 1 + docs/release/installation/recovery.rst | 80 ++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 docs/release/installation/recovery.rst diff --git a/ci/deploy/deploy.sh b/ci/deploy/deploy.sh index 056f6b41..a104230d 100755 --- a/ci/deploy/deploy.sh +++ b/ci/deploy/deploy.sh @@ -32,6 +32,7 @@ OPTIONS: -w Workdir for temporary usage, optional -h Print this message and exit -s Deployment scenario + -S Skip recreate Daisy VM during deployment Description: Deploys the Daisy4NFV on the indicated lab resource @@ -62,6 +63,7 @@ POD_NAME='' TARGET_HOSTS_NUM=0 DRY_RUN=0 IS_BARE=1 +SKIP_DEPLOY_DAISY=0 VM_MULTINODE=("computer01" "computer02" "controller02" "controller03" "controller01") VALID_DEPLOY_SCENARIO=("os-nosdn-nofeature-noha" "os-nosdn-nofeature-ha" "os-odl_l3-nofeature-noha" "os-odl_l2-nofeature-noha" "os-odl_l3-nofeature-ha" "os-odl_l2-nofeature-ha" @@ -74,7 +76,7 @@ VALID_DEPLOY_SCENARIO=("os-nosdn-nofeature-noha" "os-nosdn-nofeature-ha" "os-odl ############################################################################ # BEGIN of main # -while getopts "b:B:Dd:n:l:p:r:w:s:h" OPTION +while getopts "b:B:Dd:n:l:p:r:w:s:Sh" OPTION do case $OPTION in b) @@ -104,6 +106,9 @@ do s) DEPLOY_SCENARIO=${OPTARG} ;; + S) + SKIP_DEPLOY_DAISY=1 + ;; h) usage exit 0 @@ -427,11 +432,12 @@ function config_daisy() clean_up_target_vms_and_networks -#TODO: These steps shall be done only for the first time -clean_up_daisy_vm_and_networks -create_daisy_vm_and_networks -install_daisy -config_daisy +if [ ! 
$SKIP_DEPLOY_DAISY -eq 1 ]; then + clean_up_daisy_vm_and_networks + create_daisy_vm_and_networks + install_daisy + config_daisy +fi echo "====== prepare cluster and pxe ======" diff --git a/docs/release/installation/index.rst b/docs/release/installation/index.rst index 8c5a3da7..a4836e53 100644 --- a/docs/release/installation/index.rst +++ b/docs/release/installation/index.rst @@ -15,4 +15,5 @@ OPNFV Daisy4nfv Installation Guide installation_guide.rst bmdeploy.rst vmdeploy.rst + recovery.rst diff --git a/docs/release/installation/recovery.rst b/docs/release/installation/recovery.rst new file mode 100644 index 00000000..7a49e693 --- /dev/null +++ b/docs/release/installation/recovery.rst @@ -0,0 +1,80 @@ +.. This work is licensed under a Creative Commons Attribution 4.0 International Licence. +.. http://creativecommons.org/licenses/by/4.0 + +Deployment Error Recovery Guide +=============================== + +Deployment may fail for different kinds of reasons, such as Daisy VM creation +error, target nodes failure during OS installation, or Kolla deploy command +error. Different errors can be grouped into several error levels. We define +Recovery Levels below to fulfill recovery requirements in different error levels. + +1. Recovery Level 0 +------------------- + +This level restarts the whole deployment again. It is mainly used to retry after errors such +as a failed Daisy VM creation. For example, we use the following command to do +virtual deployment (in the jump host): + + +.. code-block:: console + + sudo ./ci/deploy/deploy.sh -b ./ -l zte -p virtual1 -s os-nosdn-nofeature-ha + + + +If the command failed because of a Daisy VM creation error, then redoing the above command +will restart the whole deployment, which includes rebuilding the Daisy VM image and +restarting the Daisy VM. + + +2. 
Recovery Level 1 +------------------- + +If the Daisy VM was created successfully, but bugs were encountered in Daisy code +or software of target OS which prevent deployment from being done, then +the user or the developer may not want to recreate the Daisy VM again during +the next deployment process but just modify some pieces of code in it. To achieve +this, he/she can redo deployment by deleting all clusters and hosts first (in the +Daisy VM): + + +.. code-block:: console + + source /root/daisyrc_admin + for i in `daisy cluster-list | awk -F "|" '{print $2}' | sed -n '4p' | tr -d " "`;do daisy cluster-delete $i;done + for i in `daisy host-list | awk -F "|" '{print $2}'| grep -o "[^ ]\+\( \+[^ ]\+\)*"|tail -n +2`;do daisy host-delete $i;done + + + +Then, adjust deployment command as below and run it again (in the jump host): + + +.. code-block:: console + + sudo ./ci/deploy/deploy.sh -S -b ./ -l zte -p virtual1 -s os-nosdn-nofeature-ha + + + +Pay attention to the "-S" argument above, it lets the deployment process +skip re-creating Daisy VM and use the existing one. + + +3. Recovery Level 2 +------------------- + +If both Daisy VM and target node's OS are OK, but an error occurred when doing +OpenStack deployment, then there is not even a need to re-install target OS for +the deployment retrying. In this level, all we need to do is just retry the +Daisy deployment command as follows (in the Daisy VM): + + +.. code-block:: console + + source /root/daisyrc_admin + daisy uninstall + daisy install + + + +This basically does kolla-ansible destroy and kolla-ansible deploy. -- cgit 1.2.3-korg