aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQiLiang <liangqi1@huawei.com>2016-10-14 06:11:07 +0800
committerQiLiang <liangqi1@huawei.com>2016-10-15 05:28:32 +0800
commitfaed52cdedf1b067c3f39d49827cb843d55edd45 (patch)
tree8d34ec4e9e44167b067b30713bf231eff4fdfc58
parentc85cae06ce80e7090b707974a50ab32de4aec28b (diff)
Add recovery scripts
- add recovery shell scripts
- add boot-recovery role to stop services during system boot which could cause the boot to stall (pending)
- configuring the nfs mount in /etc/fstab causes the system boot to stall (pending), so mount nfs during the ansible-playbook run instead
- kill mysqld before mysql recovery; a running mysqld may cause the mysql recovery to fail

JIRA: COMPASS-474
Change-Id: I0f6f0ee935fbe3fbbe28a451a02decfb01a6165b
Signed-off-by: QiLiang <liangqi1@huawei.com>
-rw-r--r--deploy/adapters/ansible/openstack/HA-ansible-multinodes.yml7
-rw-r--r--deploy/adapters/ansible/openstack_mitaka/HA-ansible-multinodes.yml7
-rw-r--r--deploy/adapters/ansible/openstack_mitaka/roles/glance/tasks/nfs.yml8
-rw-r--r--deploy/adapters/ansible/openstack_mitaka_xenial/HA-ansible-multinodes.yml7
-rw-r--r--deploy/adapters/ansible/openstack_mitaka_xenial/roles/glance/tasks/nfs.yml8
-rw-r--r--deploy/adapters/ansible/openstack_newton_xenial/HA-ansible-multinodes.yml7
-rwxr-xr-xdeploy/adapters/ansible/roles/boot-recovery/tasks/main.yml26
-rwxr-xr-xdeploy/adapters/ansible/roles/boot-recovery/vars/Debian.yml14
-rwxr-xr-xdeploy/adapters/ansible/roles/boot-recovery/vars/RedHat.yml15
-rwxr-xr-xdeploy/adapters/ansible/roles/boot-recovery/vars/main.yml11
-rw-r--r--deploy/adapters/ansible/roles/controller-recovery/vars/Debian.yml1
-rw-r--r--deploy/adapters/ansible/roles/controller-recovery/vars/RedHat.yml1
-rw-r--r--deploy/adapters/ansible/roles/database/tasks/mariadb_cluster_debian.yml8
-rw-r--r--deploy/adapters/ansible/roles/database/tasks/mariadb_cluster_redhat.yml8
-rw-r--r--deploy/adapters/ansible/roles/glance/tasks/main.yml2
-rw-r--r--deploy/adapters/ansible/roles/glance/tasks/nfs.yml8
-rwxr-xr-xdeploy/compass_vm.sh72
-rwxr-xr-xdeploy/host_virtual.sh13
-rwxr-xr-xdeploy/launch.sh12
-rwxr-xr-xdeploy/network.sh41
-rw-r--r--deploy/recovery.sh40
-rw-r--r--deploy/template/power/ipmitool.tmpl30
-rwxr-xr-xrecovery.sh15
23 files changed, 342 insertions, 19 deletions
diff --git a/deploy/adapters/ansible/openstack/HA-ansible-multinodes.yml b/deploy/adapters/ansible/openstack/HA-ansible-multinodes.yml
index 7f61a1cf..95102d2b 100644
--- a/deploy/adapters/ansible/openstack/HA-ansible-multinodes.yml
+++ b/deploy/adapters/ansible/openstack/HA-ansible-multinodes.yml
@@ -238,6 +238,13 @@
accelerate: true
max_fail_percentage: 0
roles:
+ - boot-recovery
+
+- hosts: controller
+ remote_user: root
+ accelerate: true
+ max_fail_percentage: 0
+ roles:
- controller-recovery
- hosts: compute
diff --git a/deploy/adapters/ansible/openstack_mitaka/HA-ansible-multinodes.yml b/deploy/adapters/ansible/openstack_mitaka/HA-ansible-multinodes.yml
index 7ef467ee..c04445d8 100644
--- a/deploy/adapters/ansible/openstack_mitaka/HA-ansible-multinodes.yml
+++ b/deploy/adapters/ansible/openstack_mitaka/HA-ansible-multinodes.yml
@@ -247,6 +247,13 @@
accelerate: true
max_fail_percentage: 0
roles:
+ - boot-recovery
+
+- hosts: controller
+ remote_user: root
+ accelerate: true
+ max_fail_percentage: 0
+ roles:
- controller-recovery
- hosts: compute
diff --git a/deploy/adapters/ansible/openstack_mitaka/roles/glance/tasks/nfs.yml b/deploy/adapters/ansible/openstack_mitaka/roles/glance/tasks/nfs.yml
index 07dfacdd..deec81f8 100644
--- a/deploy/adapters/ansible/openstack_mitaka/roles/glance/tasks/nfs.yml
+++ b/deploy/adapters/ansible/openstack_mitaka/roles/glance/tasks/nfs.yml
@@ -42,10 +42,14 @@
- name: get mount info
command: mount
register: mount_info
+ tags:
+ - recovery
- name: get nfs server
shell: awk -F'=' '/compass_server/ {print $2}' /etc/compass.conf
register: ip_info
+ tags:
+ - recovery
- name: restart host nfs service
service: name={{ item }} state=restarted enabled=yes
@@ -55,7 +59,9 @@
shell: |
mount -t nfs -onfsvers=3 {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images
sed -i '/\/var\/lib\/glance\/images/d' /etc/fstab
- echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
+ #echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
when: mount_info.stdout.find('images') == -1
retries: 5
delay: 3
+ tags:
+ - recovery
diff --git a/deploy/adapters/ansible/openstack_mitaka_xenial/HA-ansible-multinodes.yml b/deploy/adapters/ansible/openstack_mitaka_xenial/HA-ansible-multinodes.yml
index ec4c53f4..ac31b682 100644
--- a/deploy/adapters/ansible/openstack_mitaka_xenial/HA-ansible-multinodes.yml
+++ b/deploy/adapters/ansible/openstack_mitaka_xenial/HA-ansible-multinodes.yml
@@ -247,6 +247,13 @@
accelerate: true
max_fail_percentage: 0
roles:
+ - boot-recovery
+
+- hosts: controller
+ remote_user: root
+ accelerate: true
+ max_fail_percentage: 0
+ roles:
- controller-recovery
- hosts: compute
diff --git a/deploy/adapters/ansible/openstack_mitaka_xenial/roles/glance/tasks/nfs.yml b/deploy/adapters/ansible/openstack_mitaka_xenial/roles/glance/tasks/nfs.yml
index 07dfacdd..deec81f8 100644
--- a/deploy/adapters/ansible/openstack_mitaka_xenial/roles/glance/tasks/nfs.yml
+++ b/deploy/adapters/ansible/openstack_mitaka_xenial/roles/glance/tasks/nfs.yml
@@ -42,10 +42,14 @@
- name: get mount info
command: mount
register: mount_info
+ tags:
+ - recovery
- name: get nfs server
shell: awk -F'=' '/compass_server/ {print $2}' /etc/compass.conf
register: ip_info
+ tags:
+ - recovery
- name: restart host nfs service
service: name={{ item }} state=restarted enabled=yes
@@ -55,7 +59,9 @@
shell: |
mount -t nfs -onfsvers=3 {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images
sed -i '/\/var\/lib\/glance\/images/d' /etc/fstab
- echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
+ #echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
when: mount_info.stdout.find('images') == -1
retries: 5
delay: 3
+ tags:
+ - recovery
diff --git a/deploy/adapters/ansible/openstack_newton_xenial/HA-ansible-multinodes.yml b/deploy/adapters/ansible/openstack_newton_xenial/HA-ansible-multinodes.yml
index 3d5b0a1c..9e8ec15b 100644
--- a/deploy/adapters/ansible/openstack_newton_xenial/HA-ansible-multinodes.yml
+++ b/deploy/adapters/ansible/openstack_newton_xenial/HA-ansible-multinodes.yml
@@ -247,6 +247,13 @@
accelerate: true
max_fail_percentage: 0
roles:
+ - boot-recovery
+
+- hosts: controller
+ remote_user: root
+ accelerate: true
+ max_fail_percentage: 0
+ roles:
- controller-recovery
- hosts: compute
diff --git a/deploy/adapters/ansible/roles/boot-recovery/tasks/main.yml b/deploy/adapters/ansible/roles/boot-recovery/tasks/main.yml
new file mode 100755
index 00000000..67206bf6
--- /dev/null
+++ b/deploy/adapters/ansible/roles/boot-recovery/tasks/main.yml
@@ -0,0 +1,26 @@
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+---
+- name: Register RECOVERY  # default RECOVERY_ENV to 'False' unless the caller supplied it (e.g. -e 'RECOVERY_ENV=True')
+ set_fact: RECOVERY_ENV={{RECOVERY_ENV | default('False')}}
+ tags:
+ - recovery-stop-service
+
+- include_vars: "{{ ansible_os_family }}.yml"  # OS-family specific controller_services list; loaded only on recovery runs
+ when: RECOVERY_ENV
+ tags:
+ - recovery-stop-service
+
+- name: stop controller services  # stops each listed service; enabled=yes leaves it enabled at boot
+ service: name={{ item }} state=stopped enabled=yes
+ with_items: "{{ controller_services | default([]) | union(controller_services_noarch) }}"  # full variable syntax; default([]) covers runs where the OS vars file was not loaded
+ when: RECOVERY_ENV
+ tags:
+ - recovery-stop-service
+
diff --git a/deploy/adapters/ansible/roles/boot-recovery/vars/Debian.yml b/deploy/adapters/ansible/roles/boot-recovery/vars/Debian.yml
new file mode 100755
index 00000000..084deebc
--- /dev/null
+++ b/deploy/adapters/ansible/roles/boot-recovery/vars/Debian.yml
@@ -0,0 +1,14 @@
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+---
+controller_services:  # Debian-family services stopped by the boot-recovery role's "stop controller services" task
+ - cron
+ - aodh-expirer
+ - neutron-openvswitch-agent
+ - mysql
diff --git a/deploy/adapters/ansible/roles/boot-recovery/vars/RedHat.yml b/deploy/adapters/ansible/roles/boot-recovery/vars/RedHat.yml
new file mode 100755
index 00000000..c46f79c8
--- /dev/null
+++ b/deploy/adapters/ansible/roles/boot-recovery/vars/RedHat.yml
@@ -0,0 +1,15 @@
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+---
+controller_services:  # RedHat-family services stopped by the boot-recovery role's "stop controller services" task
+ - cron  # NOTE(review): RedHat-family hosts usually name this service crond — confirm
+ - neutron-openvswitch-agent
+ - openstack-aodh-expirer
+ - mysql  # NOTE(review): may be registered as mariadb on RedHat-family hosts — confirm
+
diff --git a/deploy/adapters/ansible/roles/boot-recovery/vars/main.yml b/deploy/adapters/ansible/roles/boot-recovery/vars/main.yml
new file mode 100755
index 00000000..22af29f4
--- /dev/null
+++ b/deploy/adapters/ansible/roles/boot-recovery/vars/main.yml
@@ -0,0 +1,11 @@
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+---
+controller_services_noarch: []  # OS-independent services to stop; the role tasks union this with controller_services
+
diff --git a/deploy/adapters/ansible/roles/controller-recovery/vars/Debian.yml b/deploy/adapters/ansible/roles/controller-recovery/vars/Debian.yml
index 34675f6b..62753413 100644
--- a/deploy/adapters/ansible/roles/controller-recovery/vars/Debian.yml
+++ b/deploy/adapters/ansible/roles/controller-recovery/vars/Debian.yml
@@ -37,4 +37,5 @@ controller_services:
- aodh-notifier
- aodh-evaluator
- aodh-listener
+ - cron
diff --git a/deploy/adapters/ansible/roles/controller-recovery/vars/RedHat.yml b/deploy/adapters/ansible/roles/controller-recovery/vars/RedHat.yml
index 35c0a955..145acecd 100644
--- a/deploy/adapters/ansible/roles/controller-recovery/vars/RedHat.yml
+++ b/deploy/adapters/ansible/roles/controller-recovery/vars/RedHat.yml
@@ -36,4 +36,5 @@ controller_services:
- openstack-aodh-notifier
- openstack-aodh-evaluator
- openstack-aodh-listener
+ - cron
diff --git a/deploy/adapters/ansible/roles/database/tasks/mariadb_cluster_debian.yml b/deploy/adapters/ansible/roles/database/tasks/mariadb_cluster_debian.yml
index 6b670312..f083a40f 100644
--- a/deploy/adapters/ansible/roles/database/tasks/mariadb_cluster_debian.yml
+++ b/deploy/adapters/ansible/roles/database/tasks/mariadb_cluster_debian.yml
@@ -7,6 +7,14 @@
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
---
+- name: Register RECOVERY  # default RECOVERY_ENV to 'False' when the caller did not set it
+ set_fact: RECOVERY_ENV={{RECOVERY_ENV | default('False')}}
+
+- name: killall mysqld processes  # recovery runs only: force-kill (SIGKILL) any mysqld before the cluster status query that follows
+ shell: sudo killall -9 mysqld
+ when: RECOVERY_ENV
+ ignore_errors: True  # presumably because killall fails when no mysqld process is running — confirm
+
- name: get cluster status
shell: mysql --silent --skip-column-names -e 'SHOW STATUS LIKE "wsrep_evs_state"'|awk '{print $2}'
register: cluster_status
diff --git a/deploy/adapters/ansible/roles/database/tasks/mariadb_cluster_redhat.yml b/deploy/adapters/ansible/roles/database/tasks/mariadb_cluster_redhat.yml
index da1b863c..cfd778f1 100644
--- a/deploy/adapters/ansible/roles/database/tasks/mariadb_cluster_redhat.yml
+++ b/deploy/adapters/ansible/roles/database/tasks/mariadb_cluster_redhat.yml
@@ -7,6 +7,14 @@
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
---
+- name: Register RECOVERY  # default RECOVERY_ENV to 'False' when the caller did not set it
+ set_fact: RECOVERY_ENV={{RECOVERY_ENV | default('False')}}
+
+- name: killall mysqld processes  # recovery runs only: force-kill (SIGKILL) any mysqld before the cluster status query that follows
+ shell: sudo killall -9 mysqld
+ when: RECOVERY_ENV
+ ignore_errors: True  # presumably because killall fails when no mysqld process is running — confirm
+
- name: get cluster status
shell: mysql --silent --skip-column-names -e 'SHOW STATUS LIKE "wsrep_evs_state"'|awk '{print $2}'
register: cluster_status
diff --git a/deploy/adapters/ansible/roles/glance/tasks/main.yml b/deploy/adapters/ansible/roles/glance/tasks/main.yml
index a78ba771..caece26c 100644
--- a/deploy/adapters/ansible/roles/glance/tasks/main.yml
+++ b/deploy/adapters/ansible/roles/glance/tasks/main.yml
@@ -8,6 +8,8 @@
##############################################################################
---
- include_vars: "{{ ansible_os_family }}.yml"
+ tags:
+ - recovery
- include: glance_install.yml
tags:
diff --git a/deploy/adapters/ansible/roles/glance/tasks/nfs.yml b/deploy/adapters/ansible/roles/glance/tasks/nfs.yml
index 7895c386..179229de 100644
--- a/deploy/adapters/ansible/roles/glance/tasks/nfs.yml
+++ b/deploy/adapters/ansible/roles/glance/tasks/nfs.yml
@@ -38,10 +38,14 @@
- name: get mount info
command: mount
register: mount_info
+ tags:
+ - recovery
- name: get nfs server
shell: awk -F'=' '/compass_server/ {print $2}' /etc/compass.conf
register: ip_info
+ tags:
+ - recovery
- name: restart host nfs service
service: name={{ item }} state=restarted enabled=yes
@@ -51,7 +55,9 @@
shell: |
mount -t nfs -onfsvers=3 {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images
sed -i '/\/var\/lib\/glance\/images/d' /etc/fstab
- echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
+ #echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
when: mount_info.stdout.find('images') == -1
retries: 5
delay: 3
+ tags:
+ - recovery
diff --git a/deploy/compass_vm.sh b/deploy/compass_vm.sh
index dc391acc..7e2ce40b 100755
--- a/deploy/compass_vm.sh
+++ b/deploy/compass_vm.sh
@@ -151,3 +151,75 @@ function launch_compass() {
set +e
log_info "launch_compass exit"
}
+
+function recover_compass() {  # Starts the existing "compass" libvirt domain and waits for wait_ok to succeed; exits 1 on timeout.
+ log_info "recover_compass enter"
+
+ sudo virsh start compass
+
+ if ! wait_ok 500;then  # wait_ok is defined outside this hunk; 500 is presumably its retry budget — confirm
+ log_error "install os timeout"
+ exit 1
+ fi
+
+ log_info "recover_compass exit"  # previously logged "launch_compass exit" (copy-paste from launch_compass)
+}
+
+function _check_hosts_reachable() {  # $1: max polls (one per second), $2: inventory group to ping; prints "ok" or "fail" on stdout
+ local retry=0
+
+ while true; do
+ sleep 1
+ let retry+=1
+ if [[ $retry -ge $1 ]]; then
+ log_error "hosts boot time out" >&2  # keep stdout clean: callers capture it and compare the text with "fail"
+ echo "fail"
+ return
+ fi
+
+ ssh $ssh_args root@$MGMT_IP "
+ cd /var/ansible/run/$ADAPTER_NAME'-'$CLUSTER_NAME;
+ ansible -i inventories/inventory.yml $2 -m ping
+ " > /dev/null
+ if [ $? == 0 ]; then  # the remote ansible ping command exited 0
+ break
+ fi
+ done
+ echo "ok"
+}
+
+function check_hosts_reachable() {  # Prints "fail" if the compute group is unreachable within $1 polls; otherwise prints the controller group's result.
+ ret=$(_check_hosts_reachable $1 compute)
+ if [[ "$ret" == "fail" ]]; then
+ echo $ret
+ return
+ fi
+
+ ret=$(_check_hosts_reachable 100 controller)  # the controller check uses a fixed budget of 100 polls, not $1
+ echo $ret
+}
+
+function recover_hosts() {  # Runs the playbook's "recovery"-tagged tasks on the compass host over ssh; returns the remote exit status.
+ ssh $ssh_args root@$MGMT_IP "
+ cd /var/ansible/run/$ADAPTER_NAME'-'$CLUSTER_NAME;
+ ansible-playbook \
+ -i inventories/inventory.yml HA-ansible-multinodes.yml \
+ -t recovery \
+ -e 'RECOVERY_ENV=True'
+ "
+ local rc=$?  # exit status of the ssh / remote ansible-playbook run
+ [ $rc -eq 0 ] && echo "Recovery Complete!"
+ return $rc  # propagate failure; previously the function returned 0 even when the playbook failed
+}
+
+function wait_controller_nodes_ok() {  # Waits 100s, runs the playbook's "recovery-stop-service"-tagged tasks with RECOVERY_ENV=True, then waits 30s.
+ sleep 100  # fixed delay before contacting the nodes
+ ssh $ssh_args root@$MGMT_IP "
+ cd /var/ansible/run/$ADAPTER_NAME'-'$CLUSTER_NAME;
+ ansible-playbook \
+ -i inventories/inventory.yml HA-ansible-multinodes.yml \
+ -t recovery-stop-service \
+ -e 'RECOVERY_ENV=True'
+ "
+ sleep 30  # fixed delay after the playbook run; its exit status is not checked
+}
diff --git a/deploy/host_virtual.sh b/deploy/host_virtual.sh
index 2fab2c9d..0a991f11 100755
--- a/deploy/host_virtual.sh
+++ b/deploy/host_virtual.sh
@@ -54,6 +54,19 @@ function launch_host_vms() {
IFS=$old_ifs
}
+function recover_host_vms() {  # Runs "virsh destroy" then "virsh start" for every domain named in the comma-separated $HOSTNAMES.
+ old_ifs=$IFS
+ IFS=,  # make the unquoted $HOSTNAMES below split on commas
+
+ for host in $HOSTNAMES; do
+ sudo virsh destroy $host
+ sleep 2
+ sudo virsh start $host
+ sleep 2
+ done
+ IFS=$old_ifs  # restore the previous IFS
+}
+
function get_host_macs() {
local mac_generator=${COMPASS_DIR}/deploy/mac_generator.sh
local machines=
diff --git a/deploy/launch.sh b/deploy/launch.sh
index 976af3c0..6db9f362 100755
--- a/deploy/launch.sh
+++ b/deploy/launch.sh
@@ -12,7 +12,8 @@ WORK_DIR=$COMPASS_DIR/work/deploy
mkdir -p $WORK_DIR/script
-export DEPLOY_FIRST_TIME=${DEPLOY_FIRST_TIME-"true"}
+export DEPLOY_FIRST_TIME=${DEPLOY_FIRST_TIME:-"true"}
+export DEPLOY_RECOVERY=${DEPLOY_RECOVERY:-"false"}
source ${COMPASS_DIR}/deploy/prepare.sh
prepare_python_env
@@ -31,9 +32,14 @@ source ${COMPASS_DIR}/deploy/compass_vm.sh
source ${COMPASS_DIR}/deploy/deploy_host.sh
######################### main process
-if [[ "$EXPANSION" == "false" ]]
-then
+if [[ "$DEPLOY_RECOVERY" == "true" ]]; then
+ source ${COMPASS_DIR}/deploy/recovery.sh
+ recover_cluster
+ exit 0
+fi
+
+if [[ "$EXPANSION" == "false" ]]; then
print_logo
if [[ ! -z $VIRT_NUMBER ]];then
diff --git a/deploy/network.sh b/deploy/network.sh
index 46b8c023..6c678222 100755
--- a/deploy/network.sh
+++ b/deploy/network.sh
@@ -29,6 +29,13 @@ function setup_bridge_net()
sudo virsh net-start $net_name
}
+function recover_bridge_net()
+{  # Starts the libvirt network named by $1 via "virsh net-start".
+ net_name=$1
+
+ sudo virsh net-start $net_name
+}
+
function save_network_info()
{
sudo ovs-vsctl list-br |grep br-external
@@ -69,6 +76,13 @@ function setup_bridge_external()
python $COMPASS_DIR/deploy/setup_vnic.py
}
+function recover_bridge_external()
+{  # Starts the libvirt network "external", then runs deploy/setup_vnic.py.
+ sudo virsh net-start external
+
+ python $COMPASS_DIR/deploy/setup_vnic.py
+}
+
function setup_nat_net() {
net_name=$1
gw=$2
@@ -92,11 +106,20 @@ function setup_nat_net() {
sudo virsh net-start $net_name
}
+function recover_nat_net() {  # Starts the libvirt network named by $1 via "virsh net-start".
+ net_name=$1
+
+ sudo virsh net-start $net_name
+}
function setup_virtual_net() {
setup_nat_net install $INSTALL_GW $INSTALL_MASK
}
+function recover_virtual_net() {  # Recovery counterpart of setup_virtual_net: restarts the "install" NAT network.
+ recover_nat_net install
+}
+
function setup_baremetal_net() {
if [[ -z $INSTALL_NIC ]]; then
exit 1
@@ -104,6 +127,13 @@ function setup_baremetal_net() {
setup_bridge_net install $INSTALL_NIC
}
+function recover_baremetal_net() {  # Restarts the "install" bridge network; exits 1 when INSTALL_NIC is empty.
+ if [[ -z $INSTALL_NIC ]]; then  # same guard as setup_baremetal_net, although the NIC name is not passed on here
+ exit 1
+ fi
+ recover_bridge_net install
+}
+
function setup_network_boot_scripts() {
sudo cp $COMPASS_DIR/deploy/network.sh /usr/sbin/network_setup
sudo chmod +777 /usr/sbin/network_setup
@@ -134,3 +164,14 @@ function create_nets() {
setup_network_boot_scripts
}
+function recover_nets() {  # Restarts the mgmt, install and external networks, then calls clear_forward_rejct_rules.
+ recover_nat_net mgmt
+
+ # recover install network
+ recover_"$TYPE"_net  # dispatches on $TYPE — presumably "virtual" or "baremetal", matching the recover_*_net functions; confirm
+
+ # recover external network
+ recover_bridge_external
+ clear_forward_rejct_rules  # defined outside this hunk
+}
+
diff --git a/deploy/recovery.sh b/deploy/recovery.sh
new file mode 100644
index 00000000..db85848f
--- /dev/null
+++ b/deploy/recovery.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+function recover_cluster() {  # Restarts networks, the compass VM and the hosts, then runs the recovery playbooks; exits 1 if the hosts never become reachable.
+ recover_nets
+ recover_compass
+
+ local i=0 ret=""
+ MAX_RETRY_TIMES=2
+ while [ $i -lt $MAX_RETRY_TIMES ]; do
+ let i+=1
+
+ if [[ ! -z $VIRT_NUMBER ]];then  # VIRT_NUMBER set: restart the host VMs; otherwise call reboot_hosts (defined outside this hunk)
+ recover_host_vms
+ else
+ reboot_hosts
+ fi
+
+ ret=$(check_hosts_reachable 500)
+ if [[ "$ret" == "ok" ]];then
+ break
+ fi
+ done
+
+ if [[ "$ret" != "ok" ]]; then  # test the result, not the counter: success on the final attempt also leaves i == MAX_RETRY_TIMES
+ echo "Recovery Failure !!!"
+ exit 1
+ fi
+
+ wait_controller_nodes_ok
+ recover_hosts
+}
+
diff --git a/deploy/template/power/ipmitool.tmpl b/deploy/template/power/ipmitool.tmpl
index a297e001..048e997a 100644
--- a/deploy/template/power/ipmitool.tmpl
+++ b/deploy/template/power/ipmitool.tmpl
@@ -40,19 +40,23 @@ for i in {1..5}; do
fi
done
sleep 1
-for i in {1..5}; do
- if ipmitool -I $interface -H $ipmiIp -U $ipmiUser -P $ipmiPass chassis bootdev pxe >/dev/null 2>&1
- then
- break
- elif [[ i -lt 5 ]]
- then
- sleep 1
- else
- log_error "set $ipmiIp pxe fail"
- exit 1
- fi
-done
-sleep 1
+
+if [[ "\$DEPLOY_RECOVERY" != "true" ]]; then
+ for i in {1..5}; do
+ if ipmitool -I $interface -H $ipmiIp -U $ipmiUser -P $ipmiPass chassis bootdev pxe >/dev/null 2>&1
+ then
+ break
+ elif [[ i -lt 5 ]]
+ then
+ sleep 1
+ else
+ log_error "set $ipmiIp pxe fail"
+ exit 1
+ fi
+ done
+ sleep 1
+fi
+
for i in {1..5}; do
if ipmitool -I $interface -H $ipmiIp -U $ipmiUser -P $ipmiPass chassis power reset >/dev/null 2>&1
then
diff --git a/recovery.sh b/recovery.sh
new file mode 100755
index 00000000..1b188620
--- /dev/null
+++ b/recovery.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+export DEPLOY_RECOVERY="true"  # deploy/launch.sh checks this flag and runs recover_cluster instead of a deployment
+export DEPLOY_FIRST_TIME="false"  # overrides the "true" default that deploy/launch.sh would otherwise apply
+
+./run.sh  # relative path: resolved against the current working directory, so invoke from the directory containing run.sh
+