diff options
author | SerenaFeng <feng.xiaowei@zte.com.cn> | 2017-09-21 11:16:44 +0800 |
---|---|---|
committer | SerenaFeng <feng.xiaowei@zte.com.cn> | 2017-09-21 11:34:28 +0800 |
commit | 191072605a57185758936ff02af0298a1ec7e592 (patch) | |
tree | 45f6323ee10441689bf50a2ea7bd93ad41eecb3d /opts | |
parent | c4a49c82cd4fd3c21b3384960bf3b351f14d6cf4 (diff) |
bugfix: autoupdate failed due to mingle with watchdog
1. In the original design, if the auto-job starts soon after
the watchdog starts, the auto update might be mingled with the
connectivity check, which may lead to update failure. So instead,
the connectivity is checked first; if it fails and the module is not being deployed,
the module is restarted.
2. Only the automate-docker-deploy job impacts the container status during
the auto-job, so substitute the auto-job status check with automate-docker-deploy's.
3. The watchdog is not only for testapi, but also for reporting and
all docker container servers deployed on testresults.opnfv.org, so move it under
utils/test/opts, which is used to store tool sets employed by the testing tools.
Change-Id: I766f3a534a3d510ce7509d4e742150150ccd8f54
Signed-off-by: SerenaFeng <feng.xiaowei@zte.com.cn>
Diffstat (limited to 'opts')
-rw-r--r-- | opts/watchdog.sh | 162 |
1 files changed, 162 insertions, 0 deletions
#!/bin/bash
# *
# http://www.apache.org/licenses/LICENSE-2.0 *
# *
# Unless required by applicable law or agreed to in writing, *
# software distributed under the License is distributed on an *
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
# KIND, either express or implied. See the License for the *
# specific language governing permissions and limitations *
# under the License. *

# File: opts/watchdog.sh
#
# This script checks whether the deployed modules are reachable and
# restarts the corresponding containers when one of them has crashed.
# Fixes are tried in escalating order: kill the docker proxy and restart
# the container, then restart the docker service itself. Restarting
# docker is the only known last-resort solution as of now.
#
# NOTE: the script exits with status 0 from inside check_modules as soon
# as every module is found healthy, so the diagnostic "docker ps" /
# "docker images" output at the bottom is only produced when something
# is still failing.

## List of modules
modules=(testapi reporting)

## Ports of the modules
declare -A ports=(["testapi"]="8082" ["reporting"]="8084")

## URLs used to check whether the modules are deployed
declare -A urls=(
  ["testapi"]="http://testresults.opnfv.org/test/"
  ["reporting"]="http://testresults.opnfv.org/reporting/index.html"
)

### Functions related to checking.

#######################################
# Ask Jenkins whether the automate-docker-deploy job of a module is
# currently building.
# Arguments: $1 - module name (used to build the job name)
# Returns:   1 only when Jenkins explicitly reports <building>false;
#            0 otherwise. An unreachable Jenkins or an unparsable
#            answer therefore counts as "deploying", which keeps the
#            watchdog from restarting a module in an unknown state.
#######################################
function is_deploying() {
  local xml building
  xml=$(curl -m10 "https://build.opnfv.org/ci/job/${1}-automate-docker-deploy-master/lastBuild/api/xml?depth=1")
  building=$(grep -oPm1 "(?<=<building>)[^<]+" <<< "$xml")
  [[ "$building" != "false" ]]
}

#######################################
# Report the state of the docker service.
# The state is taken from the 5th space-separated field of the 3rd line
# of "service docker status".
# Outputs: writes "Docker status: <state>" to stdout
# Returns: 0 when the state is "active", non-zero otherwise
#######################################
function get_docker_status() {
  local status
  status=$(service docker status | sed -n 3p | cut -d ' ' -f5)
  echo -e "Docker status: $status"
  # Quoted so that an empty status is a clean "not active" instead of a
  # test syntax error.
  [[ "$status" == "active" ]]
}

#######################################
# Check that a URL answers with HTTP status 200.
# Arguments: $1 - module name (only used in the log line)
#            $2 - URL to probe
# Outputs:   writes a progress line to stdout
# Returns:   0 when the response status line carries code 200,
#            non-zero otherwise (including timeouts after 10 seconds)
#######################################
function check_connectivity() {
  echo "Checking $1 connection : $2"
  # Match on the status line rather than on the literal "200 OK": the
  # reason phrase is optional and absent altogether in HTTP/2 responses.
  # The function's status is that of grep, the last pipeline stage.
  curl --head -m10 --request GET "${2}" \
    | grep -E '^HTTP/[0-9.]+ 200' > /dev/null
}

#######################################
# Probe every module and collect the ones that are unreachable while no
# deployment job is running for them.
# Globals:  modules, urls (read); failed_modules (written)
# Outputs:  progress and result lines on stdout
# Returns:  1 when at least one module failed.
#           When all modules are fine the whole script EXITS with
#           status 0 - there is nothing left for the watchdog to do.
#######################################
function check_modules() {
  echo -e "Checking modules"
  failed_modules=()
  local module
  for module in "${modules[@]}"; do
    if ! check_connectivity "$module" "${urls[$module]}"; then
      # A module that is being redeployed is expected to be down.
      if ! is_deploying "$module"; then
        echo -e "$module failed"
        failed_modules+=("$module")
      fi
    fi
  done
  if (( ${#failed_modules[@]} > 0 )); then
    # [*] lists every failed module, not just the first array element.
    echo -e "Failed Modules: ${failed_modules[*]}"
    return 1
  fi
  echo -e "All modules working good"
  exit 0
}

### Functions related to fixes.

#######################################
# Restart the docker service and then restart every module container.
# Globals: modules (read)
#######################################
function restart_docker_fix() {
  echo -e "Running restart_docker_fix"
  service docker restart
  start_containers_fix "${modules[@]}"
}

#######################################
# For each given module, kill the process listening on the module's
# port (the docker proxy) and restart the module's container.
# Globals:   ports (read)
# Arguments: module names
#######################################
function docker_proxy_fix() {
  echo -e "Running docker_proxy_fix"
  local module pid
  for module in "$@"; do
    echo -e "Kill docker proxy and restart containers"
    pid=$(netstat -nlp | grep ":${ports[$module]}" | awk '{print $7}' | cut -d'/' -f1)
    # $pid is left unquoted on purpose below: netstat may list several
    # listeners (e.g. tcp and tcp6), giving one PID per line.
    # shellcheck disable=SC2086
    echo $pid
    if [[ -n "$pid" ]]; then
      # shellcheck disable=SC2086
      kill $pid
      start_container_fix "$module"
    fi
  done
}

#######################################
# Restart the containers of all given modules.
# Arguments: module names
#######################################
function start_containers_fix() {
  local module
  for module in "$@"; do
    start_container_fix "$module"
  done
}

#######################################
# Restart the container of one module; if the module is still not
# reachable afterwards, fall back to restarting "<module>_old".
# Globals:   urls (read)
# Arguments: $1 - module name (also the container name)
#######################################
function start_container_fix() {
  local module=$1
  echo -e "Starting a container $module"
  sudo docker restart "$module"
  sleep 5  # give the container time to come up before probing it
  if ! check_connectivity "$module" "${urls[$module]}"; then
    echo -e "Starting an old container ${module}_old"
    sudo docker restart "${module}_old"
    sleep 5
  fi
}

### Main Flow

echo -e
echo -e "WatchDog Started"
echo -e
echo -e "$(date "+%Y-%m-%d %H:%M:%S.%N")"
echo -e

## If the problem is related to docker daemon

if ! get_docker_status; then
  restart_docker_fix
  if ! check_modules; then
    echo -e "Watchdog failed while restart_docker_fix"
  fi
  exit
fi

## If the problem is related to docker proxy

if ! check_modules; then
  docker_proxy_fix "${failed_modules[@]}"
fi

## If any other problem : restart docker

if ! check_modules; then
  restart_docker_fix
fi

## If nothing works out

if ! check_modules; then
  echo -e "Watchdog failed"
fi

sudo docker ps
sudo docker images
\ No newline at end of file |