summaryrefslogtreecommitdiffstats
path: root/opts/watchdog.sh
blob: 51868d7099709badf1ea8959cd30c25f1e455594 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#                                                               *
#    http://www.apache.org/licenses/LICENSE-2.0                 *
#                                                               *
#  Unless required by applicable law or agreed to in writing,   *
#  software distributed under the License is distributed on an  *
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
#  KIND, either express or implied.  See the License for the    *
#  specific language governing permissions and limitations      *
#  under the License.                                           *

# This script checks whether the deployments are working and then
# starts the specified containers in case one of the containers
# crashes. The only solution is restarting docker as of now.

#!/bin/bash

## Names of the modules this watchdog looks after
modules=(
    testapi
    reporting
)

## Port of each module (searched for among netstat's listening sockets)
declare -A ports=(
    [testapi]="8082"
    [reporting]="8084"
)

## URL requested to decide whether each module is deployed and answering
declare -A urls=(
    [testapi]="http://testresults.opnfv.org/test/"
    [reporting]="http://testresults.opnfv.org/reporting/index.html"
)

### Functions related to checking.

# Tell whether the Jenkins docker-deploy job of a module is currently building.
# Arguments: $1 - module name, substituted into the job URL
# Returns:   1 only when the lastBuild XML contains <building>false</building>;
#            0 in every other case, including when the XML could not be
#            fetched or holds no <building> element (unknown counts as
#            "deploying").
function is_deploying() {
    # Scratch variables stay local so they do not leak into the script's globals.
    local xml building=""
    local building_re='<building>([^<]+)'
    xml=$(curl -m10 "https://build.opnfv.org/ci/job/${1}-automate-docker-deploy-master/lastBuild/api/xml?depth=1")
    # Capture the text of the first <building> element, if there is one.
    if [[ $xml =~ $building_re ]]; then
        building=${BASH_REMATCH[1]}
    fi
    [[ $building != "false" ]]
}

# Report whether the docker service is active.
# The state is taken from the 5th space-separated field on the 3rd line of
# `service docker status`.
# Outputs: "Docker status: <state>" on stdout
# Returns: 0 when the state is "active", non-zero otherwise
function get_docker_status() {
    local status
    status=$(service docker status | sed -n 3p | cut -d ' ' -f5)
    echo -e "Docker status: $status"
    # [[ ]] with a quoted operand: an empty state is simply "not active"
    # instead of a "unary operator expected" error from `[`.
    [[ "$status" == "active" ]]
}

# Probe a module's URL and tell whether it answers with HTTP status 200.
# Arguments: $1 - module name (used in the log line only)
#            $2 - URL to request
# Outputs:   "Checking <module> connection : <url>" on stdout
# Returns:   0 when the response headers carry a 200 status line,
#            non-zero otherwise
function check_connectivity() {
    echo "Checking $1 connection : $2"
    # The URL is quoted so it always reaches curl as a single argument.
    # The pattern accepts both "HTTP/1.1 200 OK" and the reason-less
    # "HTTP/2 200" form; [[:space:]] absorbs the trailing CR of the header line.
    # grep's output is discarded rather than using -q so that grep reads all
    # of curl's output before exiting.
    curl --head -m10 --request GET "${2}" \
        | grep -E '^HTTP/[0-9.]+ 200( OK)?[[:space:]]*$' > /dev/null
}

# Check every module listed in `modules` and collect the ones that are down.
# A module counts as failed when its URL does not answer AND its deploy job
# is not currently running.
# Globals: modules, urls (read); failed_modules (written, read by the caller)
# Outputs: progress and result messages on stdout
# Returns: non-zero when at least one module failed.
#          When no module failed the function does not return: it terminates
#          the whole script with `exit 0`.
function check_modules() {
    echo -e "Checking modules"
    local module
    # Intentionally global: the caller passes this list on to the fixes.
    failed_modules=()
    for module in "${modules[@]}"
    do
        if ! check_connectivity "$module" "${urls[$module]}"; then
            if ! is_deploying "$module"; then
                echo -e "$module failed"
                failed_modules+=("$module")
            fi
        fi
    done
    # Test the element count and print every element; a bare $failed_modules
    # would only expand to the first entry of the array.
    if (( ${#failed_modules[@]} > 0 )); then
        echo -e "Failed Modules: ${failed_modules[*]}"
        return 1
    else
        echo -e "All modules working good"
        exit 0
    fi
}

### Functions related to fixes.

# Fix of last resort: restart the docker service, then restart the
# container of every module listed in `modules`.
function restart_docker_fix() {
    printf '%s\n' "Running restart_docker_fix"
    service docker restart
    start_containers_fix "${modules[@]}"
}

# For each given module, kill whatever process is listening on the module's
# port and then restart the module's container.
# Globals:   ports (read)
# Arguments: names of the modules to fix
# Outputs:   progress messages and the PIDs found (space separated) on stdout;
#            a note on stderr for modules that have no port entry
function docker_proxy_fix() {
    echo -e "Running docker_proxy_fix"
    local module port
    local -a pids
    for module in "$@"
    do
        echo -e "Kill docker proxy and restart containers"
        port=""
        if [[ -n "$module" ]]; then
            port=${ports[$module]:-}
        fi
        # Without a port the socket filter would match every listener, so
        # such a module is skipped instead of killing unrelated processes.
        if [[ -z "$port" ]]; then
            echo -e "No port known for module '$module', skipping" >&2
            continue
        fi
        # Keep only sockets whose local address ends in ":<port>" (a plain
        # substring match would let port 80 hit 8082 as well), and only
        # numeric, de-duplicated PIDs: netstat prints "-" in the PID column
        # when it is not allowed to see the owning process.
        mapfile -t pids < <(
            netstat -nlp | awk -v suffix=":${port}" '
                substr($4, length($4) - length(suffix) + 1) == suffix {
                    split($7, owner, "/")
                    if (owner[1] ~ /^[0-9]+$/ && !seen[owner[1]]++) {
                        print owner[1]
                    }
                }'
        )
        echo "${pids[*]}"
        if (( ${#pids[@]} > 0 )); then
            kill "${pids[@]}"
            start_container_fix "$module"
        fi
    done
}

# Restart the container of every module given as an argument.
# Arguments: names of the modules to restart
function start_containers_fix() {
    # `module` is local here but, through bash's dynamic scoping, still
    # visible to start_container_fix while it runs inside this loop.
    local module
    for module in "$@"
    do
        # Quoted so that each name is handed over as exactly one argument.
        start_container_fix "$module"
    done
}

# Restart the docker container of one module; if the module's URL still does
# not answer afterwards, restart the "<module>_old" container as well.
# Globals:   urls (read); module (read only as a fallback when $1 is missing)
# Arguments: $1 - module / container name
# Outputs:   progress messages on stdout
# Returns:   non-zero when no module name is available
function start_container_fix() {
    # Prefer the explicit argument; fall back to the caller's loop variable
    # so that invocations without an argument keep working.
    local container=${1:-${module:-}}
    if [[ -z "$container" ]]; then
        echo -e "start_container_fix: no module given" >&2
        return 1
    fi
    echo -e "Starting a container $container"
    sudo docker restart "$container"
    sleep 5
    if ! check_connectivity "$container" "${urls[$container]}"; then
        # Braces are required: "$container_old" would name another variable.
        echo -e "Starting an old container ${container}_old"
        sudo docker restart "${container}_old"
        sleep 5
    fi
}

### Main Flow

# Banner: blank line, title, blank line, timestamp, blank line.
printf '\n%s\n\n%s\n\n' "WatchDog Started" "$(date "+%Y-%m-%d %H:%M:%S.%N")"

# Note: check_modules ends the script with status 0 as soon as no module is
# reported as failed, so every step below only runs while something is
# still broken.

## Docker daemon is not active: restart it, report, and stop here

if ! get_docker_status; then
    restart_docker_fix
    check_modules || echo -e "Watchdog failed while restart_docker_fix"
    exit
fi

## Daemon is active: first try the docker proxy fix on the failed modules

check_modules || docker_proxy_fix "${failed_modules[@]}"

## Still failing: restart docker altogether

check_modules || restart_docker_fix

## Nothing helped

check_modules || echo -e "Watchdog failed"

# Diagnostics for the log when the modules could not be brought back
sudo docker ps
sudo docker images