blob: 51868d7099709badf1ea8959cd30c25f1e455594 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
# *
# http://www.apache.org/licenses/LICENSE-2.0 *
# *
# Unless required by applicable law or agreed to in writing, *
# software distributed under the License is distributed on an *
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
# KIND, either express or implied. See the License for the *
# specific language governing permissions and limitations *
# under the License. *
# This script checks whether the deployments are working and,
# if one of the containers has crashed, restarts the specified
# containers. The only known remedy as of now is restarting docker.
#!/bin/bash
## Modules supervised by the watchdog
modules=(
  testapi
  reporting
)
## Port of each module (looked up among the listening sockets)
declare -A ports=(
  [testapi]=8082
  [reporting]=8084
)
## URL probed to decide whether each module is deployed
declare -A urls=(
  [testapi]="http://testresults.opnfv.org/test/"
  [reporting]="http://testresults.opnfv.org/reporting/index.html"
)
### Functions related to checking.
# Reports whether the Jenkins deploy job of a module is currently running.
# Arguments: $1 - module name, substituted into the job name
#                 "<module>-automate-docker-deploy-master"
# Returns:   1 only when the job's last build reports
#            <building>false</building>; 0 in every other case. An
#            unreachable Jenkins or an unparsable reply therefore counts
#            as "deploying".
function is_deploying() {
  # Keep the scratch variables out of the global namespace.
  local xml building
  xml=$(curl -m10 "https://build.opnfv.org/ci/job/${1}-automate-docker-deploy-master/lastBuild/api/xml?depth=1")
  # First <building> element of the XML reply (requires grep -P).
  building=$(grep -oPm1 "(?<=<building>)[^<]+" <<< "$xml")
  [[ "$building" != "false" ]]
}
# Prints the docker service state and reports whether it is "active".
# The state is the 5th space-separated field of the 3rd line printed by
# `service docker status`.
# Outputs:   "Docker status: <state>" on stdout
# Returns:   0 when the state is exactly "active", 1 otherwise (including
#            when no state could be extracted)
function get_docker_status() {
  local status
  status=$(service docker status | sed -n 3p | cut -d ' ' -f5)
  echo -e "Docker status: $status"
  # Quoted comparison: an empty state must simply compare unequal instead
  # of making the test itself fail with a syntax error.
  [[ "$status" == "active" ]]
}
# Probes a URL and reports whether it answers with HTTP status 200.
# Arguments: $1 - module name (only used in the log line)
#            $2 - URL to probe
# Outputs:   "Checking <module> connection : <url>" on stdout
# Returns:   0 when the response status line carries code 200, non-zero
#            otherwise (other status, timeout after 10s, curl failure)
function check_connectivity() {
  echo "Checking $1 connection : $2"
  # Match the status line itself rather than the text "200 OK": HTTP/2
  # responses have no reason phrase ("HTTP/2 200"), and an unrelated
  # header containing "200 OK" must not count as success.
  # grep output is discarded (not -q) so curl can finish writing.
  curl --head -m10 --request GET "$2" | grep -E '^HTTP/[0-9.]+ 200' > /dev/null
}
# Probes every module in `modules` and records the broken ones.
# A module counts as failed when its URL is not reachable AND its deploy
# job is not currently running.
# Globals:   modules, urls (read); failed_modules (rewritten on each call,
#            read by the main flow)
# Outputs:   progress lines and the list of failed modules on stdout
# Returns:   1 when at least one module failed.
#            NOTE: when every module is fine this function does not return;
#            it terminates the whole script with exit status 0.
function check_modules() {
  local module
  echo -e "Checking modules"
  failed_modules=()
  for module in "${modules[@]}"; do
    if ! check_connectivity "$module" "${urls[$module]}"; then
      if ! is_deploying "$module"; then
        echo -e "$module failed"
        failed_modules+=("$module")
      fi
    fi
  done
  if (( ${#failed_modules[@]} > 0 )); then
    # [*] prints every failed module, not just the first array element.
    echo -e "Failed Modules: ${failed_modules[*]}"
    return 1
  else
    echo -e "All modules working good"
    exit 0
  fi
}
### Functions related to fixes.
# Fix: restart the docker service, then restart the container of every
# module listed in `modules`.
restart_docker_fix() {
  printf '%s\n' "Running restart_docker_fix"
  service docker restart
  start_containers_fix "${modules[@]}"
}
# Fix: for each given module, kill the process(es) listening on the
# module's port and restart the module's container.
# Globals:   ports (read)
# Arguments: module names
# Outputs:   progress lines and the PIDs found on stdout; a notice on
#            stderr for modules without a configured port
function docker_proxy_fix() {
  # The loop variable stays named `module`: start_container_fix may resolve
  # $module from its caller's scope.
  local module port pid
  echo -e "Running docker_proxy_fix"
  for module in "$@"; do
    echo -e "Kill docker proxy and restart containers"
    port=${ports[$module]:-}
    if [[ -z "$port" ]]; then
      # Without a port the pattern would degrade to ":" and match (and
      # kill) every listening process on the host.
      echo -e "No port configured for $module, skipping" >&2
      continue
    fi
    # Anchor the port with trailing whitespace so that e.g. :808 does not
    # also match :8082.
    pid=$(netstat -nlp | grep -E ":${port}[[:space:]]" | awk '{print $7}' | cut -d'/' -f1)
    # Deliberately unquoted below: several PIDs (one per line) are printed
    # on one line and passed to kill as separate arguments.
    # shellcheck disable=SC2086
    echo $pid
    if [[ -n "$pid" ]]; then
      # shellcheck disable=SC2086
      kill $pid
      start_container_fix "$module"
    fi
  done
}
# Restarts the container of every module passed as an argument.
# Arguments: module names
function start_containers_fix() {
  # The loop variable stays named `module`: start_container_fix may resolve
  # $module from its caller's scope.
  local module
  for module in "$@"; do
    # Quoted so each name reaches the callee as exactly one argument.
    start_container_fix "$module"
  done
}
# Restarts a module's container; if the module is still unreachable
# afterwards, restarts the "<module>_old" container instead.
# Globals:   urls (read)
# Arguments: $1 - module name (also the container name). When omitted, the
#                 caller's `module` variable is used, as older call sites
#                 relied on.
# Outputs:   progress lines on stdout
function start_container_fix() {
  # The right-hand side is expanded before the local is created, so the
  # fallback still sees the caller's value.
  local module=${1:-$module}
  echo -e "Starting a container $module"
  sudo docker restart "$module"
  sleep 5   # give the container time to come up before probing it
  if ! check_connectivity "$module" "${urls[$module]}"; then
    # Braces matter here: $module_old would expand an unrelated (unset) variable.
    echo -e "Starting an old container ${module}_old"
    sudo docker restart "${module}_old"
    sleep 5
  fi
}
### Main Flow
# Banner with a timestamp, framed by blank lines.
echo
echo "WatchDog Started"
echo
date "+%Y-%m-%d %H:%M:%S.%N"
echo

## Step 1: docker daemon not active -> restart it, re-check once and stop.
if ! get_docker_status; then
  restart_docker_fix
  check_modules || echo -e "Watchdog failed while restart_docker_fix"
  exit
fi

# check_modules ends the script itself (exit 0) as soon as every module is
# healthy, so each step below only runs while something is still broken.

## Step 2: suspect the docker proxy of the failed modules.
check_modules || docker_proxy_fix "${failed_modules[@]}"

## Step 3: any other problem -> restart docker.
check_modules || restart_docker_fix

## Step 4: nothing helped.
check_modules || echo -e "Watchdog failed"

# Dump the container and image lists for diagnosis.
sudo docker ps
sudo docker images
|