diff options
author | agardner <agardner@linuxfoundation.org> | 2018-02-23 14:08:50 -0500 |
---|---|---|
committer | agardner <agardner@linuxfoundation.org> | 2018-02-23 14:27:26 -0500 |
commit | 6a8416dc9ba2870f816cb4bd722668be83f099c7 (patch) | |
tree | 853d1f4e271be2d92eb9477d159df41601d8bc59 | |
parent | cf7adb151cc01ab9a6ff7479120a43dc0f0375e5 (diff) |
Fix slave monitor job.
It must be run on the same builder each time.
Output goes to /tmp/; it's just a text file per slave.
hastebin stopped working, so I turned off the silly graphs.
Change-Id: I7300710c338f74e5426ab3cd2e2707ac2553f982
Signed-off-by: agardner <agardner@linuxfoundation.org>
-rw-r--r-- | jjb/releng/opnfv-utils.yml | 13 | ||||
-rwxr-xr-x[-rw-r--r--] | utils/slave-monitor-0.1.sh | 101 |
2 files changed, 55 insertions, 59 deletions
diff --git a/jjb/releng/opnfv-utils.yml b/jjb/releng/opnfv-utils.yml index 93b3615ef..1e640db7c 100644 --- a/jjb/releng/opnfv-utils.yml +++ b/jjb/releng/opnfv-utils.yml @@ -89,7 +89,6 @@ name: 'check-status-of-slaves' disabled: false - concurrent: true parameters: @@ -97,17 +96,23 @@ name: SLAVE_NAME description: 'script lives on master node' default-slaves: - - master + - lf-build1 allowed-multiselect: false ignore-offline-nodes: true + - project-parameter: + project: releng + branch: master + + scm: + - git-scm triggers: - timed: '@midnight' builders: - shell: | - cd /opt/jenkins-ci/slavemonitor - bash slave-monitor-0.1.sh | sort + cd $WORKSPACE/utils/ + bash slave-monitor-0.1.sh - job-template: name: 'ansible-build-server' diff --git a/utils/slave-monitor-0.1.sh b/utils/slave-monitor-0.1.sh index 161aaef21..5201f93d6 100644..100755 --- a/utils/slave-monitor-0.1.sh +++ b/utils/slave-monitor-0.1.sh @@ -8,9 +8,8 @@ # http://www.apache.org/licenses/LICENSE-2.0 ############################################################################## -#This will put a bunch of files in the pwd. you have been warned. 
#Counts how long slaves have been online or offline - +#exec 2>/dev/null #Yes I know about jq curlcommand() { @@ -25,74 +24,66 @@ curl -s "https://build.opnfv.org/ci/computer/api/json?tree=computer\[displayName | sed s,\",,g } -if [ -f podoutput-current ]; then - cp podoutput-current podoutput-lastiteration -fi - -curlcommand > podoutput-current +curlcommand > /tmp/podoutput-current -declare -A slavescurrent slaveslastiteration +declare -A slavescurrent while read -r name status ; do slavescurrent["$name"]="$status" -done < <(cat podoutput-current) - -while read -r name status ; do - slaveslastiteration["$name"]=$status -done < <(cat podoutput-lastiteration) - +done < <(cat /tmp/podoutput-current) + +#haste bin stopped allowing post :( +#files=(*online) +#for ((i=0; i<${#files[@]}; i+=9)); do +#./eplot -d -r [-1:74][-1:30] -m ${files[i]} ${files[i+1]} ${files[i+2]} ${files[i+3]} ${files[i+4]} ${files[i+5]} ${files[i+6]} ${files[i+7]} ${files[i+8]} ${files[i+9]} +#done | ./haste.bash +## main () { + for slavename in "${!slavescurrent[@]}"; do - #Slave is online. Mark it down. + + #Slave is online. Mark it down. if [ "${slavescurrent[$slavename]}" == "false" ]; then - if [ -f "$slavename"-offline ]; then - echo "removing offline status from $slavename slave was offline for $(cat "$slavename"-offline ) iterations" - rm "$slavename"-offline - fi - - if ! [ -f "$slavename"-online ]; then - echo "1" > "$slavename"-online - elif [ -f "$slavename"-online ]; then - #read and increment slavename - read -r -d $'\x04' var < "$slavename"-online - ((var++)) - echo -n "ONLINE $slavename " - echo "for $var iterations" - echo "$var" > "$slavename"-online - fi - fi + if ! [ -f /tmp/"$slavename"-online ]; then + echo "1" > /tmp/"$slavename"-online + echo "new online slave file created $slavename ${slavescurrent[$slavename]} up for 1 iterations" + fi - #went offline since last iteration. 
- if [ "${slavescurrent[$slavename]}" == "false" ] && [ "${slaveslastiteration[$slavename]}" == "true" ]; then - echo "JUST WENT OFFLINE $slavename " - if [ -f "$slavename"-online ]; then - echo "removing online status from $slavename. slave was online for $(cat "$slavename"-online ) iterations" - rm "$slavename"-online - fi + #read and increment slavename + var="$(cat /tmp/"$slavename"-online |tail -n 1)" + if [[ "$var" == "0" ]]; then + echo "slave $slavename ${slavescurrent[$slavename]} back up for $var iterations" + fi + ((var++)) + echo "$var" >> /tmp/"$slavename"-online + unset var + echo "$slavename up $(cat /tmp/$slavename-online | tail -n 10 | xargs)" fi - #slave is offline + #slave is offline remove all points if [ "${slavescurrent[$slavename]}" == "true" ]; then - if ! [ -f "$slavename"-offline ]; then - echo "1" > "$slavename"-offline - fi - - if [ -f "$slavename"-offline ]; then - #read and increment slavename - read -r -d $'\x04' var < "$slavename"-offline - ((var++)) - echo "$var" > "$slavename"-offline - if [ "$var" -gt "30" ]; then - echo "OFFLINE FOR $var ITERATIONS REMOVE $slavename " - else - echo "OFFLINE $slavename FOR $var ITERATIONS " - fi - fi + if ! [ -f /tmp/"$slavename"-online ]; then + echo "0" > /tmp/"$slavename"-online + echo "new offline slave file created $slavename ${slavescurrent[$slavename]} up for 0 iterations" + + fi + var="$(cat /tmp/"$slavename"-online |tail -n 1)" + + if [[ "$var" != "0" ]]; then + echo "slave $slavename ${slavescurrent[$slavename]} was up for $var iterations" + echo "slave $slavename ${slavescurrent[$slavename]} has gone offline, was $var iterations now reset to 0" + fi + + echo "0" >> /tmp/"$slavename"-online + echo "$slavename down $(cat /tmp/$slavename-online | tail -n 10 | xargs)" + unset var + fi + done } -main +main | sort | column -t |