diff options
author | agardner <agardner@linuxfoundation.org> | 2017-08-31 15:33:49 -0400 |
---|---|---|
committer | agardner <agardner@linuxfoundation.org> | 2017-09-05 14:05:10 -0400 |
commit | a288289c8b0426a7925e8a3ea81cdaf282e34bc7 (patch) | |
tree | 7d9a15a401e31a62edce3eef6ffa83f7fa9cb0f3 | |
parent | 6256ecdb5ffff289624d94630145c7af13c8378a (diff) |
Just some job that lightly monitors slaves
I would like to remove slaves that are offline
eg: after one month.
This script doesn't remove anything, or email anyone, but you can look at
its output if you are interested.
Counts how long slaves have been online or offline
not sure why jenkins doesn't come with this functionality.
I just threw the script on the master node into
/opt/jenkins-ci/slavemonitor
Script writes data to be read the next day, and I don't want to worry about the
workspace getting wiped.
You can look at the script here:
https://gist.github.com/Aricg/d770f7d0a4e88d149ddb6715dc4ad0ba
But I don't recommend it.
Example of what output looks like:
JUST WENT OFFLINE arm-build3
JUST WENT OFFLINE arm-pod5
JUST WENT OFFLINE ericsson-build3
JUST WENT OFFLINE ericsson-virtual5
OFFLINE cengn-pod1 FOR 7 ITERATIONS
OFFLINE cisco-virtual1 FOR 7 ITERATIONS
OFFLINE ericsson-virtual8 FOR 7 ITERATIONS
OFFLINE ericsson-virtual9 FOR 7 ITERATIONS
OFFLINE intel-pod3 FOR 7 ITERATIONS
OFFLINE intel-pod6 FOR 7 ITERATIONS
OFFLINE intel-pod8 FOR 7 ITERATIONS
OFFLINE intel-virtual1 FOR 7 ITERATIONS
ONLINE lf-build1 for 6 iterations
ONLINE lf-build2 for 6 iterations
ONLINE lf-pod1 for 6 iterations
ONLINE lf-pod2 for 6 iterations
ONLINE lf-virtual1 for 6 iterations
ONLINE lf-virtual2 for 6 iterations
ONLINE lf-virtual3 for 6 iterations
ONLINE zte-pod1 for 6 iterations
ONLINE zte-pod2 for 6 iterations
ONLINE zte-pod3 for 6 iterations
ONLINE zte-pod4 for 6 iterations
ONLINE zte-virtual1 for 6 iterations
ONLINE zte-virtual2 for 6 iterations
ONLINE zte-virtual3 for 6 iterations
ONLINE zte-virtual4 for 6 iterations
removing online status from arm-build3. slave was online for 1
iterations
removing online status from arm-pod5. slave was online for 1 iterations
removing online status from ericsson-build3. slave was online for 1
iterations
removing online status from ericsson-virtual5. slave was online for 1
iterations
Change-Id: I652ef5460cb1ce45ff4e87828eb779da7febe8b4
Signed-off-by: agardner <agardner@linuxfoundation.org>
-rw-r--r-- | jjb/releng/opnfv-utils.yml | 25 | ||||
-rw-r--r-- | utils/slave-monitor-0.1.sh | 98 |
2 files changed, 123 insertions, 0 deletions
diff --git a/jjb/releng/opnfv-utils.yml b/jjb/releng/opnfv-utils.yml index ac1ec07f4..721b5dede 100644 --- a/jjb/releng/opnfv-utils.yml +++ b/jjb/releng/opnfv-utils.yml @@ -5,6 +5,7 @@ jobs: - 'prune-docker-images' - 'archive-repositories' + - 'check-status-of-slaves' ######################## # job templates @@ -62,3 +63,27 @@ builders: - shell: !include-raw-escape: opnfv-repo-archiver.sh + +- job-template: + name: 'check-status-of-slaves' + + disabled: false + + concurrent: true + + parameters: + - node: + name: SLAVE_NAME + description: We don't want workspace wiped. so I just threw the script on the master + default-slaves: + - master + allowed-multiselect: false + ignore-offline-nodes: true + + triggers: + - timed: '@midnight' + + builders: + - shell: | + cd /opt/jenkins-ci/slavemonitor + bash slave-monitor-0.1.sh | sort diff --git a/utils/slave-monitor-0.1.sh b/utils/slave-monitor-0.1.sh new file mode 100644 index 000000000..161aaef21 --- /dev/null +++ b/utils/slave-monitor-0.1.sh @@ -0,0 +1,98 @@ +#!/bin/bash +# SPDX-license-identifier: Apache-2.0 +############################################################################## +# Copyright (c) 2016 Linux Foundation and others. +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +#This will put a bunch of files in the pwd. you have been warned. 
#Counts how long slaves have been online or offline
#
# State is kept as plain files in the current working directory:
#   podoutput-current        "<name> <offline>" lines from this run
#   podoutput-lastiteration  copy of the previous run's podoutput-current
#   <name>-online            consecutive runs <name> has been seen online
#   <name>-offline           consecutive runs <name> has been seen offline
# <offline> is the value of the Jenkins "offline" field, so "true" means the
# slave is OFFLINE and "false" means it is ONLINE.

# Offline slaves are flagged with REMOVE once their counter exceeds this.
OFFLINE_REMOVE_THRESHOLD="${OFFLINE_REMOVE_THRESHOLD:-30}"

#Yes I know about jq
# Query the Jenkins computer API and print one "<displayName> <offline>"
# line per computer on stdout. The timeouts keep a hung connection from
# blocking the job forever.
curlcommand() {
  curl -s --connect-timeout 10 --max-time 60 \
    "https://build.opnfv.org/ci/computer/api/json?tree=computer\[displayName,offline\]" \
    | awk '{n=split($0,a,","); for (i=1; i<=n; i++) print a[i]}' \
    | grep -v "_class" \
    | awk 'NR%2{printf "%s ",$0;next;}1' \
    | awk -F":" '{print $2,$3}' \
    | awk '{print $1,$3}' \
    | sed -e 's/[]}"]//g'
}

# Print the integer stored in counter file $1, or 0 when the file is missing
# or does not contain a plain non-negative integer.
read_counter() {
  local value=""
  if [ -f "$1" ]; then
    read -r value < "$1" || true
  fi
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a value such as "08" is not parsed as octal.
    printf '%s\n' "$((10#$value))"
  else
    printf '0\n'
  fi
}

if [ -f podoutput-current ]; then
  cp podoutput-current podoutput-lastiteration
fi

curlcommand > podoutput-current

declare -A slavescurrent slaveslastiteration

while read -r name status; do
  # An empty key is not a valid associative-array subscript.
  [ -n "$name" ] || continue
  slavescurrent["$name"]="$status"
done < podoutput-current

# There is no previous snapshot on the very first run.
if [ -f podoutput-lastiteration ]; then
  while read -r name status; do
    [ -n "$name" ] || continue
    slaveslastiteration["$name"]="$status"
  done < podoutput-lastiteration
fi

# Walk every slave in slavescurrent, update its <name>-online or
# <name>-offline counter file and report its state on stdout.
# Globals: slavescurrent (read), slaveslastiteration (read),
#          OFFLINE_REMOVE_THRESHOLD (read)
main () {
  local slavename current previous count
  for slavename in "${!slavescurrent[@]}"; do
    current="${slavescurrent[$slavename]}"
    previous="${slaveslastiteration[$slavename]:-}"

    if [ "$current" == "false" ]; then
      #Slave is online. Mark it down.
      if [ -f "$slavename"-offline ]; then
        echo "removing offline status from $slavename slave was offline for $(read_counter "$slavename"-offline) iterations"
        rm -- "$slavename"-offline
      fi

      if ! [ -f "$slavename"-online ]; then
        echo "1" > "$slavename"-online
      else
        #read and increment the online counter
        count=$(( $(read_counter "$slavename"-online) + 1 ))
        echo "ONLINE $slavename for $count iterations"
        echo "$count" > "$slavename"-online
      fi

    elif [ "$current" == "true" ]; then
      #slave is offline
      #went offline since last iteration: it was online (offline=false) then.
      if [ "$previous" == "false" ]; then
        echo "JUST WENT OFFLINE $slavename "
      fi

      if [ -f "$slavename"-online ]; then
        echo "removing online status from $slavename. slave was online for $(read_counter "$slavename"-online) iterations"
        rm -- "$slavename"-online
      fi

      # A missing counter reads as 0, so the first offline run is counted as 1.
      count=$(( $(read_counter "$slavename"-offline) + 1 ))
      echo "$count" > "$slavename"-offline
      if [ "$count" -gt "$OFFLINE_REMOVE_THRESHOLD" ]; then
        echo "OFFLINE FOR $count ITERATIONS REMOVE $slavename "
      else
        echo "OFFLINE $slavename FOR $count ITERATIONS "
      fi
    fi
  done
}

main