aboutsummaryrefslogtreecommitdiffstats
path: root/extraconfig/tasks/pacemaker_common_functions.sh
diff options
context:
space:
mode:
authormarios <marios@redhat.com>2016-05-25 11:56:02 +0300
committerEmilien Macchi <emilien@redhat.com>2016-09-17 04:46:24 +0000
commitfb25385d34e604d2f670cebe3e03fd57c14fa6be (patch)
treeadce59c96cdd5bdf96b9fc644baecadb036254b3 /extraconfig/tasks/pacemaker_common_functions.sh
parent430db61b817d8e276e3986777fe96a66546c5222 (diff)
Rework the pacemaker_common_functions for M..N upgrades
For N we cannot assume services are managed by pacemaker. This adds functions to check if a service is systemd or pcmk managed and start/stops it accordingly. For pcmk, only stop/disable on bootstrap node for example, whereas systemd should stop/start on all controllers. There is also an equivalent change to the check_resource which has been reworked to allow both pcmk and systemd. Implements: blueprint overcloud-upgrades-workflow-mitaka-to-newton Change-Id: Ic8252736781dc906b3aef8fc756eb8b2f3bb1f02
Diffstat (limited to 'extraconfig/tasks/pacemaker_common_functions.sh')
-rwxr-xr-xextraconfig/tasks/pacemaker_common_functions.sh281
1 files changed, 259 insertions, 22 deletions
diff --git a/extraconfig/tasks/pacemaker_common_functions.sh b/extraconfig/tasks/pacemaker_common_functions.sh
index 7d794c97..4f17b69a 100755
--- a/extraconfig/tasks/pacemaker_common_functions.sh
+++ b/extraconfig/tasks/pacemaker_common_functions.sh
@@ -2,51 +2,286 @@
set -eu
-function check_resource {
+DEBUG="true" # set false if the verbosity is a problem
+SCRIPT_NAME=$(basename $0)
+function log_debug {
+ if [[ $DEBUG = "true" ]]; then
+ echo "`date` $SCRIPT_NAME tripleo-upgrade $(facter hostname) $1"
+ fi
+}
+
+function is_bootstrap_node {
+ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+ log_debug "Node is bootstrap"
+ echo "true"
+ fi
+}
+function check_resource_pacemaker {
if [ "$#" -ne 3 ]; then
- echo_error "ERROR: check_resource function expects 3 parameters, $# given"
- exit 1
+ echo_error "ERROR: check_resource function expects 3 parameters, $# given"
+ exit 1
fi
- service=$1
- state=$2
- timeout=$3
+ local service=$1
+ local state=$2
+ local timeout=$3
+
+ if [[ -z $(is_bootstrap_node) ]] ; then
+ log_debug "Node isn't bootstrap, skipping check for $service to be $state here "
+ return
+ else
+ log_debug "Node is bootstrap checking $service to be $state here"
+ fi
if [ "$state" = "stopped" ]; then
- match_for_incomplete='Started'
+ match_for_incomplete='Started'
else # started
- match_for_incomplete='Stopped'
+ match_for_incomplete='Stopped'
fi
nodes_local=$(pcs status | grep ^Online | sed 's/.*\[ \(.*\) \]/\1/g' | sed 's/ /\|/g')
if timeout -k 10 $timeout crm_resource --wait; then
- node_states=$(pcs status --full | grep "$service" | grep -v Clone | { egrep "$nodes_local" || true; } )
- if echo "$node_states" | grep -q "$match_for_incomplete"; then
- echo_error "ERROR: cluster finished transition but $service was not in $state state, exiting."
- exit 1
- else
- echo "$service has $state"
- fi
- else
- echo_error "ERROR: cluster remained unstable for more than $timeout seconds, exiting."
+ node_states=$(pcs status --full | grep "$service" | grep -v Clone | { egrep "$nodes_local" || true; } )
+ if echo "$node_states" | grep -q "$match_for_incomplete"; then
+ echo_error "ERROR: cluster finished transition but $service was not in $state state, exiting."
exit 1
+ else
+ echo "$service has $state"
+ fi
+ else
+ echo_error "ERROR: cluster remained unstable for more than $timeout seconds, exiting."
+ exit 1
+ fi
+
+}
+
+function pcmk_running {
+ if [[ $(systemctl is-active pacemaker) = "active" ]] ; then
+ echo "true"
+ fi
+}
+
+function is_systemd_unknown {
+ local service=$1
+ if [[ $(systemctl is-active "$service") = "unknown" ]]; then
+ log_debug "$service found to be unkown to systemd"
+ echo "true"
+ fi
+}
+
+function grep_is_cluster_controlled {
+ local service=$1
+ if [[ -n $(systemctl status $service -l | grep Drop-In -A 5 | grep pacemaker) ||
+ -n $(systemctl status $service -l | grep "Cluster Controlled $service") ]] ; then
+ log_debug "$service is pcmk managed from systemctl grep"
+ echo "true"
+ fi
+}
+
+
+function is_systemd_managed {
+ local service=$1
+ #if we have pcmk check to see if it is managed there
+ if [[ -n $(pcmk_running) ]]; then
+ if [[ -z $(pcs status --full | grep $service) && -z $(is_systemd_unknown $service) ]] ; then
+ log_debug "$service found to be systemd managed from pcs status"
+ echo "true"
+ fi
+ else
+ # if it is "unknown" to systemd, then it is pacemaker managed
+ if [[ -n $(is_systemd_unknown $service) ]] ; then
+ return
+ elif [[ -z $(grep_is_cluster_controlled $service) ]] ; then
+ echo "true"
+ fi
+ fi
+}
+
+function is_pacemaker_managed {
+ local service=$1
+ #if we have pcmk check to see if it is managed there
+ if [[ -n $(pcmk_running) ]]; then
+ if [[ -n $(pcs status --full | grep $service) ]]; then
+ log_debug "$service found to be pcmk managed from pcs status"
+ echo "true"
+ fi
+ else
+ # if it is unknown to systemd, then it is pcmk managed
+ if [[ -n $(is_systemd_unknown $service) ]]; then
+ echo "true"
+ elif [[ -n $(grep_is_cluster_controlled $service) ]] ; then
+ echo "true"
+ fi
+ fi
+}
+
+function is_managed {
+ local service=$1
+ if [[ -n $(is_pacemaker_managed $service) || -n $(is_systemd_managed $service) ]]; then
+ echo "true"
+ fi
+}
+
+function check_resource_systemd {
+
+ if [ "$#" -ne 3 ]; then
+ echo_error "ERROR: check_resource function expects 3 parameters, $# given"
+ exit 1
fi
+ local service=$1
+ local state=$2
+ local timeout=$3
+ local check_interval=3
+
+ if [ "$state" = "stopped" ]; then
+ match_for_incomplete='active'
+ else # started
+ match_for_incomplete='inactive'
+ fi
+
+ log_debug "Going to check_resource_systemd for $service to be $state"
+
+ #sanity check is systemd managed:
+ if [[ -z $(is_systemd_managed $service) ]]; then
+ echo "ERROR - $service not found to be systemd managed."
+ exit 1
+ fi
+
+ tstart=$(date +%s)
+ tend=$(( $tstart + $timeout ))
+ while (( $(date +%s) < $tend )); do
+ if [[ "$(systemctl is-active $service)" = $match_for_incomplete ]]; then
+ echo "$service not yet $state, sleeping $check_interval seconds."
+ sleep $check_interval
+ else
+ echo "$service is $state"
+ return
+ fi
+ done
+
+ echo "Timed out waiting for $service to go to $state after $timeout seconds"
+ exit 1
+}
+
+
+function check_resource {
+ local service=$1
+ local pcmk_managed=$(is_pacemaker_managed $service)
+ local systemd_managed=$(is_systemd_managed $service)
+
+ if [[ -n $pcmk_managed && -n $systemd_managed ]] ; then
+ log_debug "ERROR $service managed by both systemd and pcmk - SKIPPING"
+ return
+ fi
+
+ if [[ -n $pcmk_managed ]]; then
+ check_resource_pacemaker $@
+ return
+ elif [[ -n $systemd_managed ]]; then
+ check_resource_systemd $@
+ return
+ fi
+ log_debug "ERROR cannot check_resource for $service, not managed here?"
+}
+
+function manage_systemd_service {
+ local action=$1
+ local service=$2
+ log_debug "Going to systemctl $action $service"
+ systemctl $action $service
+}
+
+function manage_pacemaker_service {
+ local action=$1
+ local service=$2
+ # not if pacemaker isn't running!
+ if [[ -z $(pcmk_running) ]]; then
+ echo "$(facter hostname) pacemaker not active, skipping $action $service here"
+ elif [[ -n $(is_bootstrap_node) ]]; then
+ log_debug "Going to pcs resource $action $service"
+ pcs resource $action $service
+ fi
+}
+
+function stop_or_disable_service {
+ local service=$1
+ local pcmk_managed=$(is_pacemaker_managed $service)
+ local systemd_managed=$(is_systemd_managed $service)
+
+ if [[ -n $pcmk_managed && -n $systemd_managed ]] ; then
+ log_debug "Skipping stop_or_disable $service due to management conflict"
+ return
+ fi
+
+ log_debug "Stopping or disabling $service"
+ if [[ -n $pcmk_managed ]]; then
+ manage_pacemaker_service disable $service
+ return
+ elif [[ -n $systemd_managed ]]; then
+ manage_systemd_service stop $service
+ return
+ fi
+ log_debug "ERROR: $service not managed here?"
+}
+
+function start_or_enable_service {
+ local service=$1
+ local pcmk_managed=$(is_pacemaker_managed $service)
+ local systemd_managed=$(is_systemd_managed $service)
+
+ if [[ -n $pcmk_managed && -n $systemd_managed ]] ; then
+ log_debug "Skipping start_or_enable $service due to management conflict"
+ return
+ fi
+
+ log_debug "Starting or enabling $service"
+ if [[ -n $pcmk_managed ]]; then
+ manage_pacemaker_service enable $service
+ return
+ elif [[ -n $systemd_managed ]]; then
+ manage_systemd_service start $service
+ return
+ fi
+ log_debug "ERROR $service not managed here?"
+}
+
+function restart_service {
+ local service=$1
+ local pcmk_managed=$(is_pacemaker_managed $service)
+ local systemd_managed=$(is_systemd_managed $service)
+
+ if [[ -n $pcmk_managed && -n $systemd_managed ]] ; then
+ log_debug "ERROR $service managed by both systemd and pcmk - SKIPPING"
+ return
+ fi
+
+ log_debug "Restarting $service"
+ if [[ -n $pcmk_managed ]]; then
+ manage_pacemaker_service restart $service
+ return
+ elif [[ -n $systemd_managed ]]; then
+ manage_systemd_service restart $service
+ return
+ fi
+ log_debug "ERROR $service not managed here?"
}
function echo_error {
echo "$@" | tee /dev/fd2
}
+# swift is a special case because it is/was never handled by pacemaker
+# when stand-alone swift is used, only swift-proxy is running on controllers
function systemctl_swift {
services=( openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \
openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \
openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy )
- action=$1
+ local action=$1
case $action in
stop)
- services=$(systemctl | grep swift | grep running | awk '{print $1}')
+ services=$(systemctl | grep openstack-swift- | grep running | awk '{print $1}')
;;
start)
enable_swift_storage=$(hiera -c /etc/puppet/hiera.yaml 'enable_swift_storage')
@@ -54,9 +289,11 @@ function systemctl_swift {
services=( openstack-swift-proxy )
fi
;;
- *) services=() ;; # for safetly, should never happen
+ *) echo "Unknown action $action passed to systemctl_swift"
+ exit 1
+ ;; # shouldn't ever happen...
esac
- for S in ${services[@]}; do
- systemctl $action $S
+ for service in ${services[@]}; do
+ manage_systemd_service $action $service
done
}