summaryrefslogtreecommitdiffstats
path: root/manifests
diff options
context:
space:
mode:
authorMichele Baldessari <michele@acksyn.org>2016-09-29 18:35:25 +0200
committerMichele Baldessari <michele@acksyn.org>2016-10-05 07:50:04 +0200
commit59a5f37c652c5cdad59723b5f96d808ff1558c90 (patch)
tree86beb60f8199c3dde010983a6bdfb2d43fbc8c5f /manifests
parent264d49a255024eba9ea3bf2086c63e9d49b6da11 (diff)
Change rabbitmq queues HA mode from ha-all to ha-exactly
It turns out that reducing number of rabbitmq queues in cluster significantly improves performance of cluster especially in the case of failover recovery time. Right now the cluster uses ha-all mode for rabbitmq queues. It is best to change this to "ha-exactly" mode and reduce the number of queue copies to ceil(N/2) where N is number of controllers in the cluster - so in typical scenario of 3 controller It would be 2 by default. It does not make much sense to keep the copies of queues over whole cluster since if the quorum of nodes is lost then the rest of cluster nodes will be stopped anyway. We let the user override this with a parameter. I.e. for a 3 node controlplane cluster we will go from this: pcs resource show rabbitmq Resource: rabbitmq (class=ocf provider=heartbeat type=rabbitmq-cluster) Attributes: set_policy="ha-all ^(?!amq\.).* {"ha-mode":"all"}" To this: pcs resource show rabbitmq Resource: rabbitmq (class=ocf provider=heartbeat type=rabbitmq-cluster) Attributes: set_policy="ha-all ^(?!amq\.).* {"ha-mode":"exactly","ha-params":2}" According to Marin Krcmarik's testing recovery time from failure was reduced significantly. Co-Authored-By: Marian Krcmarik <mkrcmari@redhat.com> Change-Id: Ib62001c03e1e08f58cf0c6e0ba07a8879a584084 Partial-Bug: #1628998
Diffstat (limited to 'manifests')
-rw-r--r--manifests/profile/pacemaker/rabbitmq.pp22
1 files changed, 21 insertions, 1 deletions
diff --git a/manifests/profile/pacemaker/rabbitmq.pp b/manifests/profile/pacemaker/rabbitmq.pp
index 1f25e8b..8d5f9d0 100644
--- a/manifests/profile/pacemaker/rabbitmq.pp
+++ b/manifests/profile/pacemaker/rabbitmq.pp
@@ -26,6 +26,16 @@
# (Optional) Content of erlang cookie.
# Defaults to hiera('rabbitmq::erlang_cookie').
#
+# [*user_ha_queues*]
+# (Optional) The number of HA queues in to be configured in rabbitmq
+# Defaults to hiera('rabbitmq::nr_ha_queues'), which is usually 0 meaning
+# that the queues number will be CEIL(N/2) where N is the number of rabbitmq
+# nodes.
+#
+# [*rabbit_nodes*]
+# (Optional) The list of rabbitmq nodes names
+# Defaults to hiera('rabbitmq_node_names')
+#
# [*step*]
# (Optional) The current step in deployment. See tripleo-heat-templates
# for more details.
@@ -34,6 +44,8 @@
class tripleo::profile::pacemaker::rabbitmq (
$bootstrap_node = hiera('bootstrap_nodeid'),
$erlang_cookie = hiera('rabbitmq::erlang_cookie'),
+ $user_ha_queues = hiera('rabbitmq::nr_ha_queues', 0),
+ $rabbit_nodes = hiera('rabbitmq_node_names'),
$step = hiera('step'),
) {
if $::hostname == downcase($bootstrap_node) {
@@ -61,9 +73,17 @@ class tripleo::profile::pacemaker::rabbitmq (
}
if $step >= 2 and $pacemaker_master {
+ include ::stdlib
+ # The default nr of ha queues is ceiling(N/2)
+ if $user_ha_queues == 0 {
+ $nr_rabbit_nodes = size($rabbit_nodes)
+ $nr_ha_queues = $nr_rabbit_nodes / 2 + ($nr_rabbit_nodes % 2)
+ } else {
+ $nr_ha_queues = $user_ha_queues
+ }
pacemaker::resource::ocf { 'rabbitmq':
ocf_agent_name => 'heartbeat:rabbitmq-cluster',
- resource_params => 'set_policy=\'ha-all ^(?!amq\.).* {"ha-mode":"all"}\'',
+ resource_params => "set_policy='ha-all ^(?!amq\\.).* {\"ha-mode\":\"exactly\",\"ha-params\":${nr_ha_queues}}'",
clone_params => 'ordered=true interleave=true',
meta_params => 'notify=true',
require => Class['::rabbitmq'],